Skip to content
Snippets Groups Projects
Commit 5b3033dc authored by David Fichtmueller's avatar David Fichtmueller
Browse files

added file: search_GeoNames_in_OpenRefine.py

parent b27cae76
Branches master
No related tags found
No related merge requests found
Pipeline #294 failed
import json
import os
import urllib2
###
# This script annotates geographical locations with GeoNames IDs. It is run within OpenRefine.
# This script is highly specific to our use case, i.e. the hardcoded column names, but we decided to share it nonetheless, as it might be helpful for others with similar problems.
# License: Mozilla Public License 2.0 (MPL 2.0)
###
USERNAME = ''  # enter your GeoNames username here, otherwise the API calls will not work
DOMAIN = 'http://api.geonames.org/'
DICTIONARY = 'locations.json'

# DICTIONARY is a JSON file caching the locations that were already handled.
# It maps a location name to one of the following values:
#   'not found'               - the search returned no result
#   'ambiguous'               - only read by this script, never written by it
#   'URI|Latitude|Longitude'  - GeoNames URI and coordinates of the stored result
if os.path.isfile(DICTIONARY):
    with open(DICTIONARY) as json_file:
        data = json.load(json_file)
else:
    # No cache file yet (e.g. first run): start empty instead of failing on
    # every row. The file is created the first time a result is stored.
    data = {}

# Current location given by the data table
cellVal = cells['Fundort']['value']

# Coordinates of the current entry; a decimal comma is accepted as well
latitude = float(cells['Latitude']['value'].replace(',', '.'))
longitude = float(cells['Longitude']['value'].replace(',', '.'))
#If the current entry exists in dictionary and decide what to return
if len(data) > 0 and cellVal in data:
if data[cellVal] == "not found":
return "not found"
if data[cellVal] == "ambiguous":
return "ambiguous"
elif data[cellVal] != " ":
dataEntry = data[cellVal].split('|')
latitudeData = float(dataEntry[1])
longitudeData = float(dataEntry[2])
diffCoordinates = abs(latitude - latitudeData) + abs(longitude - longitudeData)
if diffCoordinates < 0.06:
return dataEntry[0]
else:
return "too distant"
#If the current entry is unknown
else:
method = 'searchJSON'
name = cells['Fundort']['value']
name = urllib2.quote(name.encode('utf-8'))
name = name.replace(" ", "+")
country = cells['fkIsoCode']['value']
adm2 = cells['Regional Unit']['value']
adm2 = urllib2.quote(adm2.encode('utf-8'))
adm2 = adm2.replace(" ", "+")
uri = "http://api.geonames.org/searchJSON?name=" + name + "&country=" + country + "&fuzzy=0.6" + "&username=" + USERNAME
print("Abfrage!")
#resource = urllib2.urlopen(uri)
resourceString = resource.read()
js = json.loads(resourceString)
numberOfResults = js['totalResultsCount']
if len(js['geonames']) == 0:
data[cells['Fundort']['value']] = "not found"
with open(DICTIONARY, 'w') as outfile:
json.dump(data, outfile)
return "not found"
if numberOfResults == 1:
latitudeResult = float(js['geonames'][0]['lat'])
longitudeResult = float(js['geonames'][0]['lng'])
diffCoordinates = abs(latitude-latitudeResult) + abs(longitude-longitudeResult)
if diffCoordinates < 0.06:
data[cells['Fundort']['value']] = "http://www.geonames.org/" + str(js['geonames'][0]['geonameId']) + "|" + str(latitudeResult) + "|" + str(longitudeResult)
with open(DICTIONARY, 'w') as outfile:
json.dump(data, outfile)
return data[cellVal].split("|")[0]
else:
data[cells['Fundort']['value']] = "http://www.geonames.org/" + str(js['geonames'][0]['geonameId']) + "|" + str(latitudeResult) + "|" + str(longitudeResult)
with open(DICTIONARY, 'w') as outfile:
json.dump(data, outfile)
return "too distant"
nearestPlace = []
for entry in range(0, numberOfResults):
latitudeResult = float(js['geonames'][entry]['lat'])
longitudeResult = float(js['geonames'][entry]['lng'])
diffCoordinates = abs(latitude-latitudeResult) + abs(longitude-longitudeResult)
if len(nearestPlace) > 0:
if diffCoordinates < nearestPlace[3]:
nearestPlace[0] = str(js['geonames'][entry]['geonameId'])
nearestPlace[1] = latitudeResult
nearestPlace[2] = longitudeResult
nearestPlace[3] = diffCoordinates
else:
nearestPlace.append(str(js['geonames'][entry]['geonameId']))
nearestPlace.append(latitudeResult)
nearestPlace.append(longitudeResult)
nearestPlace.append(diffCoordinates)
if nearestPlace[3] < 0.06:
data[cells['Fundort']['value']] = "http://www.geonames.org/" + str(nearestPlace[0]) + "|" + str(nearestPlace[1]) + "|" + str(nearestPlace[2])
nearestPlace = []
with open(DICTIONARY, 'w') as outfile:
json.dump(data, outfile)
return data[cellVal].split("|")[0]
else:
data[cells['Fundort']['value']] = "http://www.geonames.org/" + str(js['geonames'][0]['geonameId']) + "|" + str(latitudeResult) + "|" + str(longitudeResult)
with open(DICTIONARY, 'w') as outfile:
json.dump(data, outfile)
return "too distant"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment