Skip to content

Commit

Permalink
Merge pull request #1 from jones1618/add-arguments-and-columns
Browse files Browse the repository at this point in the history
Add arguments and columns
  • Loading branch information
andrewjdyck authored Aug 27, 2019
2 parents 00fd98e + f91f7a9 commit ff7400b
Show file tree
Hide file tree
Showing 8 changed files with 2,399 additions and 46 deletions.
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@ This python script takes data in csv format and converts to geoJSON format for u

Two sample CSV documents, sample.csv and country_cap_latlon.csv, are included that can be used to show how the utility works.

The script currently requires that CSV data be in the format specified in the sample data 'sample.csv', however, this could be easily adapted to fit other formats.
By default, the script expects CSV data to be in the format specified in the sample data 'sample.csv'. However, you can specify other formats in the program arguments.

Questions and comments to [email protected]
csvToGeoJSON arguments (all optional):

-h, --help Show this help message and exit
--verbose, --v Show parameters, detailed messages and geoJSON output
--csv CSV Filepath/name of CSV input file
Default: sample.csv
--output OUTPUT Filepath/name of GeoJSON output file
Default: output.geojson
--columns COLUMNS JSON-formatted dictionary that maps CSV column names to
GeoJSON fields.
Default: "{ 'id':'id', 'latitude':'lat', 'longitude':'lon', 'name':'name', 'value':'pop' }"
where each name:value pair is {geoJSON-column}:{CSV-column}
Note: CSV columns don't have to appear in a particular order since the position of the data columns
will be determined from their order in the CSV header (first row).
If CSV contains no ID field, specifying 'id':'-auto-' will automatically generate an ID value based
on the row # of the CSV. Otherwise, the ID value will be 0.

Questions and comments to [email protected] or [email protected]
4 changes: 4 additions & 0 deletions csv/alternative-columns-test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pointid,label,lat,long,GDP
4,Afghanistan,65.216,33.677,250B
8,Albania,20.068,41.143,315B
894,Zambia,26.32,-14.614,114B
File renamed without changes.
228 changes: 184 additions & 44 deletions csvToGeoJSON.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,190 @@
import csv
import argparse
import json

# NOTE(review): the hunk header above ("@@ -1,50 +1,190 @@") suggests this flat
# script is the pre-change side of a diff view and is superseded by main()
# further down -- TODO confirm before relying on it.
# Read in raw data from csv
# NOTE(review): 'rb' opens sample.csv in binary mode; Python 3's csv.reader
# expects text rows, so this looks Python 2 specific -- confirm target version.
# The file object is never bound to a name, so it is never closed explicitly.
rawData = csv.reader(open('sample.csv', 'rb'), dialect='excel')

# the template. where data from the csv will be formatted to geojson
# Five %s slots, filled in the order id, row[1], row[2], name, value.
# Every rendered feature ends with a comma, including the last one.
template = \
''' \
{ "type" : "Feature",
"id" : %s,
"geometry" : {
"type" : "Point",
"coordinates" : ["%s","%s"]},
"properties" : { "name" : "%s", "value" : "%s"}
},
'''

# the head of the geojson file
output = \
''' \
{ "type" : "Feature Collection",
{"features" : [
'''

# loop through the csv by row skipping the first
# `iter` is a 1-based row counter (it shadows the built-in iter()); rows are
# only processed once it reaches 2, i.e. the header row is skipped.
# `id`, `lat`, `lon`, `name` and `pop` name the first five columns, but the
# template call reads row[0]..row[4] directly; `id` also shadows a built-in.
iter = 0
for row in rawData:
iter += 1
if iter >= 2:
id = row[0]
lat = row[1]
lon = row[2]
name = row[3]
pop = row[4]
output += template % (row[0], row[1], row[2], row[3], row[4])

# the tail of the geojson file
output += \
''' \
]
}
'''
#### ========================================================================
#### csvToGeoJSON
#### - Tool to convert CSV file of geographic data into
#### JSON-formatted file of GeoJSON features
#### ========================================================================

class GeoFeature:
    """Hold the properties of one GeoJSON Feature.

    Every attribute starts at a placeholder default so that a feature whose
    CSV row lacks a column still has a value for each field.
    """

    def __init__(self):
        self.id = 0            # feature identifier
        self.latitude = 0      # latitude value (placeholder until set)
        self.longitude = 0     # longitude value (placeholder until set)
        self.name = "-NA-"     # "name" property; "-NA-" marks "not available"
        self.value = "-NA-"    # "value" property; "-NA-" marks "not available"

    def __str__(self):
        """Return the one-line, human-readable summary of this feature."""
        return (
            "{ id = %s, latitude = %s, longitude = %s, name = %s, value = %s }"
            % (self.id, self.latitude, self.longitude, self.name, self.value)
        )

    def print(self):
        """Write the feature summary (see ``__str__``) to standard output."""
        # Inside a method body the name ``print`` still resolves to the
        # built-in function, not to this method.
        print(self)

class GeoFeatureColumn:
    """Map one GeoJSON field to the CSV column that supplies its value."""

    def __init__(self, geoJSONField, CSVName, CSVIndex):
        """Store the mapping.

        Args:
            geoJSONField: Name of the GeoJSON property.
            CSVName: Name of the CSV column for this property.
            CSVIndex: Index of the CSV column for this property.
        """
        self.geoJSONField = geoJSONField  # name of GeoJSON property
        self.CSVName = CSVName            # name of CSV column for this property
        self.CSVIndex = CSVIndex          # index of CSV column for this property

    def __repr__(self):
        """Return a constructor-style representation for debugging."""
        return "%s(%r, %r, %r)" % (
            type(self).__name__, self.geoJSONField, self.CSVName, self.CSVIndex
        )

def _cell(row, index, default):
    """Return ``row[index]``, or *default* if the column is unmapped or the row is too short."""
    if 0 <= index < len(row):
        return row[index]
    return default


def _to_coordinate(value):
    """Return *value* as a finite float, or unchanged when it is not numeric."""
    import math  # local import: the file's top-level import block is left untouched

    try:
        number = float(value)
    except (TypeError, ValueError):
        return value
    # NaN/Infinity cannot be represented in JSON, so keep the original text.
    return number if math.isfinite(number) else value


def _to_feature_id(value):
    """Return *value* as an int when it is a canonical integer literal, else as text."""
    if isinstance(value, int):
        return value
    text = str(value).strip()
    try:
        number = int(text)
    except ValueError:
        return text
    # Keep ids such as "007" as strings so leading zeros are not lost.
    return number if str(number) == text else text


def _map_header(header, diGeoColumns, verbose):
    """Record the index of every mapped CSV column; return messages for missing ones."""
    if verbose:
        print("CSV Columns:")

    for iCol, col in enumerate(header):
        geoJSONField = ""
        for column in diGeoColumns.values():
            # if CSV header column matches CSV field, remember this field's index
            if col == column.CSVName:
                column.CSVIndex = iCol
                geoJSONField = " = geoJSON \"" + column.geoJSONField + "\""
                break
        if verbose:
            print(f' column[{iCol}] = {quote(col):10}{geoJSONField}')

    if verbose:
        print()

    # report geoJSON columns that are missing from CSV header as errors
    errors = []
    for column in diGeoColumns.values():
        if column.CSVIndex < 0:
            error = "No '%s' column in CSV header" % column.geoJSONField
            if column.geoJSONField == "id" and column.CSVName == "-auto-":
                error += " (using row # since -auto- was specified)"
            errors.append(error)
    return errors


def _build_feature(row, iRow, diGeoColumns):
    """Build the GeoJSON Feature dictionary for one CSV data row."""
    featureRow = GeoFeature()

    # set GeoFeature.id from CSV column or from row # if set to "-auto-"
    idColumn = diGeoColumns["id"]
    if idColumn.CSVIndex >= 0:
        featureRow.id = _cell(row, idColumn.CSVIndex, featureRow.id)
    elif idColumn.CSVName == "-auto-":
        featureRow.id = iRow

    # columns that are unmapped (or absent from a short row) keep the GeoFeature default
    featureRow.latitude = _cell(row, diGeoColumns["latitude"].CSVIndex, featureRow.latitude)
    featureRow.longitude = _cell(row, diGeoColumns["longitude"].CSVIndex, featureRow.longitude)
    featureRow.name = _cell(row, diGeoColumns["name"].CSVIndex, featureRow.name)
    featureRow.value = _cell(row, diGeoColumns["value"].CSVIndex, featureRow.value)

    return {
        "type": "Feature",
        "id": _to_feature_id(featureRow.id),
        "geometry": {
            "type": "Point",
            # NOTE(review): the order [latitude, longitude] is kept from the
            # original template. RFC 7946 positions are [longitude, latitude];
            # confirm against the CSV data before swapping.
            "coordinates": [
                _to_coordinate(featureRow.latitude),
                _to_coordinate(featureRow.longitude),
            ],
        },
        "properties": {
            "name": featureRow.name,
            "value": featureRow.value,
        },
    }


def main(argv=None):
    """Convert a CSV file of point data into a GeoJSON FeatureCollection file.

    The first CSV row is treated as the header and used to locate the columns
    named by ``--columns``; every following non-blank row becomes one Feature.
    The result is written to the ``--output`` path as valid JSON.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            lets argparse read them from ``sys.argv``.

    Raises:
        SystemExit: If the arguments are invalid or ``--columns`` is not a
            JSON dictionary (raised by argparse).
        OSError: If the CSV file cannot be read or the output cannot be written.
    """
    # parse program arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', '--v', action="store_true",
                        help="Show parameters, detailed messages and geoJSON output")
    parser.add_argument('--csv', type=str, default="sample.csv",
                        help="Filepath/name of CSV input file")
    parser.add_argument('--output', type=str, default="output.geojson",
                        help="Filepath/name of GeoJSON output file")
    parser.add_argument('--columns', type=str,
                        help="JSON-formatted dictionary that maps CSV column names to GeoJSON fields.",
                        default="{ 'id': 'id', 'latitude': 'lat', 'longitude': 'lon', 'name': 'name', 'value': 'pop' }")
    args = parser.parse_args(argv)

    if args.verbose:
        print("Arguments:")
        print(" Output = %s" % args.output)
        print(" CSV = %s" % args.csv)
        print(" Columns = %s" % args.columns)
        print()

    # parse "columns" argument into dictionary of CSV column names mapped to GeoJSON properties
    # (single quotes are accepted on the command line and converted to JSON double quotes)
    try:
        diCSVColumns = json.loads(args.columns.replace("'", "\""))
    except ValueError as exc:
        parser.error("--columns is not a valid JSON dictionary: %s" % exc)
    if not isinstance(diCSVColumns, dict):
        parser.error("--columns must be a JSON dictionary")

    # dictionary of all GeoFeatureColumns and their fallback mapping to CSV columns
    # NOTE(review): these fallbacks ("long", "value") differ from the --columns
    # default ("lon", "pop"); they only apply when --columns omits a field.
    diGeoColumns = {
        "id": GeoFeatureColumn("id", "id", -1),
        "latitude": GeoFeatureColumn("latitude", "lat", -1),
        "longitude": GeoFeatureColumn("longitude", "long", -1),
        "name": GeoFeatureColumn("name", "name", -1),
        "value": GeoFeatureColumn("value", "value", -1),
    }

    # set the CSVNames of all diGeoColumns given in the "columns" argument
    for fld, column in diGeoColumns.items():
        if fld in diCSVColumns:
            column.CSVName = diCSVColumns[fld]

    errors = []    # list of errors found, if any
    features = []  # one GeoJSON Feature dictionary per CSV data row

    # Read in raw data from csv; newline='' lets the csv module handle line endings
    with open(args.csv, 'r', newline='') as csvFile:
        for iRow, row in enumerate(csv.reader(csvFile, dialect='excel')):
            if iRow == 0:
                # first row (CSV header): determine the column indexes we need
                errors.extend(_map_header(row, diGeoColumns, args.verbose))
            elif row:
                # blank lines are skipped; they still count towards the row number
                features.append(_build_feature(row, iRow, diGeoColumns))

    # serialise through json so the output is valid JSON and every value is escaped
    output = json.dumps({"type": "FeatureCollection", "features": features}, indent=2)

    # print any errors that were found
    if errors:
        print("Errors:")
        for error in errors:
            print(" %s" % error)
        print()

    if args.verbose:
        print("GeoJSON Output:")
        print("=========================================================================")
        print(output)
        print("=========================================================================")

    # write the GeoJSON document to the requested output file
    with open(args.output, "w") as outFileHandle:
        outFileHandle.write(output)

# end main()

# Utility functions

def quote(s):
    """Return *s* wrapped in double-quotes.

    Non-string arguments are converted with ``str()`` first, so numbers and
    other values can be quoted without raising ``TypeError``.
    """
    return f'"{s}"'

# Run the converter only when executed as a script, so that importing this
# module (e.g. to reuse its classes) does not trigger a conversion.
if __name__ == "__main__":
    main()



Expand Down
38 changes: 38 additions & 0 deletions output/alternative-columns-test.geoJSON
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{ "type" : "Feature Collection",
{ "features" : [
{ "type" : "Feature",
"id" : 1,
"geometry" : {
"type" : "Point",
"coordinates" : ["65.216","33.677"]
},
"properties" : {
"name" : "Afghanistan",
"value" : "250B"
}
},
{ "type" : "Feature",
"id" : 2,
"geometry" : {
"type" : "Point",
"coordinates" : ["20.068","41.143"]
},
"properties" : {
"name" : "Albania",
"value" : "315B"
}
},
{ "type" : "Feature",
"id" : 3,
"geometry" : {
"type" : "Point",
"coordinates" : ["26.32","-14.614"]
},
"properties" : {
"name" : "Zambia",
"value" : "114B"
}
}
]
}
}
Loading

0 comments on commit ff7400b

Please sign in to comment.