I'm trying to create a geojson file, using an input txt file that I have. This is the code that I have but there seems to be errors with it.
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
jsonLoad = gdf.to_json()
IF the above works, then this part would generate it into the exact file format that I want.
import json
from geojson import Point, Feature, dump
#save the data to the file
parsed = json.loads(jsonLoad)
print(json.dumps(parsed, indent=4, sort_keys=True))
#parsed = gdf.to_json()
with open('myfile.geojson', 'w') as f:
dump(parsed, f,indent=1)
I'm trying to get them grouped according to their WayID
but when I get my resultant file, in the geoJSON I don't see the tags like shelter,yes,highway,footway
attached to it, which I don't understand why it's not being stored in the geoJSON?
For example this is my generated file,
{
"features": [
{
"geometry": {
"coordinates": [
[
103.9364834,
1.3218368
],
[
103.9365417,
1.3208156
],
[
103.9367689,
1.3206226
],
[
103.9345338,
1.3202877
],
[
103.9344606,
1.3235089
],
[
103.9370296,
1.3207544
],
[
103.9364744,
1.3218821
],
[
103.9367017,
1.3219285
],
[
103.936561,
1.3203222
],
[
103.936842,
1.320661
],
[
103.9371016,
1.3207378
],
[
103.933684,
1.3237604
],
[
103.9355026,
1.3237205
],
[
103.9364707,
1.321643
],
[
103.9363887,
1.3216271
]
],
"type": "LineString"
},
"id": "0",
"properties": {
"WAYID": 190637
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9365788,
1.3202255
],
[
103.9339101,
1.3205415
],
[
103.9363223,
1.3202778
]
],
"type": "LineString"
},
"id": "1",
"properties": {
"WAYID": 190888
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}
But under the properties
I would've expected to see the rest of the tags columns like 'tag2','tag3','tag4','tag5'
What am I missing here? Would appreciate if someone could tell me why this is happening, thank you!
EDIT:
For the same issue, if I change a couple of data lines to have more tags eg:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlongRemoveComma.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
jsonLoad = gdf.to_json()
It returns me an empty dataset, but I want it to be grouped mainly by WAYID
with all the respective tags attached
{
"features": [],
"type": "FeatureCollection"
}
CodePudding user response:
The problem comes from the fact that when you groupby WAYID
you dump all other columns. That is why all tags vanish. Do this:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''
#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID', 'tag2', 'tag3', 'tag4', 'tag5'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
jsonLoad = gdf.to_json()
and run you code again. This will return
{
"features": [
{
"geometry": {
"coordinates": [
[
103.9344606,
1.3235089
],
[
103.9355026,
1.3237205
]
],
"type": "LineString"
},
"id": "0",
"properties": {
"WAYID": 190637,
"tag2": "highway",
"tag3": "footway",
"tag4": "shelter",
"tag5": "yes"
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9364834,
1.3218368
],
[
103.9345338,
1.3202877
],
[
103.9364744,
1.3218821
],
[
103.9367017,
1.3219285
],
[
103.936561,
1.3203222
],
[
103.9371016,
1.3207378
],
[
103.933684,
1.3237604
],
[
103.9364707,
1.321643
],
[
103.9363887,
1.3216271
]
],
"type": "LineString"
},
"id": "1",
"properties": {
"WAYID": 190637,
"tag2": "shelter",
"tag3": "yes",
"tag4": "highway",
"tag5": "footway"
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
[
103.9365788,
1.3202255
],
[
103.9339101,
1.3205415
],
[
103.9363223,
1.3202778
]
],
"type": "LineString"
},
"id": "2",
"properties": {
"WAYID": 190888,
"tag2": "shelter",
"tag3": "yes",
"tag4": "highway",
"tag5": "footway"
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}
UPDATE:
Do this to keep all the columns:
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
gdf = gdf.sort_values("pointID").groupby("WAYID", as_index=False).first()
jsonLoad = gdf.to_json()
and running your code again gives:
{
"features": [
{
"geometry": {
"coordinates": [
103.9348351,
1.3217691
],
"type": "Point"
},
"id": "0",
"properties": {
"WAYID": 190571,
"lat": 1.3217691,
"long": 103.9348351,
"pointID": 106119,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "highway",
"tag20": null,
"tag3": "footway",
"tag4": null,
"tag5": null,
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.933684,
1.3237604
],
"type": "Point"
},
"id": "1",
"properties": {
"WAYID": 190637,
"lat": 1.3237604,
"long": 103.933684,
"pointID": 106563,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "shelter",
"tag20": null,
"tag3": "yes",
"tag4": "highway",
"tag5": "footway",
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9339101,
1.3205415
],
"type": "Point"
},
"id": "2",
"properties": {
"WAYID": 190888,
"lat": 1.3205415,
"long": 103.9339101,
"pointID": 106642,
"tag10": null,
"tag11": null,
"tag12": null,
"tag13": null,
"tag14": null,
"tag15": null,
"tag16": null,
"tag17": null,
"tag18": null,
"tag19": null,
"tag2": "shelter",
"tag20": null,
"tag3": "yes",
"tag4": "highway",
"tag5": "footway",
"tag6": null,
"tag7": null,
"tag8": null,
"tag9": null
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9332554,
1.3224845
],
"type": "Point"
},
"id": "3",
"properties": {
"WAYID": 116692201,
"lat": 1.3224845,
"long": 103.9332554,
"pointID": 106525,
"tag10": "footway",
"tag11": "sidewalk",
"tag12": "addr:street",
"tag13": "Random South Avenue 10",
"tag14": "addr:postcode",
"tag15": "460004",
"tag16": "building",
"tag17": "residential",
"tag18": "addr:city",
"tag19": "Boo",
"tag2": "addr:housenumber",
"tag20": null,
"tag3": "4",
"tag4": "residential",
"tag5": "BLOCK",
"tag6": "addr:country",
"tag7": "AG",
"tag8": "building:levels",
"tag9": 14.0
},
"type": "Feature"
},
{
"geometry": {
"coordinates": [
103.9330919,
1.323215
],
"type": "Point"
},
"id": "4",
"properties": {
"WAYID": 116692204,
"lat": 1.323215,
"long": 103.9330919,
"pointID": 106524,
"tag10": "addr:street",
"tag11": "Random Street Name 1",
"tag12": "addr:postcode",
"tag13": "460011",
"tag14": "building",
"tag15": "residential",
"tag16": "addr:city",
"tag17": "Boo",
"tag18": null,
"tag19": null,
"tag2": "addr:housenumber",
"tag20": null,
"tag3": "23",
"tag4": "residential",
"tag5": "BLOCK",
"tag6": "addr:country",
"tag7": "AG",
"tag8": "building:levels",
"tag9": 14.0
},
"type": "Feature"
}
],
"type": "FeatureCollection"
}