Home > OS >  issue with data not being written to file using python dataframe
issue with data not being written to file using python dataframe

Time:11-02

I'm trying to create a geojson file, using an input txt file that I have. This is the code that I have but there seems to be errors with it.

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
jsonLoad = gdf.to_json()

IF the above works, then this part would generate it into the exact file format that I want.

import json
from geojson import Point, Feature, dump
#save the data to the file 
parsed = json.loads(jsonLoad)
print(json.dumps(parsed, indent=4, sort_keys=True))
#parsed = gdf.to_json()
with open('myfile.geojson', 'w') as f:
   dump(parsed, f,indent=1)

I'm trying to get them grouped according to their WayID but when I get my resultant file, in the geoJSON I don't see the tags like shelter,yes,highway,footway attached to it, which I don't understand why it's not being stored in the geoJSON?

For example this is my generated file,

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9365417,
                        1.3208156
                    ],
                    [
                        103.9367689,
                        1.3206226
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9370296,
                        1.3207544
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.936842,
                        1.320661
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190888
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

But under the properties I would've expected to see the rest of the tags columns like 'tag2','tag3','tag4','tag5'

What am I missing here? Would appreciate if someone could tell me why this is happening, thank you!

EDIT:

For the same issue, if I change a couple of data lines to have more tags eg:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlongRemoveComma.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
jsonLoad = gdf.to_json()

It returns me an empty dataset, but I want it to be grouped mainly by WAYID with all the respective tags attached

{
    "features": [],
    "type": "FeatureCollection"
}

CodePudding user response:

The problem comes from the fact that when you groupby WAYID you dump all other columns. That is why all tags vanish. Do this:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

#load csv as dataframe (replace io.StringIO(data) with the csv filename), use converters to clean up lat and long columns upon loading
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
#input the data from the text file
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
#load dataframe as geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
#groupby on name and description, while converting the grouped geometries to a LineString
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID', 'tag2', 'tag3', 'tag4', 'tag5'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
jsonLoad = gdf.to_json()

and run you code again. This will return

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637,
                "tag2": "highway",
                "tag3": "footway",
                "tag4": "shelter",
                "tag5": "yes"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

UPDATE:

Do this to keep all the columns:


gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
gdf = gdf.sort_values("pointID").groupby("WAYID", as_index=False).first()

    
jsonLoad = gdf.to_json()

and running your code again gives:

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    103.9348351,
                    1.3217691
                ],
                "type": "Point"
            },
            "id": "0",
            "properties": {
                "WAYID": 190571,
                "lat": 1.3217691,
                "long": 103.9348351,
                "pointID": 106119,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "highway",
                "tag20": null,
                "tag3": "footway",
                "tag4": null,
                "tag5": null,
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.933684,
                    1.3237604
                ],
                "type": "Point"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "lat": 1.3237604,
                "long": 103.933684,
                "pointID": 106563,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9339101,
                    1.3205415
                ],
                "type": "Point"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "lat": 1.3205415,
                "long": 103.9339101,
                "pointID": 106642,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9332554,
                    1.3224845
                ],
                "type": "Point"
            },
            "id": "3",
            "properties": {
                "WAYID": 116692201,
                "lat": 1.3224845,
                "long": 103.9332554,
                "pointID": 106525,
                "tag10": "footway",
                "tag11": "sidewalk",
                "tag12": "addr:street",
                "tag13": "Random South Avenue 10",
                "tag14": "addr:postcode",
                "tag15": "460004",
                "tag16": "building",
                "tag17": "residential",
                "tag18": "addr:city",
                "tag19": "Boo",
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "4",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9330919,
                    1.323215
                ],
                "type": "Point"
            },
            "id": "4",
            "properties": {
                "WAYID": 116692204,
                "lat": 1.323215,
                "long": 103.9330919,
                "pointID": 106524,
                "tag10": "addr:street",
                "tag11": "Random Street Name 1",
                "tag12": "addr:postcode",
                "tag13": "460011",
                "tag14": "building",
                "tag15": "residential",
                "tag16": "addr:city",
                "tag17": "Boo",
                "tag18": null,
                "tag19": null,
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "23",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}
  • Related