Home > Enterprise >  Turning a dataframe into a geodata frame in python
Turning a dataframe into a geodata frame in python

Time:05-25

I'm trying to turn a dataframe into a geodata frame for spatial analysis, but my coordinates ('geometry') column is a string owing to the word 'POLYGON' at the start of each entry.

How can I edit my data so my coordinates column has numbers only?

ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

This is the error message I get:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-20-d175e91ad85f> in <module>
----> 1 ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
      2 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwargs)
   4354         dtype: float64
   4355         """
-> 4356         return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
   4357 
   4358     def _reduce(

/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply(self)
   1034             return self.apply_str()
   1035 
-> 1036         return self.apply_standard()
   1037 
   1038     def agg(self):

/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply_standard(self)
   1090                 # List[Union[Callable[..., Any], str]]]]]"; expected
   1091                 # "Callable[[Any], Any]"
-> 1092                 mapped = lib.map_infer(
   1093                     values,
   1094                     f,  # type: ignore[arg-type]

/opt/conda/lib/python3.8/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in __init__(self, *args)
     46         BaseGeometry.__init__(self)
     47         if len(args) > 0:
---> 48             self._set_coords(*args)
     49 
     50     # Coordinate getters and setters

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in _set_coords(self, *args)
    131         self.empty()
    132         if len(args) == 1:
--> 133             self._geom, self._ndim = geos_point_from_py(args[0])
    134         elif len(args) > 3:
    135             raise TypeError("Point() takes at most 3 arguments ({} given)".format(len(args)))

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in geos_point_from_py(ob, update_geom, update_ndim)
    212         coords = ob
    213     n = len(coords)
--> 214     dx = c_double(coords[0])
    215     dy = c_double(coords[1])
    216     dz = None

TypeError: must be real number, not str

thanks in advance!

edit:

As suggested in the comments below, I no longer have the word POLYGON in front of my coordinates, but I still get this error when I try to convert it to a GeoDataFrame:

geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-27-c55e804329dc> in <module>
----> 1 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in __init__(self, geometry, crs, *args, **kwargs)
    165                 )
    166                 # TODO: raise error in 0.9 or 0.10.
--> 167             self.set_geometry(geometry, inplace=True)
    168 
    169         if geometry is None and crs:

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in set_geometry(self, col, drop, inplace, crs)
    293 
    294         # Check that we are using a listlike of geometries
--> 295         level = _ensure_geometry(level, crs=crs)
    296         index = frame.index
    297         frame[geo_column_name] = level

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
     41             return GeoSeries(out, index=data.index, name=data.name)
     42         else:
---> 43             out = from_shapely(data, crs=crs)
     44             return out
     45 

/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_shapely(data, crs)
    166 
    167     """
--> 168     return GeometryArray(vectorized.from_shapely(data), crs=crs)
    169 
    170 

/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_shapely(data)
    131             out.append(None)
    132         else:
--> 133             raise TypeError("Input must be valid geometry objects: {0}".format(geom))
    134 
    135     if compat.USE_PYGEOS:

TypeError: Input must be valid geometry objects:  ((532105.0919998939 182011.23, 532162.4910001159 181867.7629999601, 532248.3160000765 181895.3249998323, 532282.6300000258 181906.4960000554, 532308.6079999561 181915.5200001549, 532303.4919998596 181814.1099998362, 532251.1259998521 181720.0000001175, 532267.7280001113 181643.780999956, 532213.063999875 181477.2030001849, 532282.2499999444 181460.4999999297, 532248.2490000051 181332.0360001744, 532227.687999856 181278.8749999276, 532173.1249999898 181263.453, 532074.3749998672 181338.2970001521, 532080.2499999747 181456.7499999724, 531948.313000044 181471.96900013, 531951.8749998901 181548.0000001803, 531985.8750000516 181583.4999998761, 532052.4999999914 181600.3909998685, 532064.3750001056 181561.5940001571, 532095.5629998141 181577.3510000679, 532077.7050000296 181797.669000078, 532069.8130000822 181825.9059999275, 532021.1880001619 181803.8910001202, 532022.3739999083 181893.4690001195, 532082.8759998722 181911.7809999972, 532105.0919998939 182011.23))

This is what my cells currently look like (obviously with more rows):

geometry LSOA11CD AvPTAl2015
((532105.0919998939 182011.23, 532162.4910001... E01000001 69.8233
((532746.8130000263 181786.891000028, 532671.... E01000002 83.7820

thanks!

second edit:


print(ptal_lsoas.geometry.head())
0    POLYGON ((532105.092 182011.230, 532162.491 18...
1    POLYGON ((532746.813 181786.891, 532671.688 18...
2    POLYGON ((532135.145 182198.119, 532158.250 18...
3    POLYGON ((533807.946 180767.770, 533649.063 18...
4    POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry

import matplotlib as mpl
mpl.use('TkAgg')

%matplotlib inline
import matplotlib.pyplot as plt
import rasterio 
import rasterio.plot
import rasterstats as rs
import pysal as ps
import mapclassify
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.cm as cm
import urllib
import zipfile
import re
import os
import shapely
from shapely.geometry import Point

import random 
random.seed(123456789) 

pd.set_option('display.float_format', lambda x: '{:,.4f}'.format(x))

import warnings
warnings.simplefilter('ignore')

import zipfile

# Fetch the data sets used below into a local 'data' directory.
# 'import urllib' alone does not guarantee that the 'request' submodule is
# loaded, so import it explicitly before calling urllib.request.urlretrieve.
import urllib.request

if not os.path.isdir('data'):
    print("Creating 'data' directory...")
    os.mkdir('data')

# LSOA archive: download it, then unpack it into 'data'.
url  = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/LDN-LSOAs.zip?raw=true'
path = os.path.join("data","LDN-LSOAs.zip")

r    = urllib.request.urlretrieve(url, path)

# The context manager closes the archive handle once extraction is done.
with zipfile.ZipFile(path) as z:
    z.extractall("data")

# This archive is only downloaded here; nothing in this block unpacks it.
url  = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/NSSHRP_UNIT_URESPOP.zip?raw=true'
path = os.path.join("data","NSSHRP_UNIT_URESPOP.zip")

r    = urllib.request.urlretrieve(url, path)

# The file name on the server contains a space; it must be percent-encoded
# ('%20'), otherwise urllib rejects the URL before sending the request.
url  = 'https://data.london.gov.uk/download/public-transport-accessibility-levels/77d9b319-931e-4090-bf8e-f578938bd352/LSOA2011%20AvPTAI2015.csv'
path = os.path.join("data","LSOA_PTAL.csv")

r    = urllib.request.urlretrieve(url, path)

london = pd.read_csv('LSOA_Data.csv')
ptal = pd.read_csv('LSOA_PTAL.csv')

lsoa_shapes = london [['geometry', 'LSOA11CD']]
lsoa_names = london [['LSOA11NM', 'LSOA11CD']]

lsoa_shapes.head()


geometry    LSOA11CD
0   POLYGON ((532105.0919998939 182011.23, 532162....   E01000001
1   POLYGON ((532746.8130000263 181786.891000028, ...   E01000002
2   POLYGON ((532135.1449999654 182198.1190000199,...   E01000003
3   POLYGON ((533807.9460001207 180767.7700000888,...   E01000005
4   POLYGON ((545122.048999952 184314.931000118, 5...   E01000006


lsoas = london [['geometry','LSOA11NM']]
lsoas = ptal.rename(columns={'LSOA11NM':'LSOA11CD'})
​
lsoas.head()
Out[10]:
LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   E01000001   69.8233 6b  97.4435 35.9190
1   E01000002   83.7820 6b  117.9120    66.3503
2   E01000003   41.7417 6b  49.5318 37.3635
3   E01000005   85.8893 6b  120.8470    45.9168
4   E01000006   22.4558 5   34.1054 0.0000


Out[11]:
LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   E01000001   69.8233 6b  97.4435 35.9190
1   E01000002   83.7820 6b  117.9120    66.3503
2   E01000003   41.7417 6b  49.5318 37.3635
3   E01000005   85.8893 6b  120.8470    45.9168
4   E01000006   22.4558 5   34.1054 0.0000

# Inner-join the LSOA shapes with the PTAL table on the 'LSOA11CD' column,
# so only codes present in both frames are kept.
ptal_lsoas = lsoa_shapes.merge(
    ptal,
    how='inner',
    left_on=['LSOA11CD'],
    right_on=['LSOA11CD'],
)

ptal_lsoas

geometry    LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   POLYGON ((532105.0919998939 182011.23, 532162....   E01000001   69.8233 6b  97.4435 35.9190
1   POLYGON ((532746.8130000263 181786.891000028, ...   E01000002   83.7820 6b  117.9120    66.3503
2   POLYGON ((532135.1449999654 182198.1190000199,...   E01000003   41.7417 6b  49.5318 37.3635
3   POLYGON ((533807.9460001207 180767.7700000888,...   E01000005   85.8893 6b  120.8470    45.9168
4   POLYGON ((545122.048999952 184314.931000118, 5...   E01000006   22.4558 5   34.1054 0.0000
... ... ... ... ... ... ...
4830    POLYGON ((544642.6800000862 179824.6740001431,...   E01033742   3.9532  1b  5.7986  3.0585
4831    POLYGON ((546579.1949997952 181097.8129996927,...   E01033743   3.8174  1b  14.3944 0.0000
4832    POLYGON ((544536.4859999884 179447.1149999507,...   E01033744   10.1709 3   23.5689 3.5750
4833    POLYGON ((546415.7449998577 180152.2700002448,...   E01033745   4.6838  1b  11.4851 0.0000
4834    POLYGON ((538140.0000000072 177313, 538129.176...   E01033746   18.9882 4   24.1235 14.5436
4835 rows × 6 columns

ptal_lsoas

geometry    LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   POLYGON ((532105.092 182011.230, 532162.491 18...   E01000001   69.8233 6b  97.4435 35.9190
1   POLYGON ((532746.813 181786.891, 532671.688 18...   E01000002   83.7820 6b  117.9120    66.3503
2   POLYGON ((532135.145 182198.119, 532158.250 18...   E01000003   41.7417 6b  49.5318 37.3635
3   POLYGON ((533807.946 180767.770, 533649.063 18...   E01000005   85.8893 6b  120.8470    45.9168
4   POLYGON ((545122.049 184314.931, 545271.917 18...   E01000006   22.4558 5   34.1054 0.0000
... ... ... ... ... ... ...
4830    POLYGON ((544642.680 179824.674, 544766.313 17...   E01033742   3.9532  1b  5.7986  3.0585
4831    POLYGON ((546579.195 181097.813, 546687.036 18...   E01033743   3.8174  1b  14.3944 0.0000
4832    POLYGON ((544536.486 179447.115, 544602.630 17...   E01033744   10.1709 3   23.5689 3.5750
4833    POLYGON ((546415.745 180152.270, 546320.715 18...   E01033745   4.6838  1b  11.4851 0.0000
4834    POLYGON ((538140.000 177313.000, 538129.177 17...   E01033746   18.9882 4   24.1235 14.5436
4835 rows × 6 columns

ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])

geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-2d79f4c9cee1> in <module>
----> 1 ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])
      2 
      3 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in from_wkt(cls, data, index, crs, **kwargs)
    444         dtype: geometry
    445         """
--> 446         return cls._from_wkb_or_wkb(from_wkt, data, index=index, crs=crs, **kwargs)
    447 
    448     @classmethod

/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in _from_wkb_or_wkb(cls, from_wkb_or_wkt_function, data, index, crs, **kwargs)
    457                 index = data.index
    458             data = data.values
--> 459         return cls(from_wkb_or_wkt_function(data, crs=crs), index=index, **kwargs)
    460 
    461     @property

/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_wkt(data, crs)
    218 
    219     """
--> 220     return GeometryArray(vectorized.from_wkt(data), crs=crs)
    221 
    222 

/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_wkt(data)
    194     """
    195     if compat.USE_PYGEOS:
--> 196         return pygeos.from_wkt(data)
    197 
    198     import shapely.wkt

/opt/conda/lib/python3.8/site-packages/pygeos/io.py in from_wkt(geometry, **kwargs)
    158     <pygeos.Geometry POINT (0 0)>
    159     """
--> 160     return lib.from_wkt(geometry, **kwargs)
    161 
    162 

TypeError: Expected bytes, got Polygon

print(ptal_lsoas.geometry.head())

0    POLYGON ((532105.092 182011.230, 532162.491 18...
1    POLYGON ((532746.813 181786.891, 532671.688 18...
2    POLYGON ((532135.145 182198.119, 532158.250 18...
3    POLYGON ((533807.946 180767.770, 533649.063 18...
4    POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry

CodePudding user response:

It sounds like your shapes are in the "well known text" (aka wkt) format. You can convert a wkt column to a geometry column with geopandas.GeoSeries.from_wkt:

# replace string geometry representations with shapely geometries
ptal_lsoas['geometry'] = gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])

# initialize GeoDataFrame with the result
# ('geometry' is the default geometry column name)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas)

Note that the entire string, e.g. POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)), is needed for it to be parsed as WKT. Don't try to do any pre-processing. If your column is in a valid WKT format, then the entire string can be parsed by from_wkt. Without the geometry-type prefix, geopandas/shapely won't know what type of geometry the data represents.

CodePudding user response:

Have you tried simply doing this?

ptal_lsoas['geometry'] = ptal_lsoas['geometry'].str.replace('POLYGON',"")

I am only guessing, but it looks like your geometry column holds both coordinates (long and lat), separated by commas, so no other dtype conversion should be needed. If the column contains only digits, add the line below:

ptal_lsoas['geometry'] = ptal_lsoas['geometry'].astype(int)
  • Related