I'm trying to turn a pandas DataFrame into a GeoDataFrame for spatial analysis, but my coordinates ('geometry') column holds strings rather than geometry objects, because each entry starts with the word 'POLYGON'.
How can I edit my data so my coordinates column has numbers only?
ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
this is the error message i get
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-20-d175e91ad85f> in <module>
----> 1 ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
2 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
/opt/conda/lib/python3.8/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwargs)
4354 dtype: float64
4355 """
-> 4356 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
4357
4358 def _reduce(
/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply(self)
1034 return self.apply_str()
1035
-> 1036 return self.apply_standard()
1037
1038 def agg(self):
/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply_standard(self)
1090 # List[Union[Callable[..., Any], str]]]]]"; expected
1091 # "Callable[[Any], Any]"
-> 1092 mapped = lib.map_infer(
1093 values,
1094 f, # type: ignore[arg-type]
/opt/conda/lib/python3.8/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in __init__(self, *args)
46 BaseGeometry.__init__(self)
47 if len(args) > 0:
---> 48 self._set_coords(*args)
49
50 # Coordinate getters and setters
/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in _set_coords(self, *args)
131 self.empty()
132 if len(args) == 1:
--> 133 self._geom, self._ndim = geos_point_from_py(args[0])
134 elif len(args) > 3:
135 raise TypeError("Point() takes at most 3 arguments ({} given)".format(len(args)))
/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in geos_point_from_py(ob, update_geom, update_ndim)
212 coords = ob
213 n = len(coords)
--> 214 dx = c_double(coords[0])
215 dy = c_double(coords[1])
216 dz = None
TypeError: must be real number, not str
thanks in advance!
edit:
As commented below, I no longer have the word 'POLYGON' in front of my coordinates, but I still get this error when I try to convert it to a GeoDataFrame:
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-27-c55e804329dc> in <module>
----> 1 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in __init__(self, geometry, crs, *args, **kwargs)
165 )
166 # TODO: raise error in 0.9 or 0.10.
--> 167 self.set_geometry(geometry, inplace=True)
168
169 if geometry is None and crs:
/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in set_geometry(self, col, drop, inplace, crs)
293
294 # Check that we are using a listlike of geometries
--> 295 level = _ensure_geometry(level, crs=crs)
296 index = frame.index
297 frame[geo_column_name] = level
/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
41 return GeoSeries(out, index=data.index, name=data.name)
42 else:
---> 43 out = from_shapely(data, crs=crs)
44 return out
45
/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_shapely(data, crs)
166
167 """
--> 168 return GeometryArray(vectorized.from_shapely(data), crs=crs)
169
170
/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_shapely(data)
131 out.append(None)
132 else:
--> 133 raise TypeError("Input must be valid geometry objects: {0}".format(geom))
134
135 if compat.USE_PYGEOS:
TypeError: Input must be valid geometry objects: ((532105.0919998939 182011.23, 532162.4910001159 181867.7629999601, 532248.3160000765 181895.3249998323, 532282.6300000258 181906.4960000554, 532308.6079999561 181915.5200001549, 532303.4919998596 181814.1099998362, 532251.1259998521 181720.0000001175, 532267.7280001113 181643.780999956, 532213.063999875 181477.2030001849, 532282.2499999444 181460.4999999297, 532248.2490000051 181332.0360001744, 532227.687999856 181278.8749999276, 532173.1249999898 181263.453, 532074.3749998672 181338.2970001521, 532080.2499999747 181456.7499999724, 531948.313000044 181471.96900013, 531951.8749998901 181548.0000001803, 531985.8750000516 181583.4999998761, 532052.4999999914 181600.3909998685, 532064.3750001056 181561.5940001571, 532095.5629998141 181577.3510000679, 532077.7050000296 181797.669000078, 532069.8130000822 181825.9059999275, 532021.1880001619 181803.8910001202, 532022.3739999083 181893.4690001195, 532082.8759998722 181911.7809999972, 532105.0919998939 182011.23))
this is what my cells currently look like (obviously with more rows)
geometry | LSOA11CD | AvPTAl2015 |
---|---|---|
((532105.0919998939 182011.23, 532162.4910001... | E01000001 | 69.8233 |
((532746.8130000263 181786.891000028, 532671.... | E01000002 | 83.7820 |
thanks!
second edit:
print(ptal_lsoas.geometry.head())
0 POLYGON ((532105.092 182011.230, 532162.491 18...
1 POLYGON ((532746.813 181786.891, 532671.688 18...
2 POLYGON ((532135.145 182198.119, 532158.250 18...
3 POLYGON ((533807.946 180767.770, 533649.063 18...
4 POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry
import matplotlib as mpl
mpl.use('TkAgg')
%matplotlib inline
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot
import rasterstats as rs
import pysal as ps
import mapclassify
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.cm as cm
import urllib
import zipfile
import re
import os
import shapely
from shapely.geometry import Point
import random
random.seed(123456789)
pd.set_option('display.float_format', lambda x: '{:,.4f}'.format(x))
import warnings
warnings.simplefilter('ignore')
import zipfile
if os.path.isdir('data') is not True:
print("Creating 'data' directory...")
os.mkdir('data')
url = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/LDN-LSOAs.zip?raw=true'
path = os.path.join("data","LDN-LSOAs.zip")
r = urllib.request.urlretrieve(url, path)
z = zipfile.ZipFile(path)
m = z.extractall("data")
url = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/NSSHRP_UNIT_URESPOP.zip?raw=true'
path = os.path.join("data","NSSHRP_UNIT_URESPOP.zip")
r = urllib.request.urlretrieve(url, path)
url = 'https://data.london.gov.uk/download/public-transport-accessibility-levels/77d9b319-931e-4090-bf8e-f578938bd352/LSOA2011 AvPTAI2015.csv'
path = os.path.join("data","LSOA_PTAL.csv")
r = urllib.request.urlretrieve(url, path)
london = pd.read_csv('LSOA_Data.csv')
ptal = pd.read_csv('LSOA_PTAL.csv')
lsoa_shapes = london [['geometry', 'LSOA11CD']]
lsoa_names = london [['LSOA11NM', 'LSOA11CD']]
lsoa_shapes.head()
geometry LSOA11CD
0 POLYGON ((532105.0919998939 182011.23, 532162.... E01000001
1 POLYGON ((532746.8130000263 181786.891000028, ... E01000002
2 POLYGON ((532135.1449999654 182198.1190000199,... E01000003
3 POLYGON ((533807.9460001207 180767.7700000888,... E01000005
4 POLYGON ((545122.048999952 184314.931000118, 5... E01000006
lsoas = london [['geometry','LSOA11NM']]
lsoas = ptal.rename(columns={'LSOA11NM':'LSOA11CD'})
lsoas.head()
Out[10]:
LSOA11CD AvPTAI2015 PTAL PTAIHigh PTAILow
0 E01000001 69.8233 6b 97.4435 35.9190
1 E01000002 83.7820 6b 117.9120 66.3503
2 E01000003 41.7417 6b 49.5318 37.3635
3 E01000005 85.8893 6b 120.8470 45.9168
4 E01000006 22.4558 5 34.1054 0.0000
Out[11]:
LSOA11CD AvPTAI2015 PTAL PTAIHigh PTAILow
0 E01000001 69.8233 6b 97.4435 35.9190
1 E01000002 83.7820 6b 117.9120 66.3503
2 E01000003 41.7417 6b 49.5318 37.3635
3 E01000005 85.8893 6b 120.8470 45.9168
4 E01000006 22.4558 5 34.1054 0.0000
ptal_lsoas = pd.merge (lsoa_shapes, ptal, left_on = ['LSOA11CD'],
right_on = ['LSOA11CD'],
how = 'inner')
ptal_lsoas
geometry LSOA11CD AvPTAI2015 PTAL PTAIHigh PTAILow
0 POLYGON ((532105.0919998939 182011.23, 532162.... E01000001 69.8233 6b 97.4435 35.9190
1 POLYGON ((532746.8130000263 181786.891000028, ... E01000002 83.7820 6b 117.9120 66.3503
2 POLYGON ((532135.1449999654 182198.1190000199,... E01000003 41.7417 6b 49.5318 37.3635
3 POLYGON ((533807.9460001207 180767.7700000888,... E01000005 85.8893 6b 120.8470 45.9168
4 POLYGON ((545122.048999952 184314.931000118, 5... E01000006 22.4558 5 34.1054 0.0000
... ... ... ... ... ... ...
4830 POLYGON ((544642.6800000862 179824.6740001431,... E01033742 3.9532 1b 5.7986 3.0585
4831 POLYGON ((546579.1949997952 181097.8129996927,... E01033743 3.8174 1b 14.3944 0.0000
4832 POLYGON ((544536.4859999884 179447.1149999507,... E01033744 10.1709 3 23.5689 3.5750
4833 POLYGON ((546415.7449998577 180152.2700002448,... E01033745 4.6838 1b 11.4851 0.0000
4834 POLYGON ((538140.0000000072 177313, 538129.176... E01033746 18.9882 4 24.1235 14.5436
4835 rows × 6 columns
ptal_lsoas
geometry LSOA11CD AvPTAI2015 PTAL PTAIHigh PTAILow
0 POLYGON ((532105.092 182011.230, 532162.491 18... E01000001 69.8233 6b 97.4435 35.9190
1 POLYGON ((532746.813 181786.891, 532671.688 18... E01000002 83.7820 6b 117.9120 66.3503
2 POLYGON ((532135.145 182198.119, 532158.250 18... E01000003 41.7417 6b 49.5318 37.3635
3 POLYGON ((533807.946 180767.770, 533649.063 18... E01000005 85.8893 6b 120.8470 45.9168
4 POLYGON ((545122.049 184314.931, 545271.917 18... E01000006 22.4558 5 34.1054 0.0000
... ... ... ... ... ... ...
4830 POLYGON ((544642.680 179824.674, 544766.313 17... E01033742 3.9532 1b 5.7986 3.0585
4831 POLYGON ((546579.195 181097.813, 546687.036 18... E01033743 3.8174 1b 14.3944 0.0000
4832 POLYGON ((544536.486 179447.115, 544602.630 17... E01033744 10.1709 3 23.5689 3.5750
4833 POLYGON ((546415.745 180152.270, 546320.715 18... E01033745 4.6838 1b 11.4851 0.0000
4834 POLYGON ((538140.000 177313.000, 538129.177 17... E01033746 18.9882 4 24.1235 14.5436
4835 rows × 6 columns
ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-2d79f4c9cee1> in <module>
----> 1 ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])
2
3 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')
/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in from_wkt(cls, data, index, crs, **kwargs)
444 dtype: geometry
445 """
--> 446 return cls._from_wkb_or_wkb(from_wkt, data, index=index, crs=crs, **kwargs)
447
448 @classmethod
/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in _from_wkb_or_wkb(cls, from_wkb_or_wkt_function, data, index, crs, **kwargs)
457 index = data.index
458 data = data.values
--> 459 return cls(from_wkb_or_wkt_function(data, crs=crs), index=index, **kwargs)
460
461 @property
/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_wkt(data, crs)
218
219 """
--> 220 return GeometryArray(vectorized.from_wkt(data), crs=crs)
221
222
/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_wkt(data)
194 """
195 if compat.USE_PYGEOS:
--> 196 return pygeos.from_wkt(data)
197
198 import shapely.wkt
/opt/conda/lib/python3.8/site-packages/pygeos/io.py in from_wkt(geometry, **kwargs)
158 <pygeos.Geometry POINT (0 0)>
159 """
--> 160 return lib.from_wkt(geometry, **kwargs)
161
162
TypeError: Expected bytes, got Polygon
print(ptal_lsoas.geometry.head())
0 POLYGON ((532105.092 182011.230, 532162.491 18...
1 POLYGON ((532746.813 181786.891, 532671.688 18...
2 POLYGON ((532135.145 182198.119, 532158.250 18...
3 POLYGON ((533807.946 180767.770, 533649.063 18...
4 POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry
CodePudding user response:
It sounds like your shapes are in the "well-known text" (WKT) format. You can convert a WKT column to a geometry column with `geopandas.GeoSeries.from_wkt`:
# replace string geometry representations with shapely geometries
ptal_lsoas['geometry'] = gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])
# initialize GeoDataFrame with the result
# ('geometry' is the default geometry column name)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas)
Note that the entire string, e.g. `POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))`, is needed for it to be parsed as WKT. Don't try to do any pre-processing. If your column is in a valid WKT format, then the entire string can be parsed by `from_wkt`. Without the string prefix, geopandas/shapely won't know what type of geometry the data is.
CodePudding user response:
Have you tried simply doing this?
ptal_lsoas['geometry'] = ptal_lsoas['geometry'].str.replace('POLYGON',"")
I'm only guessing, but it looks like the geometry column holds both the longitude and latitude coordinates, separated by commas, so no other dtype conversion should be needed.
If the column contains only digits, add the line below:
ptal_lsoas['geometry'] = ptal_lsoas['geometry'].astype(int)