Using Pandas, how to check if a valid polygon geometry can be created using coordinates from a string?
Input example:
import pandas as pd
# Sample input: one coordinate string ('x1, y1, x2, y2, ...') per row.
coord_series = pd.Series(
    data=[
        # Four distinct points - the only row that forms a valid polygon.
        '-33, 50, -30, 38, -40, 27, -33.0, 50.5',
        # Non-numeric placeholders.
        '-xx, xx, -xx, xx, -xx, xx, -xxxx, xxxx',
        # Missing value.
        None,
        # A single value, not even one complete point.
        '-10',
        # Odd number of values: the last x has no matching y.
        '-10, 20, -30, 40, -50, 60, -70',
        # Three identical points plus one more - encloses no area.
        '-11, 11, -11, 11, -11, 11, -11.1, 11.1',
    ]
)
Only the first string forms a valid polygon.
A function is needed which accepts one Series object and outputs one Series object.
CodePudding user response:
The code below looks long, but I couldn't come up with a shorter solution. `shapely.geometry.Polygon.is_valid` is needed for the assessment, but it is not enough on its own, because for some inputs it raises an error instead of returning `False`.
from shapely.geometry import Polygon
def validate_polygon(s: pd.Series) -> pd.Series:
    """Flag which comma-separated coordinate strings describe a valid polygon.

    Each element is expected to look like ``'x1, y1, x2, y2, ...'``. An
    element yields ``True`` only when all of the following hold:

    * it is a string (``None``, ``NaN`` and other non-strings yield ``False``);
    * it contains an even number of values, i.e. complete (x, y) pairs;
    * it contains at least three points (six values);
    * every value is a plain decimal number: optional surrounding
      whitespace, at most one leading ``-`` and at most one ``.``;
    * ``Polygon(points).is_valid`` is true for the resulting shell.

    Args:
        s: Series of coordinate strings; missing values are allowed.

    Returns:
        Boolean Series with the same index as ``s``.
    """

    def is_number(token: str) -> bool:
        text = token.strip()
        # Drop at most ONE leading minus: lstrip('-') would also accept
        # '--1', which float() then rejects with a ValueError.
        if text.startswith('-'):
            text = text[1:]
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits that float() cannot parse.
        return text.replace('.', '', 1).isdecimal()

    def to_shell(value):
        # Missing values arrive as None or float NaN; NaN is truthy, so a
        # plain truthiness test is not a safe guard here.
        if not isinstance(value, str):
            return None
        tokens = value.split(',')
        # Need complete (x, y) pairs and at least three points.
        if len(tokens) % 2 or len(tokens) < 6:
            return None
        if not all(is_number(token) for token in tokens):
            return None
        numbers = [float(token) for token in tokens]
        return list(zip(numbers[::2], numbers[1::2]))

    def is_valid(value) -> bool:
        shell = to_shell(value)
        # Polygon is only constructed for shells that passed every pre-check.
        return shell is not None and bool(Polygon(shell).is_valid)

    return s.map(is_valid).astype(bool)
# Run the validator on the sample Series; only the first row is a valid polygon.
print(validate_polygon(coord_series))
# Expected output:
# 0     True
# 1    False
# 2    False
# 3    False
# 4    False
# 5    False
# dtype: bool
CodePudding user response:
We can use `shapely.geometry.Polygon.is_valid` together with `try...except`:
from shapely.geometry import Polygon
def validate_polygon(ser: pd.Series) -> pd.Series:
    """Flag which comma-separated coordinate strings describe a valid polygon.

    Each element is expected to look like ``'x1, y1, x2, y2, ...'``. The
    values are parsed with ``float``, paired into (x, y) points and handed to
    ``Polygon``; the element yields ``Polygon(points).is_valid``. Elements
    that cannot be parsed, are not strings (``None``/``NaN``), contain an odd
    number of values, or make ``Polygon`` raise yield ``False``.

    Args:
        ser: Series of coordinate strings; missing values are allowed.

    Returns:
        Series with one boolean per element of ``ser``, on the same index.
    """

    def is_valid(s) -> bool:
        try:
            values = [float(part) for part in s.split(',')]
        except (ValueError, AttributeError):
            # ValueError: a token is not a number.
            # AttributeError: s has no .split(), e.g. None or NaN.
            return False

        # Pairing values with zip() would silently drop a trailing unpaired
        # value, so an odd-length list must be rejected explicitly.
        if len(values) % 2:
            return False

        p_shell = list(zip(values[::2], values[1::2]))
        try:
            return bool(Polygon(p_shell).is_valid)
        except (ValueError, AttributeError):
            # Polygon raises for shells it cannot build (presumably too few
            # points); treat that as "not a valid polygon".
            return False

    return ser.map(is_valid)
# Run the validator on the sample Series; only the first row is a valid polygon.
print(validate_polygon(coord_series))
# Expected output:
# 0     True
# 1    False
# 2    False
# 3    False
# 4    False
# 5    False
# dtype: bool