How to calculate the variance of location details
Each location has a latitude and a longitude. I am looking for a single value that captures the variance of the location details (not a separate variance for latitude and for longitude). What is the best way to achieve that?
>>> pdf = pd.DataFrame({'latitude': {0: 47.0, 8: 54.0, 14: 55.0, 15: 39.0, 2: 31.0},
'longitude': {0: 29.0, 8: 10.0, 14: 36.0, 15: -9.0, 2: 121.0}
})
>>> pdf
latitude longitude
0 47.0 29.0
8 54.0 10.0
14 55.0 36.0
15 39.0 -9.0
2 31.0 121.0
# These points have the same spread, but at a different location.
df2 = pd.DataFrame({'latitude': {0: 147.0, 8: 154.0, 14: 155.0, 15: 139.0, 2: 131.0},
'longitude': {0: 154.0, 8: 155.0, 14: 139.0, 15: 131.0, 2: 147.0} })
# Per-row product of the latitude and longitude deviations from their column
# means, scaled down by 100.
df2['new'] = (df2['latitude']-df2['latitude'].mean()).mul(df2['longitude']-df2['longitude'].mean()).div(100)
# The variance of those products is used as the single-number spread score.
score = df2['new'].var()
# Scatter plot of the points (longitude on x, latitude on y) for a visual check.
df2.plot(kind='scatter', x='longitude', y='latitude')
Output score 0.4407372
# These points are farther apart.
df3 = pd.DataFrame({'latitude': {0: 14.0, 8: 15.0, 14: 155.0, 15: 13.0, 2: 131.0},
'longitude': {0: 15.0, 8: 215.0, 14: 39.0, 15: 131.0, 2: 147.0} })
# Per-row product of the latitude and longitude deviations from their column
# means, scaled down by 100.
df3['new'] = (df3['latitude']-df3['latitude'].mean()).mul(df3['longitude']-df3['longitude'].mean()).div(100)
# The variance of those products is used as the single-number spread score.
score = df3['new'].var()
# Scatter plot of the points (longitude on x, latitude on y) for a visual check.
df3.plot(kind='scatter', x='longitude', y='latitude')
Output score 2332.5498432
CodePudding user response:
This computes a single measure by converting the latitude/longitude pairs to Cartesian coordinates (following a recipe) and guarding the final division with a safe-divide idiom.
import pandas as pd
import numpy as np
# Sample locations, one row per point; the inner dict keys become the row
# labels. Values are in degrees (they are passed through np.deg2rad below).
pdf = pd.DataFrame(
{
"latitude": {0: 47.0, 8: 54.0, 14: 55.0, 15: 39.0, 2: 31.0},
"longitude": {0: 29.0, 8: 10.0, 14: 36.0, 15: -9.0, 2: 121.0},
}
)
def get_cartesian(latlon, radius=6371):
    """Convert a (latitude, longitude) pair in degrees to Cartesian (x, y, z).

    The point is placed on a sphere of the given radius, with the z axis
    through the poles and the x axis through latitude 0, longitude 0.

    Args:
        latlon: Two-item sequence ``(latitude, longitude)`` in degrees.
            Scalars or equally shaped NumPy arrays both work, since only
            NumPy ufuncs are applied.
        radius: Sphere radius; the result is in the same unit. Defaults to
            6371, the radius of the earth (presumably kilometres).

    Returns:
        A tuple ``(x, y, z)`` of Cartesian coordinates.
    """
    lat, lon = latlon
    lat, lon = np.deg2rad(lat), np.deg2rad(lon)
    # The distance from the polar axis is shared by the x and y components.
    planar = radius * np.cos(lat)
    x = planar * np.cos(lon)
    y = planar * np.sin(lon)
    z = radius * np.sin(lat)
    return (x, y, z)
# Add, per row: the (latitude, longitude) tuple, its Cartesian (x, y, z)
# tuple, and the three Cartesian components as separate columns.
pdf = pdf.assign(
    latlong=pd.Series(
        list(zip(pdf.latitude.values, pdf.longitude.values)),
        index=pdf.index,
    ),
    cartesian=lambda df: df["latlong"].apply(get_cartesian),
    x=lambda df: df["cartesian"].apply(lambda xyz: xyz[0]),
    y=lambda df: df["cartesian"].apply(lambda xyz: xyz[1]),
    z=lambda df: df["cartesian"].apply(lambda xyz: xyz[2]),
)

# Sum of the per-axis means of the Cartesian coordinates.
sum_means = pdf.x.mean() + pdf.y.mean() + pdf.z.mean()
# Average of the three means; falls back to 0 when the sum is zero.
# NOTE(review): despite the name, this is the mean of the coordinate means,
# not a statistical variance -- confirm it is the intended spread measure.
variance = sum_means / 3 if sum_means else 0
result:
2910.4314471812527