I am trying to manipulate a dataframe, and I wrote a function to calculate the distance between two cities.
def _geocode_point(geocoder, place):
    """Return ``(lat, lng)`` taken from the first geocoding result for *place*.

    Raises:
        ValueError: if the geocoder returns no result for *place*.
    """
    results = geocoder.geocode(place)
    if not results:
        # Without this guard an unknown place surfaces as a bare
        # "list index out of range" with no hint of which name failed.
        raise ValueError("No geocoding result for {!r}".format(place))
    geometry = results[0]['geometry']
    return geometry['lat'], geometry['lng']


def find_distance(A,B):
    """Return the distance between places *A* and *B* in whole kilometres.

    Both places are geocoded through OpenCage and the first result of each
    lookup is used. The distance comes from ``geodesic(...).kilometers``
    (presumably ``geopy.distance.geodesic`` -- confirm against the file's
    imports) and is truncated with ``int``.

    Args:
        A: name of the first place, a single string.
        B: name of the second place, a single string. Passing a whole pandas
            Series is rejected by the OpenCage client with
            ``InvalidInputError``; call this function once per value instead.

    Returns:
        The distance in kilometres as an ``int``.

    Raises:
        ValueError: if either place cannot be geocoded.
    """
    # NOTE(review): the API key is hard-coded; consider loading it from
    # configuration rather than keeping a credential in source.
    key = '0377f0e6b42a47fe9d30a4e9a2b3bb63' # get api key from: https://opencagedata.com
    geocoder = OpenCageGeocode(key)
    point_a = _geocode_point(geocoder, A)
    point_b = _geocode_point(geocoder, B)
    return int(geodesic(point_a, point_b).kilometers)
This is my dataframe:
2 32 Mulhouse 1874.0 2 797 16.8 16,3 € 10.012786
13 13 Saint-Étienne 1994.0 3 005 14.3 13,5 € 8.009882
39 39 Roubaix 2845.0 2 591 17.4 15,0 € 6.830968
27 27 Perpignan 2507.0 3 119 15.1 13,3 € 6.727255
40 40 Tourcoing 3089.0 2 901 17.5 15,3 € 6.327547
25 25 Limoges 2630.0 2 807 14.2 12,5 € 6.030424
20 20 Le Mans 2778.0 3 202 14.4 12,3 € 5.789559
Here is my code:
def clean_text(row):
    """Return a list with every cell of *row* reduced to ASCII bytes.

    Each cell is decoded with the ``unicode_escape`` codec and the resulting
    text is re-encoded as ASCII, silently dropping characters that cannot be
    represented. Cells must therefore provide a ``.decode`` method (for
    example ``bytes``); a Python 3 ``str`` does not.
    """
    cleaned = []
    for cell in row:
        text = cell.decode('unicode_escape')
        cleaned.append(text.encode('ascii', 'ignore'))
    return cleaned
def main():
    """Load the price sheet, add yield and distance-to-Paris columns, print it.

    Reads ``Sheet1`` of ``prix_m2_france.xlsx``, normalises the text columns,
    computes a ``revenu`` percentage from the average rent and average price,
    computes the distance from Paris to every city, and prints the first 90
    rows sorted by ``revenu`` in descending order.
    """
    inFile = "prix_m2_france.xlsx"  # Excel workbook to open
    inSheetName = "Sheet1"  # name of the worksheet
    cols = ['Ville', 'Prix_moyen', 'Loyer_moyen']  # columns to clean up
    df = pd.read_excel(inFile, sheet_name=inSheetName)
    # Drop the euro sign and (narrow no-break) spaces and turn decimal commas
    # into dots so the numeric columns can be cast to float.
    # NOTE(review): 'Ville' is in `cols`, so spaces and commas are stripped
    # from city names as well -- confirm that is intended.
    df[cols] = df[cols].replace({'€': '', ",": ".", " ": "", "\u202f":""}, regex=True)
    df['Prix_moyen'] = df['Prix_moyen'].astype(float)
    df['Loyer_moyen'] = df['Loyer_moyen'].astype(float)
    # Yearly rent as a percentage of the price scaled by 1.0744
    # (presumably purchase fees -- TODO confirm the meaning of the factor).
    df["revenu"] = (df['Loyer_moyen'] * 12) / (df["Prix_moyen"] * 1.0744) * 100
    # Restore the space in the hyphenated spellings of these two cities.
    df["Ville"] = df['Ville'].replace(['Le-Havre', 'Le-Mans'], ['Le Havre', 'Le Mans'])
    # find_distance() geocodes one place name at a time; handing it the whole
    # column raises opencage's InvalidInputError, so call it once per city.
    df["distance"] = df["Ville"].apply(lambda city: find_distance("Paris", city))
    df2 = df.sort_values(by='revenu', ascending=False)
    print(df2.head(90))
# Run the whole pipeline as soon as this file is executed.
main()
The line df["distance"] = find_distance("Paris", df["Ville"]) fails and gives me this error:
opencage.geocoder.InvalidInputError: Input must be a unicode string, not 0 Paris 1 Marseille 2 Lyon 3 T
I imagined it working like a loop that stores the distance between Paris and each city, but I guess it passes the whole dataframe column as a single value instead.
Thanks for your help
(Edit: I only pasted part of my dataframe.)
CodePudding user response:
You can try something like:
# Call find_distance once per city name so it always receives a single string.
df["distance"] = df["Ville"].map(lambda city: find_distance("Paris", city))