I have two DataFrame as listed below
plusMinusOne = pd.DataFrame({0: [2459650, 2459650,2459650,2459654,2459654,2459654,2459660], 1: [100, 90,80,14,15,16,2]},index=[3,4,5,12,13,14,27])
bias = pd.DataFrame({0: [2459651, 2459652,2459653,2459655,2459656,2459658,2459659], 1: [10, 20,30,40,50,60,70]})
I have to subtract plusMinusOne's 1st column with bias 1th column by matching the bias 0th column with plusMinusOne's 0th column.
As 2459650
is not present in bias dataFrame i have to check for 2459651/2459649
from bias and subtract any one's value from that. I have to look for 1 above or 1 below from bias and then subtract the value for every row
I was trying like this.
for i in plusMinusOne[0]:
if i 1 in bias[0].values:
plusMinusOne[1] = plusMinusOne[1].sub(plusMinusOne[0].map(
bias.assign(key=bias[0]-1).set_index('key')[1]), fill_value=0)
break
elif i-1 in bias[0].values:
plusMinusOne[1] = plusMinusOne[1].sub(plusMinusOne[0].map(
bias.assign(key=bias[0] 1).set_index('key')[1]), fill_value=0)
break
My expected output is :
plusMinusOne
2459650 90
2459650 80
2459650 70
2459654 -26
2459654 -25
2459654 -24
2459660 -68
CodePudding user response:
Vectorized solution,
def bias_diff(row):
value = 0
if (row[0] == bias[0]).any():
value = row[1] - bias[(row[0]) == bias[0]].iloc[0,1]
elif ((row[0] 1) == bias[0]).any():
value = row[1] - bias[(row[0] 1) == bias[0]].iloc[0,1]
else:
value = row[1] - bias[(row[0]-1) == bias[0]].iloc[0,1]
return value
plusMinusOne[1] = plusMinusOne.apply(bias_diff, axis=1)
print(plusMinusOne)
Output
0 1
3 2459650 90
4 2459650 80
5 2459650 70
12 2459654 -26
13 2459654 -25
14 2459654 -24
27 2459660 -68
CodePudding user response:
This is not an efficient code, but this work for your case.
import pandas as pd
df1 = pd.DataFrame({0: [2459650, 2459650,2459650,2459654,2459654,2459654,2459660], 1: [100, 90,80,14,15,16,2]})
df2 = pd.DataFrame({0: [2459651, 2459652,2459653,2459655,2459656,2459658,2459659], 1: [10, 20,30,40,50,60,70]})
def data_process(df1,df2,i):
data = None
for j in range(len(df2)):
if df1[0][i] == df2[0][j]:
data = df1[1][i]-df2[1][j]
else:
try:
if (df1[0][i]) 1 == df2[0][j]:
data = df1[1][i]-df2[1][j]
elif (df1[0][i])-1 == df2[0][j]:
data = df1[1][i] - df2[1][j]
except:
pass
return data
processed_data = []
for i in range(len(df1)):
processed_data.append(data_process(df1,df2,i))
df1[2] = processed_data
print(df1[[0,2]])
The output dataframe is
0 2
0 2459650 90
1 2459650 80
2 2459650 70
3 2459654 -26
4 2459654 -25
5 2459654 -24
6 2459660 -68