I am new to Python and am trying to write a 'for' loop that performs a calculation on one column of a DataFrame and puts the results in another column. However, I'm getting the following error:
ValueError: Length of values (0) does not match length of index (52)
What is the problem in my code?
#Import modules
import pandas as pd
import numpy as np
import os
#Read dataframe
# NOTE(review): `file` is not defined anywhere in this snippet; presumably it
# holds the path of the CSV file - confirm before running.
df = pd.read_csv(file)
# Returns the first 52 rows; the result is neither assigned nor printed here.
df.head(52)
Here is the data:
Sample_ID longitude latitude n_ppm Mehlich_3_ppm k_ppm pH Buffer_pH OM Zn_ppm
0 1 -100.922987 39.248554 32 20 29 7.1 7.2 2.7 3.4
1 2 -100.921920 39.248792 35 13 22 7.2 7.3 2.7 3.0
2 3 -100.920808 39.248584 34 8 21 7.5 7.3 2.6 1.2
3 4 -100.919779 39.248591 32 8 33 7.4 7.3 2.6 1.2
4 5 -100.918698 39.247933 42 7 26 7.1 7.1 2.6 1.2
5 6 -100.919457 39.247719 42 7 28 7.4 7.3 2.7 1.5
6 7 -100.920822 39.247916 36 9 32 7.4 7.3 2.8 1.6
7 8 -100.922241 39.247906 36 35 25 7.4 7.3 2.6 3.4
8 9 -100.923254 39.247781 31 35 26 7.5 7.3 3.0 4.2
9 10 -100.924217 39.247858 41 67 40 7.2 7.3 3.0 4.7
10 11 -100.925033 39.246966 50 46 26 7.6 7.3 2.9 2.3
11 12 -100.924190 39.246857 55 71 34 6.9 7.1 3.0 4.3
12 13 -100.923128 39.247089 50 68 27 7.2 7.2 2.7 3.6
13 14 -100.921795 39.246824 50 63 20 7.2 7.2 2.9 3.9
14 15 -100.920870 39.247048 50 43 32 7.2 7.2 3.1 3.1
15 16 -100.919414 39.246841 50 14 32 7.2 7.2 2.7 1.5
16 17 -100.918580 39.247120 50 10 23 7.3 7.3 2.6 1.2
17 18 -100.917600 39.246849 50 9 20 7.3 7.3 2.4 1.1
18 19 -100.917600 39.246111 50 23 36 7.3 7.3 3.3 1.4
19 20 -100.918410 39.245921 50 28 26 7.3 7.3 3.2 1.1
20 21 -100.919478 39.246194 50 99 23 7.1 7.1 3.4 3.0
21 22 -100.920773 39.245991 38 27 38 7.3 7.3 3.3 2.6
22 23 -100.921784 39.246063 37 26 40 7.4 7.3 3.4 3.5
23 24 -100.923103 39.246296 30 48 36 7.3 7.3 3.4 3.2
24 25 -100.924202 39.246112 34 64 33 7.1 7.2 3.2 3.1
25 26 -100.925255 39.246100 41 30 36 7.1 7.2 2.9 2.6
26 27 -100.925431 39.245284 33 29 34 7.7 7.4 3.0 2.3
27 28 -100.924429 39.245093 43 52 23 6.8 7.1 3.7 3.2
28 29 -100.923143 39.245226 36 24 36 7.1 7.2 2.9 3.8
29 30 -100.922008 39.245047 38 36 36 7.0 7.1 3.2 3.6
30 31 -100.920764 39.245258 39 17 35 7.2 7.2 3.3 3.5
31 32 -100.919717 39.244983 41 10 26 7.2 7.2 3.1 1.8
32 33 -100.918345 39.245352 37 13 27 7.2 7.3 3.3 1.7
33 34 -100.917390 39.245077 34 14 21 7.1 7.2 2.9 1.3
34 35 -100.917553 39.244407 41 67 22 7.1 7.2 3.6 1.9
35 36 -100.918440 39.244260 41 12 34 7.2 7.2 2.9 1.3
36 37 -100.919476 39.244483 32 9 23 7.4 7.3 2.8 1.2
37 38 -100.920955 39.244112 45 25 39 7.1 7.2 3.0 4.5
38 39 -100.921705 39.244254 36 27 39 7.2 7.2 3.2 3.8
39 40 -100.923370 39.244238 33 28 35 7.0 7.2 2.7 3.1
40 41 -100.924262 39.244383 45 32 32 7.1 7.2 3.0 3.0
41 42 -100.925087 39.244483 40 12 29 7.2 7.2 2.9 2.7
42 43 -100.924365 39.243440 30 59 23 6.9 7.1 3.8 3.2
43 44 -100.923045 39.243196 43 10 22 7.2 7.3 2.6 2.9
44 45 -100.921845 39.243471 37 23 20 7.1 7.2 3.1 3.3
45 46 -100.920755 39.243252 35 21 37 7.2 7.2 3.2 5.1
46 47 -100.919772 39.243325 45 12 36 7.1 7.2 2.6 2.1
47 48 -100.918568 39.243501 40 9 20 7.0 7.2 2.7 1.0
48 49 -100.919691 39.242702 34 8 32 7.2 7.2 3.0 2.2
49 50 -100.920656 39.242624 35 15 20 7.4 7.3 3.2 4.8
50 51 -100.922180 39.242513 36 9 28 7.0 7.2 2.8 2.4
51 52 -100.922826 39.242723 34 10 37 7.1 7.2 4.3 2.8
# The whole 'OM' column of df, not a single value.
organicMatterField = df['OM']
yieldGoal = 150
# The whole 'n_ppm' column of df, not a single value.
nitrogenField = df['n_ppm']
pca = 0
# Starts out empty; nothing in the code below ever appends to it.
nitrogen_needed = []
# NOTE(review): the statement after the `for` line only rebinds the loop
# variable `nit` to an expression built from the whole columns; the value is
# never stored in nitrogen_needed.
for nit in df['n_ppm']:
nit = ((1.6 * int(yieldGoal)) - (organicMatterField * 20) - (nitrogenField) - float(pca))
#Create a new column within the dataframe
# nitrogen_needed is still [] at this point, so its length (0) does not match
# the length of df's index.
df['Nitrogen recommendation (ppm)'] = nitrogen_needed
print(df)
The error is thrown here. It may be because my 'for' loop isn't written right, but I'm not sure.
CodePudding user response:
I'm not sure I understand your calculations, but with a DataFrame you can do this without a for-loop:
# Vectorized form: the arithmetic is applied to the whole 'OM' and 'n_ppm'
# columns at once, producing one value per row.
df['Nitrogen recommendation (ppm)'] = (
    1.6 * 150
    - df['OM'] * 20
    - df['n_ppm']
    - float(0)
)
And if you do need to use a for-loop, then you should iterate over the rows, calculate the value for each row, and append() it to nitrogen_needed:
nitrogen_needed = []
# Visit the rows one at a time; `row` gives that row's values by column name.
for index, row in df.iterrows():
    # Recommendation for this single row.
    result = ((1.6 * int(yieldGoal)) - (row['OM'] * 20) - (row['n_ppm']) - float(pca))
    # Store the value so the list ends up with one entry per row.
    nitrogen_needed.append(result)
# The list now has as many items as df has rows, so it can be used as a column.
df['Nitrogen recommendation (ppm)'] = nitrogen_needed
In your code you never append the result to nitrogen_needed, so after the loop it is still an empty list []. The final assignment is therefore equivalent to
df['Nitrogen recommendation (ppm)'] = []
and this raises the error because df has 52 rows, but the empty list [] has 0 items.
But if you need a more complex calculation (e.g. with if/elif/else), then you can use apply(function) instead of a for-loop:
yieldGoal = 150
pca = 0


def calculate(row):
    """Return the nitrogen recommendation for a single row.

    `row` must support lookup by the keys 'OM' and 'n_ppm'. The module-level
    names `yieldGoal` and `pca` are read each time the function is called.
    """
    result = ((1.6 * int(yieldGoal)) - (row['OM'] * 20) - (row['n_ppm']) - float(pca))
    return result
# axis=1 makes apply() call calculate once per row; the returned values
# become the new column.
df['Nitrogen recommendation (ppm)'] = df.apply(calculate, axis=1)
Full working example with the data included in the code, so everyone can simply copy and test it.
I use io.StringIO to simulate a file in memory, but you should pass your filename to read_csv instead.
text = ''' Sample_ID longitude latitude n_ppm Mehlich_3_ppm k_ppm pH Buffer_pH OM Zn_ppm
0 1 -100.922987 39.248554 32 20 29 7.1 7.2 2.7 3.4
1 2 -100.921920 39.248792 35 13 22 7.2 7.3 2.7 3.0
2 3 -100.920808 39.248584 34 8 21 7.5 7.3 2.6 1.2
3 4 -100.919779 39.248591 32 8 33 7.4 7.3 2.6 1.2
4 5 -100.918698 39.247933 42 7 26 7.1 7.1 2.6 1.2
5 6 -100.919457 39.247719 42 7 28 7.4 7.3 2.7 1.5
6 7 -100.920822 39.247916 36 9 32 7.4 7.3 2.8 1.6
7 8 -100.922241 39.247906 36 35 25 7.4 7.3 2.6 3.4
8 9 -100.923254 39.247781 31 35 26 7.5 7.3 3.0 4.2
9 10 -100.924217 39.247858 41 67 40 7.2 7.3 3.0 4.7
10 11 -100.925033 39.246966 50 46 26 7.6 7.3 2.9 2.3
11 12 -100.924190 39.246857 55 71 34 6.9 7.1 3.0 4.3
12 13 -100.923128 39.247089 50 68 27 7.2 7.2 2.7 3.6
13 14 -100.921795 39.246824 50 63 20 7.2 7.2 2.9 3.9
14 15 -100.920870 39.247048 50 43 32 7.2 7.2 3.1 3.1
15 16 -100.919414 39.246841 50 14 32 7.2 7.2 2.7 1.5
16 17 -100.918580 39.247120 50 10 23 7.3 7.3 2.6 1.2
17 18 -100.917600 39.246849 50 9 20 7.3 7.3 2.4 1.1
18 19 -100.917600 39.246111 50 23 36 7.3 7.3 3.3 1.4
19 20 -100.918410 39.245921 50 28 26 7.3 7.3 3.2 1.1
20 21 -100.919478 39.246194 50 99 23 7.1 7.1 3.4 3.0
21 22 -100.920773 39.245991 38 27 38 7.3 7.3 3.3 2.6
22 23 -100.921784 39.246063 37 26 40 7.4 7.3 3.4 3.5
23 24 -100.923103 39.246296 30 48 36 7.3 7.3 3.4 3.2
24 25 -100.924202 39.246112 34 64 33 7.1 7.2 3.2 3.1
25 26 -100.925255 39.246100 41 30 36 7.1 7.2 2.9 2.6
26 27 -100.925431 39.245284 33 29 34 7.7 7.4 3.0 2.3
27 28 -100.924429 39.245093 43 52 23 6.8 7.1 3.7 3.2
28 29 -100.923143 39.245226 36 24 36 7.1 7.2 2.9 3.8
29 30 -100.922008 39.245047 38 36 36 7.0 7.1 3.2 3.6
30 31 -100.920764 39.245258 39 17 35 7.2 7.2 3.3 3.5
31 32 -100.919717 39.244983 41 10 26 7.2 7.2 3.1 1.8
32 33 -100.918345 39.245352 37 13 27 7.2 7.3 3.3 1.7
33 34 -100.917390 39.245077 34 14 21 7.1 7.2 2.9 1.3
34 35 -100.917553 39.244407 41 67 22 7.1 7.2 3.6 1.9
35 36 -100.918440 39.244260 41 12 34 7.2 7.2 2.9 1.3
36 37 -100.919476 39.244483 32 9 23 7.4 7.3 2.8 1.2
37 38 -100.920955 39.244112 45 25 39 7.1 7.2 3.0 4.5
38 39 -100.921705 39.244254 36 27 39 7.2 7.2 3.2 3.8
39 40 -100.923370 39.244238 33 28 35 7.0 7.2 2.7 3.1
40 41 -100.924262 39.244383 45 32 32 7.1 7.2 3.0 3.0
41 42 -100.925087 39.244483 40 12 29 7.2 7.2 2.9 2.7
42 43 -100.924365 39.243440 30 59 23 6.9 7.1 3.8 3.2
43 44 -100.923045 39.243196 43 10 22 7.2 7.3 2.6 2.9
44 45 -100.921845 39.243471 37 23 20 7.1 7.2 3.1 3.3
45 46 -100.920755 39.243252 35 21 37 7.2 7.2 3.2 5.1
46 47 -100.919772 39.243325 45 12 36 7.1 7.2 2.6 2.1
47 48 -100.918568 39.243501 40 9 20 7.0 7.2 2.7 1.0
48 49 -100.919691 39.242702 34 8 32 7.2 7.2 3.0 2.2
49 50 -100.920656 39.242624 35 15 20 7.4 7.3 3.2 4.8
50 51 -100.922180 39.242513 36 9 28 7.0 7.2 2.8 2.4
51 52 -100.922826 39.242723 34 10 37 7.1 7.2 4.3 2.8
'''
import pandas as pd
import io
# The columns are separated by runs of whitespace, so split on the regex \s+
# (raw string, so the backslash is not treated as a string escape).
df = pd.read_csv(io.StringIO(text), sep=r'\s+', index_col=0)
# --- version 1 ---
# Vectorized: the formula is evaluated on the whole 'OM' and 'n_ppm' columns
# at once, so no explicit loop is needed.
yieldGoal = 150
pca = 0
df['Nitrogen recommendation (ppm)'] = (
    1.6 * yieldGoal
    - df['OM'] * 20
    - df['n_ppm']
    - float(pca)
)
print(df)
# --- version 2 ---
# Explicit loop: compute the value row by row and collect the results in a list.
yieldGoal = 150
pca = 0
nitrogen_needed = []
for index, row in df.iterrows():
    # Recommendation for this single row.
    result = ((1.6 * int(yieldGoal)) - (row['OM'] * 20) - (row['n_ppm']) - float(pca))
    # Store the value so the list ends up with one entry per row.
    nitrogen_needed.append(result)
# One list item per row, so the lengths match when assigning the column.
df['Nitrogen recommendation (ppm)'] = nitrogen_needed
print(df)
# --- version 3 ---
yieldGoal = 150
pca = 0


def calculate(row):
    """Return the nitrogen recommendation for a single row.

    `row` must support lookup by the keys 'OM' and 'n_ppm'. The module-level
    names `yieldGoal` and `pca` are read each time the function is called.
    """
    result = ((1.6 * int(yieldGoal)) - (row['OM'] * 20) - (row['n_ppm']) - float(pca))
    return result
# Call calculate on every row (axis=1) and store the returned values as the
# new column.
df['Nitrogen recommendation (ppm)'] = df.apply(calculate, axis=1)
print(df)