I have a DataFrame df containing information about people. I need to compute the standard deviation of the weight of the people whose name starts with N. This is my code to create the DataFrame:
# Libraries needed to build the example table.
import pandas as pd
import numpy as np

# Fix the seed so the randomly drawn columns are reproducible.
np.random.seed(0)

# Row labels: one first name per person.
names = ['Leon', 'Mirta', 'Nathan', 'Linda', 'Bandar',
         'Violeta', 'Noah', 'Niji', 'Lucy', 'Mark']

# Draw the lengths before the weights: with a fixed seed, the order of the
# two draws determines which values each column receives.
length_cm = np.random.normal(175, 10, 10).round(1)
weight_kg = np.random.normal(70, 5, 10).round(1)

df = pd.DataFrame(
    {
        'Age': [18, 21, 28, 19, 23, 22, 18, 24, 25, 20],
        'Hair colour': ['Blonde', 'Brown', 'Black', 'Blonde', 'Blonde',
                        'Black', 'Brown', 'Brown', 'Black', 'Black'],
        'Length (in cm)': length_cm,
        'Weight (in kg)': weight_kg,
    },
    index=names,
)
I should get a single number as a result.
First, I attempted to use the df.loc indexer, like so:
# 1. Here we import numpy
import numpy as np
# 2. Here we import pandas
import pandas as pd
# Attempt: select the weights of the rows whose name starts with 'N' and take
# their standard deviation.
# NOTE: the boolean mask is built from pd.Series(df.index), so the mask carries
# its own index rather than df's name labels; df.loc cannot align the two and
# this line raises the IndexingError shown in the traceback below.
ans_4 = df.loc[pd.Series(df.index).str.startswith('N'), 'Weight (in kg)'].std()
However, I always get this IndexingError:
---------------------------------------------------------------------------
IndexingError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_21692/106038441.py in <module>
3 # 2. Here we import pandas
4 import pandas as pd
----> 5 ans_4 = df.loc[pd.Series(df.index).str.startswith('N'), 'Weight (in kg)'].std()
~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
923 with suppress(KeyError, IndexError):
924 return self.obj._get_value(*key, takeable=self._takeable)
--> 925 return self._getitem_tuple(key)
926 else:
927 # we by definition only have the 0th axis
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
1107 return self._multi_take(tup)
1108
-> 1109 return self._getitem_tuple_same_dim(tup)
1110
1111 def _get_label(self, label, axis: int):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple_same_dim(self, tup)
804 continue
805
--> 806 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
807 # We should never have retval.ndim < self.ndim, as that should
808 # be handled by the _getitem_lowerdim call above.
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1142 return self._get_slice_axis(key, axis=axis)
1143 elif com.is_bool_indexer(key):
-> 1144 return self._getbool_axis(key, axis=axis)
1145 elif is_list_like_indexer(key):
1146
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getbool_axis(self, key, axis)
946 # caller is responsible for ensuring non-None axis
947 labels = self.obj._get_axis(axis)
--> 948 key = check_bool_indexer(labels, key)
949 inds = key.nonzero()[0]
950 return self.obj._take_with_is_copy(inds, axis=axis)
~\anaconda3\lib\site-packages\pandas\core\indexing.py in check_bool_indexer(index, key)
2386 mask = isna(result._values)
2387 if mask.any():
-> 2388 raise IndexingError(
2389 "Unalignable boolean Series provided as "
2390 "indexer (index of the boolean Series and of "
IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).
```
What do I need to do to fix the code?
CodePudding user response:
You are almost there. Casting df.index to a pd.Series is unnecessary and leads to the error, because the new Series gets its own index, which is misaligned from the index of the original df. Try:
# Build the boolean mask directly from df.index (no pd.Series wrapper), then
# take the standard deviation of the selected weights.
df['Weight (in kg)'].loc[df.index.str.startswith('N')].std()
output: 4.261846235299126