I am trying to pass a subset of my dataframe rows — those satisfying 'rating_count' >= m — to the 'weighted_rating' function. However, the data passed in contains only the 'user_id' column, while I expect it to contain several other columns. As a result, I get a KeyError on the line v = xx['rating_count'] (see the log below).
So, I need xx['rating_count'] and xx['rating'] to be available inside the function.
def weighted_rating(xx):
    """Return the weighted rating for a single row of rating data.

    Evaluates ``v / (v + m) * R + m / (m + v) * C``, where ``v`` is
    ``xx['rating_count']`` and ``R`` is ``xx['rating']``.  ``m`` and ``C``
    are read from module-level globals that are not defined in this snippet
    (presumably the minimum rating count and the mean rating -- confirm
    against the calling code).

    Args:
        xx: A mapping-like row (for example the row Series produced by
            ``DataFrame.apply(..., axis=1)``) that can be indexed by the
            labels 'rating_count' and 'rating'.

    Returns:
        The weighted rating computed from the row and the globals above.

    Raises:
        KeyError: If ``xx`` has no 'rating_count' or 'rating' label.
    """
    print(xx)  # debug output: shows what apply() actually hands to the function
    v = xx['rating_count']
    R = xx['rating']
    # Blend the row's own rating with the global value C, weighted by how
    # the row's count v compares with the threshold m.
    return (v / (v + m) * R) + (m / (m + v) * C)
# NOTE(review): apply() is called without axis=1 here, so it runs with the
# default axis=0 and hands each *column* to weighted_rating; this is the call
# that raises the KeyError shown in the traceback below.
final_data['weighted_rating'] = final_data.loc[final_data['rating_count'] >= m].apply(lambda x: weighted_rating(x))
Output:
659 [email protected]
660 [email protected]
662 [email protected]
663 [email protected]
664 [email protected]
...
1653167 [email protected]
1653169 [email protected]
1653178 [email protected]
1653179 [email protected]
1653190 [email protected]
Name: user_id, Length: 88446, dtype: object
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
KeyError: 'rating_count'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_3296/835381681.py in <module>
----> 1 final_data.loc[final_data['rating_count'] >= m].apply(lambda x: weighted_rating(x))
~\anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwargs)
8738 kwargs=kwargs,
8739 )
-> 8740 return op.apply()
8741
8742 def applymap(
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply(self)
686 return self.apply_raw()
687
--> 688 return self.apply_standard()
689
690 def agg(self):
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
810
811 def apply_standard(self):
--> 812 results, res_index = self.apply_series_generator()
813
814 # wrap results
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
826 for i, v in enumerate(series_gen):
827 # ignore SettingWithCopy here in case the user mutates
--> 828 results[i] = self.f(v)
829 if isinstance(results[i], ABCSeries):
830 # If we have a view on v, we need to make a copy because
~\AppData\Local\Temp/ipykernel_3296/835381681.py in <lambda>(x)
----> 1 final_data.loc[final_data['rating_count'] >= m].apply(lambda x: weighted_rating(x))
~\AppData\Local\Temp/ipykernel_3296/3170994745.py in weighted_rating(xx)
1 def weighted_rating(xx):
2 print(xx)
----> 3 v = xx['rating_count']
4 R = xx['rating']
5 return (v/(v + m) * R) + (m/(m + v) * C)
~\anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
~\anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'rating_count'
I also tried the following code with no luck:
# Same call using plain boolean indexing instead of .loc; apply() is still
# invoked without axis=1, so it uses the default axis=0 (column-wise).
final_data['weighted_rating'] = final_data[final_data['rating_count'] >= m].apply(lambda x: weighted_rating(x))
Am I doing something wrong? Please help
Edit: Adding sample data
<div>
<table border="1" >
<thead>
<tr style="text-align: right;">
<th></th>
<th>user_id</th>
<th>user_age</th>
<th>gender</th>
<th>location</th>
<th>joining_date</th>
<th>content_id</th>
<th>duration_user</th>
<th>date</th>
<th>start_time</th>
<th>end_time</th>
<th>content_type</th>
<th>language</th>
<th>genre</th>
<th>duration_content</th>
<th>release_date</th>
<th>rating</th>
<th>episode_count</th>
<th>season_count</th>
<th>rating_count</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>[email protected]</td>
<td>38</td>
<td>F</td>
<td>Goa</td>
<td>2018-09-03</td>
<td>cont_3375_16_10</td>
<td>2220000.0</td>
<td>2020-06-03</td>
<td>18:47:17</td>
<td>19:24:17</td>
<td>series</td>
<td>english</td>
<td>action</td>
<td>3060000.0</td>
<td>2015-11-16</td>
<td>5.0</td>
<td>10.0</td>
<td>16.0</td>
<td>64</td>
</tr>
<tr>
<th>1</th>
<td>[email protected]</td>
<td>38</td>
<td>F</td>
<td>Goa</td>
<td>2018-09-03</td>
<td>cont_1195_1_8</td>
<td>900000.0</td>
<td>2019-04-18</td>
<td>11:12:40</td>
<td>11:27:40</td>
<td>sports</td>
<td>english</td>
<td>football</td>
<td>5400000.0</td>
<td>2017-03-09</td>
<td>0.0</td>
<td>8.0</td>
<td>1.0</td>
<td>66</td>
</tr>
<tr>
<th>2</th>
<td>[email protected]</td>
<td>38</td>
<td>F</td>
<td>Goa</td>
<td>2018-09-03</td>
<td>cont_3470_2_15</td>
<td>1620000.0</td>
<td>2021-09-18</td>
<td>11:55:34</td>
<td>12:22:34</td>
<td>series</td>
<td>english</td>
<td>horror</td>
<td>2820000.0</td>
<td>1997-08-05</td>
<td>8.0</td>
<td>15.0</td>
<td>2.0</td>
<td>63</td>
</tr>
<tr>
<th>3</th>
<td>[email protected]</td>
<td>38</td>
<td>F</td>
<td>Goa</td>
<td>2018-09-03</td>
<td>cont_310_25_9</td>
<td>780000.0</td>
<td>2020-08-09</td>
<td>11:38:44</td>
<td>11:51:44</td>
<td>series</td>
<td>english</td>
<td>comedy</td>
<td>3960000.0</td>
<td>2019-06-29</td>
<td>4.0</td>
<td>9.0</td>
<td>25.0</td>
<td>62</td>
</tr>
<tr>
<th>4</th>
<td>[email protected]</td>
<td>38</td>
<td>F</td>
<td>Goa</td>
<td>2018-09-03</td>
<td>cont_4350_1_3</td>
<td>3480000.0</td>
<td>2021-06-25</td>
<td>23:42:44</td>
<td>00:40:44</td>
<td>sports</td>
<td>english</td>
<td>cricket</td>
<td>3840000.0</td>
<td>2002-10-21</td>
<td>0.0</td>
<td>3.0</td>
<td>1.0</td>
<td>66</td>
</tr>
</tbody>
</table>
</div>
Answer:
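I assume you want to apply weighted_rating() to each row of the dataframe final_data. By default, apply() uses axis=0 and passes each column to the function as a Series, which is why xx contained only the 'user_id' column and xx['rating_count'] raised a KeyError. To apply the function row by row, pass axis=1 to the apply() method.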
# Keep only the rows with at least m ratings, then score each remaining row;
# axis=1 makes apply() pass one row at a time to weighted_rating.
final_data['weighted_rating'] = (
    final_data[final_data['rating_count'] >= m]
    .apply(weighted_rating, axis=1)
)
ref: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html