Pandas pivot_table AssertionError: `result` has not been initialized - CodePudding

df:

avg	count	date	val	prop	unit	distance	d-atmp	d-clouds	d-dewpoint
0.0786107	12	2014-10-03 00:00:00	22	atmp	(Deg C)	24829.6	24829.6	nan	nan
0.0786107	12	2014-10-03 00:00:00	0	clouds	(oktas)	22000.6	nan	22000.6	nan
0.0786107	12	2014-10-03 00:00:00	32	dewpoint	(Deg C)	21344.1	nan	nan	21344.1
0.0684246	6	2014-10-04 00:00:00	21.5	atmp	(Deg C)	26345.1	26345.1	nan	nan

cols = ['avg', 'date', 'count', 'd-atmp', 'd-clouds', 'd-dewpoint']
d = pd.pivot_table(x, index=cols, columns=['prop', 'unit'], values='val', aggfunc=max)

Ideal result:

date	countObs	avg	d-atmp	atmp (Deg C)	d-clouds	clouds (oktas)	d-dewpoint	dewpoint (Deg C)
2014-10-03 00:00:00	12	0.0786107	24829.6	22	22000.6	0	21344.1	32
2014-10-04 00:00:00	6	0.0684246	26345.1	21.5	nan	nan	nan	nan

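Error (the call in the traceback below has `aggfunc` commented out, so pandas falls back to the default `mean`, which is not implemented for the object-dtype `val` column being aggregated):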

--------------------------------------------------------------------------- NotImplementedError                       Traceback (most recent call last) ~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in array_func(values)    1067             try:
-> 1068                 result = self.grouper._cython_operation(    1069                     "aggregate", values, how, axis=data.ndim - 1, min_count=min_count

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    998         ngroups = self.ngroups
--> 999         return cy_op.cython_operation(    1000             values=values,

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in cython_operation(self, values, axis, min_count, comp_ids, ngroups,
**kwargs)
    659 
--> 660         return self._cython_op_ndim_compat(
    661             values,

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, **kwargs)
    515 
--> 516         return self._call_cython_op(
    517             values,

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_call_cython_op(self, values, min_count, ngroups, comp_ids, mask, **kwargs)
    561         out_shape = self._get_output_shape(ngroups, values)
--> 562         func, values = self.get_cython_func_and_vals(values, is_numeric)
    563         out_dtype = self.get_out_dtype(values.dtype)

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in get_cython_func_and_vals(self, values, is_numeric)
    204 
--> 205         func = self._get_cython_function(kind, how, values.dtype, is_numeric)
    206 

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_get_cython_function(cls, kind, how, dtype, is_numeric)
    169                 # raise NotImplementedError here rather than TypeError later
--> 170                 raise NotImplementedError(
    171                     f"function is not implemented for this dtype: "

NotImplementedError: function is not implemented for this dtype: [how->mean,dtype->object]

During handling of the above exception, another exception occurred:

AssertionError                            Traceback (most recent call last) <ipython-input-119-b64b487d2810> in <module>
      5 # o
      6 # cols  = []
----> 7 d = pd.pivot_table(x, index=cols, columns=['osmcObsProperty', 'unit'], values='val') #, aggfunc=max #np.mean or max appear similar , dropna=False
      8 
      9 d.reset_index(inplace=True)

~/.local/lib/python3.9/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
     93         return table.__finalize__(data, method="pivot_table")
     94 
---> 95     table = __internal_pivot_table(
     96         data,
     97         values,

~/.local/lib/python3.9/site-packages/pandas/core/reshape/pivot.py in
__internal_pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
    163 
    164     grouped = data.groupby(keys, observed=observed, sort=sort)
--> 165     agged = grouped.agg(aggfunc)
    166     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
    167         agged = agged.dropna(how="all")

~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
    977 
    978         op = GroupByApply(self, func, args, kwargs)
--> 979         result = op.agg()
    980         if not is_dict_like(func) and result is not None:
    981             return result

~/.local/lib/python3.9/site-packages/pandas/core/apply.py in agg(self)
    156 
    157         if isinstance(arg, str):
--> 158             return self.apply_str()
    159 
    160         if is_dict_like(arg):

~/.local/lib/python3.9/site-packages/pandas/core/apply.py in apply_str(self)
    505             elif self.axis != 0:
    506                 raise ValueError(f"Operation {f} does not support axis=1")
--> 507         return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)
    508 
    509     def apply_multiple(self) -> FrameOrSeriesUnion:

~/.local/lib/python3.9/site-packages/pandas/core/apply.py in
_try_aggregate_string_function(self, obj, arg, *args, **kwargs)
    575         if f is not None:
    576             if callable(f):
--> 577                 return f(*args, **kwargs)
    578 
    579             # people may try to aggregate on a non-callable attribute

~/.local/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in mean(self, numeric_only)    1685         numeric_only = self._resolve_numeric_only(numeric_only)    1686 
-> 1687         result = self._cython_agg_general(    1688             "mean",    1689             alt=lambda x: Series(x).mean(numeric_only=numeric_only),

~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in
_cython_agg_general(self, how, alt, numeric_only, min_count)    1080         # TypeError -> we may have an exception in trying to aggregate    1081         #  continue and exclude the block
-> 1082         new_mgr = data.grouped_reduce(array_func, ignore_failures=True)    1083     1084         if len(new_mgr) < len(data):

~/.local/lib/python3.9/site-packages/pandas/core/internals/managers.py in grouped_reduce(self, func, ignore_failures)    1233                 for sb in blk._split():    1234                     try:
-> 1235                         applied = sb.apply(func)    1236                     except (TypeError, NotImplementedError):    1237                       if not ignore_failures:

~/.local/lib/python3.9/site-packages/pandas/core/internals/blocks.py in apply(self, func, **kwargs)
    379         """
    380         with np.errstate(all="ignore"):
--> 381             result = func(self.values, **kwargs)
    382 
    383         return self._split_op_result(result)

~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in array_func(values)    1074                 # try to python agg    1075
# TODO: shouldn't min_count matter?
-> 1076                 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)    1077     1078             return result

~/.local/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in
_agg_py_fallback(self, values, ndim, alt)    1396         #  should always be preserved by the implemented aggregations    1397         # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
-> 1398         res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)    1399     1400         if isinstance(values, Categorical):

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in agg_series(self, obj, func, preserve_dtype)    1047     1048         else:
-> 1049             result = self._aggregate_series_fast(obj, func)    1050     1051         npvalues = lib.maybe_convert_objects(result, try_float=False)

~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_aggregate_series_fast(self, obj, func)    1072         ids = ids.take(indexer)    1073         sgrouper = libreduction.SeriesGrouper(obj, func, ids, ngroups)
-> 1074         result, _ = sgrouper.get_result()    1075         return result    1076 

~/.local/lib/python3.9/site-packages/pandas/_libs/reduction.pyx in pandas._libs.reduction.SeriesGrouper.get_result()

AssertionError: `result` has not been initialized.

CodePudding user response:

IIUC (if I understand correctly), you can use `groupby` followed by `agg`:

out = df.groupby('date', as_index=False).agg(max)

Output:

date	avg	count	val	prop	unit	distance	d-atmp	d-clouds	d-dewpoint
2014-10-03 00:00:00	0.0786107	12	32	dewpoint	(oktas)	24829.6	24829.6	22000.6	21344.1
2014-10-04 00:00:00	0.0684246	6	21.5	atmp	(Deg C)	26345.1	26345.1	nan	nan

CodePudding user response:

You could `pivot` first, then use `groupby` + `max` to collapse the rows for each date into one:

cols = ['avg', 'date', 'count', 'd-atmp', 'd-clouds', 'd-dewpoint']
tmp = df.pivot(index=cols, columns=['prop', 'unit'], values='val')
tmp.columns = tmp.columns.map(' '.join)
out = tmp.reset_index().groupby('date', as_index=False).max()\
      [['date', 'count', 'avg', 'd-atmp', 'atmp (Deg C)', 'd-clouds', 
        'clouds (oktas)', 'd-dewpoint', 'dewpoint (Deg C)']]

Output:

                  date  count       avg   d-atmp  atmp (Deg C)  d-clouds  clouds (oktas)  d-dewpoint  dewpoint (Deg C)  
0  2014-10-03 00:00:00     12  0.078611  24829.6          22.0   22000.6             0.0     21344.1              32.0  
1  2014-10-04 00:00:00      6  0.068425  26345.1          21.5       NaN             NaN         NaN               NaN