Home > OS >  a column in a dataframe having list of dictionary i want to convert that into seperate column
a column in a dataframe having list of dictionary i want to convert that into seperate column

Time:09-06

enter image description here

data = [{'category': 'A', 'maxvalue': '6', 'minvalue': '0'}, {'category': 'B', 'maxvalue': '11', 'minvalue': '6'}, {'category': 'C', 'maxvalue': '21', 'minvalue': '11'}, {'category': 'D', 'maxvalue': '31', 'minvalue': '21'}, {'category': 'E', 'maxvalue': '41', 'minvalue': '31'}, {'category': 'F', 'maxvalue': '9999999999', 'minvalue': '41'}] 

this data is inside a data frame and now that dataframe, the format i want is like below along with other columns inside dataframe

A     B     C      D      E      F
0-6   6-11  11-21  21-31  31-41  41-51

CodePudding user response:

For an example of data as following :

import pandas as pd
import numpy as np
data=[[{'category': 'A', 'maxvalue': '6', 'minvalue': '0'}, 
       {'category': 'B', 'maxvalue': '11', 'minvalue': '6'},
       {'category': 'C', 'maxvalue': '21', 'minvalue': '11'},
       {'category': 'D', 'maxvalue': '31', 'minvalue': '21'}, 
       {'category': 'E', 'maxvalue': '41', 'minvalue': '31'}, 
       {'category': 'F', 'maxvalue': '9999999999', 'minvalue': '41'}],
      [{'category': 'A', 'maxvalue': '5', 'minvalue': '0'},
       {'category': 'B', 'maxvalue': '10', 'minvalue': '5'}, 
       {'category': 'C', 'maxvalue': '20', 'minvalue': '10'}, 
       {'category': 'D', 'maxvalue': '30', 'minvalue': '20'},
       {'category': 'E', 'maxvalue': '40', 'minvalue': '30'},
       {'category': 'F', 'maxvalue': '9999999999', 'minvalue': '41'}],[]] 

df=pd.DataFrame(columns=['bucket'])
df['bucket']=data
df

enter image description here

df2=df.bucket.apply(lambda x:pd.DataFrame.from_dict(x))
df2
data=[]
for i in range(len(df2)):
  if not(df2.iloc[i].empty):
    df2.iloc[i]['value']=df2.iloc[i]['minvalue'] '-' (df2.iloc[i]['maxvalue'])
    data.append(df2.iloc[i].T.loc['value'].to_list())
  else:
      data.append(np.zeros(len(df2.iloc[0].T.loc['category'].to_list())))


dfinal=pd.DataFrame(data,columns=df2.iloc[0].T.loc['category'].to_list())
final

enter image description here

Else if you want it to reconfigure in the same dataframe :

df[dfinal.columns]=data
df

CodePudding user response:

One solution could be as follows:

import pandas as pd
import numpy as np

# data set without duplicates in "category"
lst = [[{'category': 'A', 'maxvalue': '4', 'minvalue': '0'},
        {'category': 'B', 'maxvalue': '51', 'minvalue': '41'}],
       np.nan,
       [{'category': 'A', 'maxvalue': '6', 'minvalue': '0'},  
        {'category': 'B', 'maxvalue': '21', 'minvalue': '11'}]
       ]

df = pd.DataFrame(columns=['bucket'])
df['bucket'] = lst

# code
from itertools import chain

# select notna rows
out = df[df['bucket'].notna()]

# get dict keys in separate cols with their associated vals as the vals
# repeat index from df according to len of each list
out = pd.DataFrame(list(chain.from_iterable(out['bucket'])), 
                   index=pd.Index.repeat(out.index, out['bucket'].str.len()))

out['result'] = (out.minvalue.astype(str) '-' out.maxvalue.astype(str))

# =============================================================================
# solution 1: no duplicate categories per row
# =============================================================================

out1 = out.copy()
out1 = out1.pivot(index=None, columns='category', values='result').fillna('')

print(out1)

category    A      B
0         0-4  41-51
2         0-6  11-21

# =============================================================================
# solution 2: duplicate categories per row, e.g. 2x "A" in row 0
# =============================================================================

lst = [[{'category': 'A', 'maxvalue': '4', 'minvalue': '0'},
        {'category': 'A', 'maxvalue': '51', 'minvalue': '41'}],
       np.nan,
       [{'category': 'A', 'maxvalue': '6', 'minvalue': '0'},  
        {'category': 'B', 'maxvalue': '21', 'minvalue': '11'}]
       ]

# continue after `out['result'] = (out.minvalue...)`
out2 = out.copy()

out2['result'] = out2.groupby([out2.index,'category'])['result']\
    .transform(lambda x: ','.join(x))

out2.set_index('category', append=True, inplace=True)
out2 = out2[~out2.index.duplicated(keep='first')]['result']

out2 = out2.reset_index().set_index('level_0').rename_axis(None, axis=0)
out2 = out2.pivot(index=None, columns='category', values='result').fillna('')

print(out2)

category          A      B
0         0-4,41-51       
2               0-6  11-21

# =============================================================================
# solution 3: same data as 2, but only keeping first of duplicates
# =============================================================================

# continue after `out['result'] = (out.minvalue...)`
out3 = out.copy()

out3 = out3.reset_index(drop=False)\
    .drop_duplicates(['index','category'])\
        .set_index('index', drop=True).rename_axis(None, axis=0)

out3 = out3.pivot(index=None, columns='category', values='result').fillna('')

print(out3) # only printing 0-4, not 41-51 for "A" in row 0

category    A      B
0         0-4       
2         0-6  11-21

  • Related