I am trying to create a dataframe from a combination of a dataframe and a cross dictionary, as shown below.
CodePudding user response:
Try:
# Bin every value of the list-valued columns "x" and "y" according to the
# thresholds in scale[1], then pivot so each (source column, bin label) pair
# becomes its own column of lists, one row per ID.
#
# NOTE(review): `df` and `scale` are expected to exist already (they come from
# the question). This presumably requires scale[1] to be a dict whose values,
# read in reverse insertion order, are increasing bin edges -- confirm against
# the actual data.
from ast import literal_eval

import pandas as pd

# The list columns are stored as strings; parse them into real Python lists.
# literal_eval only accepts literals, so it is safe where eval() would not be.
df["x"] = df["x"].apply(literal_eval)
df["y"] = df["y"].apply(literal_eval)

# Long format: one row per (ID, source column, single value). stack() yields an
# unnamed Series, so to_frame() names the value column 0.
x = df.set_index("ID").stack().to_frame().explode(0).dropna()

# Label each value with the bin it falls into. The edge list is the reversed
# scale values plus +inf, so the last bin is open-ended; right=False makes
# every bin left-closed: [edge, next_edge).
x["name"] = pd.cut(
    x[0],
    list(scale[1].values())[::-1] + [float("inf")],
    right=False,
    labels=list(scale[1])[::-1],
)

# Expose the source column name (index level 1) as a regular column so it can
# be used as a pivot key.
x["tmp"] = x.index.get_level_values(1)

# Wide format: collect the values of every (source column, bin) pair per ID
# into a list.
x = x.pivot_table(
    index=pd.Grouper(level=0),
    columns=["tmp", "name"],
    values=0,
    aggfunc=list,
)

# Make sure every (source column, bin) combination exists as a column, even
# when no value fell into that bin.
idx = pd.MultiIndex.from_product(
    [set(x.columns.get_level_values(0)), scale[1].keys()]
)
x = x.reindex(idx, axis=1)

# Flatten the two-level column index into "<source>_<bin>" names.
x.columns = [f"{a}_{b}" for a, b in x.columns]

# Empty cells become [0]. fillna() cannot take a list as a scalar fill value,
# hence the per-index mapping.
x = x.apply(lambda s: s.fillna({i: [0] for i in x.index}))

# Order the columns by source name, then numerically by bin label.
print(
    x[
        sorted(
            x.columns,
            key=lambda col: (col.split("_")[0], int(col.split("_")[1])),
        )
    ].reset_index()
)
Prints:
ID x_500 x_2500 x_5000 x_7500 x_10000 y_500 y_2500 y_5000 y_7500 y_10000
0 EF407412 [0] [3140, 4836] [2788] [0] [0] [0] [0] [1408, 1572, 2277] [0] [0]
1 KM043272 [7001] [0] [1494, 1932, 2029] [539, 906] [0] [0] [0] [0] [0] [0]