I'm trying to generate a nested dictionary, but when the for loops finish, the values are the same under every key.
# NOTE(review): this is the question's original (buggy) version, kept as posted.
# The indentation was lost when the snippet was extracted, so the nesting
# described in the comments below is an assumption -- confirm against the
# original post.
# Builds di as {header: {attribute value: {target value: count}}} by filtering
# df for every (attribute value, target value) pair.
def frequency(df, headers, numerical, target):
di = {}
if len(numerical) == 0:
print("No NM")
else:
# Drops every name listed in numerical from headers; this mutates the list
# object passed in by the caller.
for i in numerical:
if i in headers:
headers.remove(i)
for i in range(len(headers)):
attdic = {}
# valdic is created here, i.e. once per header (presumably outside the j loop).
# That single dict is the object stored under every key of attdic further
# down, which is why all attribute values end up showing the same counts.
valdic = {}
# Distinct values of the current column, in order of first appearance.
aux_list = df[headers[i]].to_list()
aux_list = list(OrderedDict.fromkeys(aux_list))
# Distinct values of the target column, in order of first appearance.
tar_list = df[target].to_list()
tar_list = list(OrderedDict.fromkeys(tar_list))
for j in range(len(aux_list)):
print(aux_list[j])
for k in range(len(tar_list)):
# Rows where the column equals aux_list[j] and the target equals tar_list[k].
temp_df = df.loc[(df[headers[i]] == aux_list[j]) & (df[target] == tar_list[k])]
# NOTE(review): an operator is missing between the call and the literal 1
# (as written this is a syntax error); presumably it was "+ 1" -- confirm.
tempvar = len(temp_df.index) 1
valdic[tar_list[k]] = tempvar
# Stores a reference to the shared valdic, not a copy of it.
attdic[aux_list[j]] = valdic
di[headers[i]] = attdic
Output:
{'Outlook': {'overcast': {'No': 2, 'Yes': 5}, 'rainy': {'No': 2, 'Yes': 5}, 'sunny': {'No': 2, 'Yes': 5}}, 'Windy': {False: {'No': 3, 'Yes': 5}, True: {'No': 3, 'Yes': 5}}}
Expected output:
{'Outlook': {'overcast': {'No': 3, 'Yes': 3}, 'rainy': {'No': 3, 'Yes': 4}, 'sunny': {'No': 2, 'Yes': 5}}, 'Windy': {False: {'No': 4, 'Yes': 6}, True: {'No': 3, 'Yes': 5}}}
CodePudding user response:
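Notice that you create one `valdic` for each `i`, but you should create one for each `(i, j)` pair. Because the same dictionary object is stored under every key of `attdic`, each pass of the `j` loop overwrites the counts written by the previous one, and every key ends up showing the values from the last pass. Move the creation of `valdic` inside the `for j` loop, and it should work: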
def frequency(df, headers, numerical, target):
    """Build a nested table of (row count + 1) per attribute value and target value.

    For every column named in ``headers`` that is not listed in
    ``numerical``, each distinct value of that column is paired with each
    distinct value of the ``target`` column, and the number of rows of
    ``df`` matching both is counted.

    Args:
        df: pandas DataFrame holding the data.
        headers: Column names to consider. The list is not modified.
        numerical: Column names to skip. If empty, "No NM" is printed and
            every header is processed.
        target: Name of the target (class) column.

    Returns:
        A dict shaped ``{header: {attribute value: {target value: count}}}``
        where ``count`` is the number of matching rows plus one. Values
        appear in order of first occurrence in ``df``.
    """
    if len(numerical) == 0:
        print("No NM")

    # Filter into a new list instead of calling headers.remove(), so the
    # caller's list is left untouched.
    categorical = [name for name in headers if name not in numerical]

    # The distinct target values do not depend on the header being
    # processed, so compute them once. dict.fromkeys de-duplicates while
    # keeping first-occurrence order.
    target_values = list(dict.fromkeys(df[target].to_list()))

    di = {}
    for header in categorical:
        attdic = {}
        for value in dict.fromkeys(df[header].to_list()):
            print(value)
            # Create a new valdic for each attribute value. Sharing a single
            # dict across values would make every key of attdic point at the
            # same object, so all of them would show the last value's counts.
            valdic = {}
            for outcome in target_values:
                matches = df.loc[(df[header] == value) & (df[target] == outcome)]
                # NOTE(review): the "+ 1" looks like add-one (Laplace)
                # smoothing so that no combination has a zero count -- confirm.
                valdic[outcome] = len(matches.index) + 1
            attdic[value] = valdic
        di[header] = attdic
    return di