The loop is not working. I am trying to group the rows as per the if condition. However, after running the code, all the rows are being categorized into a single category 'global governance'
# create a for loop to re-group as per the values mentioned above
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the if/elif chain is meant to be the body of the `for` loop.
# Why every row ends up in one category (assuming `data` is a pandas
# DataFrame): each `data['final group'] = '<name>'` below assigns that string
# to the WHOLE column, not to the current row, so the branch taken for the
# last value of data['Area'] overwrites everything written before it.
for values in data['Area']:
# `|` between parenthesised comparisons acts like `or` on booleans here,
# but it is the bitwise operator and does not short-circuit.
if ((values=='corporate governance') | (values=='accounting') | (values== 'auditing') | (values=='manufacturing') | (values=='dao challenges')):
data['final group']='corporate governance'
# 'blockchain governance' is compared twice in the next condition.
elif (values=='blockchain governance' or values=='bct standards' or values== 'blockchain governance' or
values=='blockchain hashing' or values=='developers incentive' or values=='smart contract' or values=='Smart contract' ):
data['final group']='Blockchain Technology Governance'
elif (values=='Block chain and legal compliance' or values=='Decision Rights' or values=='Dispute resolution' or
values=='law' or values=='judicature'):
data['final group']='Judicature'
elif (values == 'Blockchain and Trust'):
data['final group']='Trust'
elif ((values=='Foreign Aid') | (values=='Global Commons governance') | (values=='global Commons governance')):
data['final group']='global governance'
# 'Rural governance' is compared twice in the next condition.
elif (values=='Public governance' or values=='Rural governance' or values=='Usage of bct in governance' or values=='public governance' or
values=='Smart city' or values=='smart city' or values=='Rural governance' or values=='national records'):
data['final group']='governance'
elif (values=='Music' or values=='Waste management' or values=='blockchian and healthcare' or values== 'forest management' or
values=='healthcare' or
values=='Resource governance'):
data['final group']='Resource governance'
elif (values=='scientific publication' or values=='Academic Governance'):
data['final group']='Academia'
elif(values=='SDG' or values== 'carbon trading' or values=='environmental governance'):
data['final group']='Sustainable developemnt'
elif(values=='data governance' or values=='organisational data governance'):
data['final group']='Data governance'
elif(values=='digital platform governance' or values =='e commerce' or
values=='platform governance' or values=='product management'):
data['final group']='platform governance'
elif(values=='Supplychain'):
# NOTE(review): `==` on the next line is a comparison, not an assignment,
# so this branch stores nothing in the column.
data['final group']=='Supplychain'
CodePudding user response:
Expanding from my comment: your loop iterates over every row in the `Area` column of your df; however, when it sets the value for the `final group` column, it sets it for every row of that column. So the `Area` of the last row in your dataframe determines the value of `final group` for every row. Instead, consider something like `.apply()`:
def determine_final_group(area):
    """Return the consolidated group name for a single ``Area`` value.

    Written to be applied element-wise, e.g.
    ``data['Area'].apply(determine_final_group)``.

    Args:
        area: One raw value from the ``Area`` column.

    Returns:
        The name of the group ``area`` belongs to, or ``None`` when no
        branch matches (only the first five groups are spelled out here).
    """
    if area in ('corporate governance', 'accounting', 'auditing',
                'manufacturing', 'dao challenges'):
        return 'corporate governance'
    elif area in ('blockchain governance', 'bct standards',
                  'blockchain hashing', 'developers incentive',
                  'smart contract', 'Smart contract'):
        return 'Blockchain Technology Governance'
    elif area in ('Block chain and legal compliance', 'Decision Rights',
                  'Dispute resolution', 'law', 'judicature'):
        return 'Judicature'
    elif area == 'Blockchain and Trust':
        return 'Trust'
    # `area in` is essential here: a bare non-empty sequence is always truthy
    # and would send every remaining value to 'global governance'.
    elif area in ('Foreign Aid', 'Global Commons governance',
                  'global Commons governance'):
        return 'global governance'
    # continue the elif logic using this pattern...
    return None
# Call determine_final_group once per value of data['Area'] and store the
# per-row results in the 'final group' column (assumes `data` is a pandas
# DataFrame, as in the question).
data['final group'] = data['Area'].apply(determine_final_group)
CodePudding user response:
When you do an assignment like `data['final group']='corporate governance'`,
it assigns that value to the entire column, not just the one row.
- Put that into a function,
- with a return for the final group (not assignment)
- and remove the for-loop
- use .map instead.
def get_final_group(values):
    """Map one ``Area`` value to its final group.

    Meant to be passed to ``Series.map`` so it is called once per row.

    Args:
        values: A single raw value from the ``Area`` column.

    Returns:
        The final group name, or ``None`` if the value is in none of the
        groups listed so far.
    """
    # Each branch returns, so plain `if` statements suffice (no `elif`).
    if values in ('corporate governance', 'accounting', 'auditing',
                  'manufacturing', 'dao challenges'):
        return 'corporate governance'
    if values in ('blockchain governance', 'bct standards',
                  'blockchain hashing', 'developers incentive',
                  'smart contract', 'Smart contract'):
        return 'Blockchain Technology Governance'
    # ... the remaining groups follow the same pattern ...
    return None
# Run get_final_group on every value of df['Area'] and store the results in
# the 'final_group' column.
# NOTE(review): the question names the frame `data` and the column
# 'final group'; adjust these names to match your own code.
df['final_group'] = df['Area'].map(get_final_group)
Notes and improvements:
- Since it's `return`ing the value, you don't need `elif`.
- Don't use `|` - that's for bit-wise OR; use `or` for boolean conditions.
- Don't use excessive parentheses, it's worse for readability.
- You can refactor the `if`s to be more readable with something like:

      if values in ('corporate governance', 'accounting', 'auditing', 'manufacturing', 'dao challenges'):
          return 'corporate governance'
      # and so on
- even better, use a dictionary to map the resulting values:
# Direct lookup table: each raw Area value maps to its final group.
final_group = {
    'corporate governance': 'corporate governance',
    'accounting': 'corporate governance',
    'auditing': 'corporate governance',
    'manufacturing': 'corporate governance',
    'dao challenges': 'corporate governance',
    'blockchain governance': 'Blockchain Technology Governance',
    'bct standards': 'Blockchain Technology Governance',
    'blockchain hashing': 'Blockchain Technology Governance',
    'developers incentive': 'Blockchain Technology Governance',
    'smart contract': 'Blockchain Technology Governance',
    'Smart contract': 'Blockchain Technology Governance',
    # and so on
}

# Series.map with a dict looks each value up as a key; values that are not
# keys of the dict come out as NaN.
df['final_group'] = df['Area'].map(final_group)
- And since that gets really verbose, you can reverse it and use each target as the dictionary key, and the values would be all the possible ones for that, as a list or tuple:
# Inverse layout: each final group lists the raw Area values that belong to it.
final_group_categories = {
    'corporate governance': (
        'corporate governance',
        'accounting',
        'auditing',
        'manufacturing',
        'dao challenges',
    ),
    'Blockchain Technology Governance': (
        'blockchain governance',
        'bct standards',
        'blockchain hashing',
        'developers incentive',
        'smart contract',
        'Smart contract',
    ),
    # etc.
}

# then reverse it to use in the map function:
final_group = {}
for group_target, invals in final_group_categories.items():
    for inval in invals:
        final_group[inval] = group_target

# same thing as a dict comprehension (either form alone is enough):
final_group = {
    inval: group_target
    for group_target, invals in final_group_categories.items()
    for inval in invals
}

df['final_group'] = df['Area'].map(final_group)
- Do the matching on lowercase so that you don't have duplicates like `'smart contract'` and `'Smart contract'` (every key in `final_group` must then be lowercase as well):

      df['final_group'] = df['Area'].str.lower().map(final_group)