I have a CSV file that contains chemical compound names and some related info. What I need to do is add new columns holding their formulas and molecular weights, and count the number of H, C, N, O and S atoms in each formula. I am stuck on the atom-counting part. I have a function for it, but I don't know how to integrate it and make the code work.
import pandas as pd
import urllib.request
import copy
import re
# Load the table from AminoAcids.csv; the script later adds columns to this
# frame and writes it back to the same file.
df = pd.read_csv('AminoAcids.csv')
def countAtoms(string, dict=None):
    """Count the atoms of each element in a chemical formula string.

    An element symbol is one uppercase letter followed by any number of
    lowercase letters; the digits directly after it are its count. A symbol
    with no digits counts as 1, and a symbol that occurs more than once is
    summed (``"CH3CH2OH"`` -> ``{"C": 2, "H": 6, "O": 1}``). Characters that
    are not part of a symbol/count pair (whitespace, brackets, ...) are
    ignored; parenthesised groups are NOT expanded.

    Args:
        string: The formula to parse, e.g. ``"C3H7NO2S"``.
        dict: Optional mapping of element symbol -> count to start from.
            It is copied, never modified. The name shadows the builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        A new dict mapping each element symbol to its total count.
    """
    # A fresh dict per call: a mutable ``{}`` default would be shared by
    # every call that omits the argument.
    curDict = {} if dict is None else copy.copy(dict)
    for atom, number in re.findall(r"([A-Z][a-z]*)([0-9]*)", string):
        # An empty digit group means the symbol was written without a count.
        curDict[atom] = curDict.get(atom, 0) + (int(number) if number else 1)
    return curDict
# Formula and molecular weight for each row.
# NOTE(review): both lists are assumed to follow the row order of
# AminoAcids.csv and to have exactly one entry per row - confirm against the
# file; pandas raises ValueError if the lengths differ.
df["Formula"] = [
    'C3H7NO2', 'C6H14N4O2', 'C4H8N2O3', 'C4H7NO4',
    'C3H7NO2S', 'C5H9NO4', 'C5H10N2O3', 'C2H5NO2', 'C6H9N3O2',
    'C6H13NO2', 'C6H13NO2', 'C6H14N2O2', 'C5H11NO2S', 'C9H11NO2',
    'C5H9NO2', 'C3H7NO3', 'C4H9NO3', 'C11H12N2O2', 'C9H11NO3', 'C5H11NO2',
]
df["Molecular Weight"] = [
    '89.09', '174.2', '132.12',
    '133.1', '121.16', '147.13', '146.14', '75.07', '155.15',
    '131.17', '131.17', '146.19', '149.21', '165.19', '115.13',
    '105.09', '119.12', '204.22', '181.19', '117.15',
]

# Parse every formula once with countAtoms, then fill one integer column per
# element of interest. An element that does not occur in a formula gets 0.
atom_counts = [countAtoms(formula) for formula in df["Formula"]]
for element in ("H", "C", "N", "O", "S"):
    df[element] = [counts.get(element, 0) for counts in atom_counts]

# Overwrites the input file with the extended table.
df.to_csv("AminoAcids.csv", index=False)
print(df.to_string())
CodePudding user response:
If I understand correctly, you should be able to use str.extract
here:
# For each element, capture the digits that follow its symbol in the formula.
# The (?![a-z]) lookahead keeps "C", "N" or "S" from matching the first
# letter of a two-letter symbol such as "Cl", "Na" or "Si".
# Only the first occurrence of a symbol in a formula is read.
for element in ("H", "C", "N", "O", "S"):
    digits = df["Formula"].str.extract(element + r"(?![a-z])(\d*)", expand=False)
    # "" -> symbol present without a number (count 1); NaN -> symbol absent (0).
    df[element] = digits.replace("", "1").fillna("0").astype(int)
CodePudding user response:
here is another approach with similar result:
# Split each formula into (symbol, digits) pairs, turn the pairs into a dict,
# expand the dicts into one column per element, and read a symbol written
# without a number as 1. Elements absent from a formula are left as NaN.
# A symbol repeated within one formula keeps only its last count (dict keys
# are unique). The joined frame is returned, not stored back into df.
df.join(df['Formula'].str.findall(r'([A-Z][a-z]*)(\d*)').map(dict).apply(pd.Series).replace('', 1))
>>>
'''
Formula Molecular Weight C H N O S
0 C3H7NO2 89.09 3 7 1 2 NaN
1 C6H14N4O2 174.2 6 14 4 2 NaN
2 C4H8N2O3 132.12 4 8 2 3 NaN
3 C4H7NO4 133.1 4 7 1 4 NaN
4 C3H7NO2S 121.16 3 7 1 2 1.0
5 C5H9NO4 147.13 5 9 1 4 NaN
6 C5H10N2O3 146.14 5 10 2 3 NaN
7 C2H5NO2 75.07 2 5 1 2 NaN
8 C6H9N3O2 155.15 6 9 3 2 NaN
9 C6H13NO2 131.17 6 13 1 2 NaN
10 C6H13NO2 131.17 6 13 1 2 NaN
11 C6H14N2O2 146.19 6 14 2 2 NaN
12 C5H11NO2S 149.21 5 11 1 2 1.0
13 C9H11NO2 165.19 9 11 1 2 NaN
14 C5H9NO2 115.13 5 9 1 2 NaN
15 C3H7NO3 105.09 3 7 1 3 NaN
16 C4H9NO3 119.12 4 9 1 3 NaN
17 C11H12N2O2 204.22 11 12 2 2 NaN
18 C9H11NO3 181.19 9 11 1 3 NaN
19 C5H11NO2 117.15 5 11 1 2 NaN