Currently, I'm removing the Roman numerals in some csv files with the following Python code:
def inplace_change(filename, old_string, new_string):
    """Replace every occurrence of ``old_string`` in a file, in place.

    The whole file is read into memory. If ``old_string`` does not occur,
    a message is printed and the file is left untouched; otherwise the file
    is rewritten with each occurrence replaced by ``new_string``.

    Args:
        filename: Path of the text file to modify.
        old_string: Substring to search for.
        new_string: Text substituted for each occurrence.
    """
    # Safely read the input filename using 'with'
    with open(filename) as f:
        s = f.read()

    if old_string not in s:
        print(f'"{old_string}" not found in {filename}.')
        return

    print(f'Changing "{old_string}" to "{new_string}" in {filename}')
    # Build the new content *before* opening for writing: mode 'w' empties
    # the file as soon as it is opened.
    s = s.replace(old_string, new_string)

    # Safely write the changed content, if found in the file
    with open(filename, 'w') as f:
        f.write(s)
# Numbering markers (Arabic, ASCII Roman and Unicode Roman numerals) to strip
# from the files. The entries are applied one after another in list order.
d_list = [
    '1. ', '2. ', '3. ', '4. ', 'XVIII. ', 'XVII. ', 'XVI. ', 'XV. ', 'XIV. ', 'XIII. ',
    'XII. ', 'XI. ', 'IX. ', 'VIII. ', 'VII. ', 'VI. ', 'IV. ', 'IV. ',
    'XVIII.', 'XVII.', 'XVI.', 'XV.', 'XIV.', 'XIII.',
    'XII.', 'XI.', 'IX.', 'VIII.', 'VII.', 'VI.', 'IV.', 'IV.',
    # The comma after the last entry on the next line was missing, which
    # silently merged 'Ⅱ.' and 'Ⅷ.' into the single entry 'Ⅱ.Ⅷ.'.
    'Ⅰ.', 'Ⅱ.', 'Ⅲ.', 'Ⅳ.', 'Ⅴ.', 'Ⅵ.', 'Ⅶ-1.', 'Ⅶ-2.', 'Ⅶ.', 'Ⅱ.',
    'Ⅷ.', 'Ⅸ.', 'Ⅹ.', '1.', '2.', '3.', '4.', '5.', '6.', '7.',
    'I. ', 'II. ', 'III. ', 'Ⅷ.',
    'ⅥI. ', 'VIIII. ', '- ', 'I', 'II',
    'V.', 'Ⅵ', 'VIII', 'I.', 'II.',
    'V.', 'X.', 'Ⅹ', 'V', 'Ⅷ.',
]
# Strip every marker in d_list from each file in the CIS sub-directory.
# Each inplace_change() call handles a single marker, so every file is
# opened once per entry of d_list.
cis_dir = output_path + '/CIS'
for file in os.listdir(cis_dir):
    for dlist in d_list:
        inplace_change(cis_dir + '/' + file, old_string=dlist, new_string='')
However, the processing speed is too slow, which is a concern. Is there a faster and more convenient way?
CodePudding user response:
From what I understand of your code, the file is opened again on every iteration over `d_list`. That makes the process inefficient, because each read/write on a file costs some time.
How about opening each file once and then iterating over `d_list`? For example:
# Text substituted for every marker that is found (empty string = delete).
new_string = ''

# Numbering markers (Arabic, ASCII Roman and Unicode Roman numerals) to strip
# from the files. The entries are applied one after another in list order.
d_list = [
    '1. ', '2. ', '3. ', '4. ', 'XVIII. ', 'XVII. ', 'XVI. ', 'XV. ', 'XIV. ', 'XIII. ',
    'XII. ', 'XI. ', 'IX. ', 'VIII. ', 'VII. ', 'VI. ', 'IV. ', 'IV. ',
    'XVIII.', 'XVII.', 'XVI.', 'XV.', 'XIV.', 'XIII.',
    'XII.', 'XI.', 'IX.', 'VIII.', 'VII.', 'VI.', 'IV.', 'IV.',
    # The comma after the last entry on the next line was missing, which
    # silently merged 'Ⅱ.' and 'Ⅷ.' into the single entry 'Ⅱ.Ⅷ.'.
    'Ⅰ.', 'Ⅱ.', 'Ⅲ.', 'Ⅳ.', 'Ⅴ.', 'Ⅵ.', 'Ⅶ-1.', 'Ⅶ-2.', 'Ⅶ.', 'Ⅱ.',
    'Ⅷ.', 'Ⅸ.', 'Ⅹ.', '1.', '2.', '3.', '4.', '5.', '6.', '7.',
    'I. ', 'II. ', 'III. ', 'Ⅷ.',
    'ⅥI. ', 'VIIII. ', '- ', 'I', 'II',
    'V.', 'Ⅵ', 'VIII', 'I.', 'II.',
    'V.', 'X.', 'Ⅹ', 'V', 'Ⅷ.',
]
def inplace_change(filename, old_strings=None, replacement=None):
    """Strip a list of substrings from a file, opening it only once.

    The file is read into memory, every entry of ``old_strings`` is replaced
    in list order, and the result is written back a single time if anything
    changed.

    Args:
        filename: Path of the text file to modify in place.
        old_strings: Substrings to replace, applied in order. Defaults to
            the module-level ``d_list``.
        replacement: Text substituted for each match. Defaults to the
            module-level ``new_string``.
    """
    if old_strings is None:
        old_strings = d_list
    if replacement is None:
        replacement = new_string

    # 'r+' opens the existing file for both reading and writing.
    with open(filename, 'r+') as f:
        original = f.read()
        s = original
        for old_string in old_strings:
            if old_string not in s:
                print(f'"{old_string}" not found in {filename}.')
                continue
            print(f'Changing "{old_string}" to "{replacement}" in {filename}')
            s = s.replace(old_string, replacement)

        if s != original:
            # IMPORTANT: bring the cursor back to the beginning of the file
            # before writing.
            f.seek(0)
            f.write(s)
            # The new content is usually shorter than the old one; without
            # truncating, the tail of the old content would be left behind.
            f.truncate()
# Process every file in the CIS sub-directory with a single call per file.
for file in os.listdir(output_path + '/CIS'):
    inplace_change(output_path + '/CIS/' + file)