I have a text file in this format:
0.jpg 12,13,14,15,16
0.jpg 13,14,15,16,17
1.jpg 1,2,3,4,5
1.jpg 2,3,4,5,6
I want to check if the image name is the same and then concatenate those lines into one line with the following format:
0.jpg 12,13,14,15,16 13,14,15,16,17
1.jpg 1,2,3,4,5 2,3,4,5,6
I have tried something like this, but I don't know how to do the actual comparison, and I'm also not sure what logic to apply, since the first line's line_elements[0]
would have to be taken and compared with every other line's line_elements[0]:
# Asker's attempt: read every line of file.txt into `data`, then write lines to out_file.txt.
# NOTE(review): shown without indentation and incomplete as written, so it does not run as-is.
# NOTE(review): the name `input` shadows the Python builtin of the same name.
with open("file.txt", "r") as input: # Read all data lines.
data = input.readlines()
with open("out_file.txt", "w") as output: # Create output file.
for line in data: # Iterate over data lines.
line_elements = line.split() # Split line by spaces.
line_updated = [line_elements[0]] # Initialize fixed line (without undesired patterns) with image's name.
# Placeholder, not valid Python: the asker wants to compare this line's name with the next line's name.
if line_elements[0] = (next line's line_elements[0])???:
for i in line_elements[1:]: # Iterate over groups of numbers in current line.
tmp = i.split(',') # Split current group by commas.
# Keep only groups that have exactly five comma-separated values.
if len(tmp) == 5:
line_updated.append(','.join(tmp))
if len(line_updated) > 1: # If the fixed line is valid, write it to output file.
output.write(f"{' '.join(line_updated)}\n")
Could be something like:
# Asker's sketch of an index-based variant of the same idea; not runnable as written.
for i in range (len(data)):
# NOTE(review): `line[i 1]` presumably meant `line[i + 1]` (operator apparently lost) - confirm.
# NOTE(review): `line` and `line_elements` are not assigned anywhere in this sketch.
if line_elements[0] in line[i] == line_elements[0] in line[i 1]:
line_updated = [line_elements[0]]
# NOTE(review): if this loop is nested in the outer one, it reuses the index name `i`.
for i in line_elements[1:]: # Iterate over groups of numbers in current line.
tmp = i.split(',') # Split current group by commas.
# Keep only groups that have exactly five comma-separated values.
if len(tmp) == 5:
line_updated.append(','.join(tmp))
if len(line_updated) > 1: # If the fixed line is valid, write it to output file.
output.write(f"{' '.join(line_updated)}\n")
CodePudding user response:
Save the first field of the line (the image name) in a variable. For each following line, check whether its first field is equal to the saved name. If it is, append that line's numbers to the saved values; otherwise, write the saved line to the output and start a new output line.
# Merge consecutive lines that share the same image name into one output line.
# Expects `data` to be the list of input lines (for example from readlines()).
# Only adjacent lines are merged, so equal names must be next to each other.
current_name = None
current_values = []
with open("out_file.txt", "w") as output:
    for line in data:
        fields = line.split()
        if not fields:
            # Skip blank lines instead of failing on unpacking.
            continue
        # Accept one or more whitespace-separated number groups per line.
        name, *values = fields
        if name == current_name:
            # Same image as the previous line: extend the pending group.
            current_values.extend(values)
            continue
        if current_name is not None:
            # Name changed: flush the finished group before starting a new one.
            output.write(' '.join([current_name, *current_values]) + '\n')
        current_name, current_values = name, values
    # Write the last block, which the loop itself never flushes.
    if current_name is not None:
        output.write(' '.join([current_name, *current_values]) + '\n')