I want to sort every row of the CSV string with the following code:
import csv
def sort_csv_columns(csv_string: str) -> str:
    """Sort the header names and the data lines of a comma-separated string.

    The first line is split on commas and sorted case-insensitively. The
    remaining lines are sorted relative to each other; the cells inside a
    data line are never moved.

    Args:
        csv_string: Newline-separated lines of comma-separated cells; the
            first line holds the column names.

    Returns:
        The sorted header line followed by the data lines, joined with "\n".
    """
    # Split the CSV string into lines
    lines = csv_string.strip().split("\n")
    # Split the first line (column names) and sort it case-insensitively.
    # The sort is in place, so the original column order is not kept.
    header = lines[0].split(",")
    header.sort(key=str.lower)
    # Split the remaining lines (data rows) into cells
    data = [line.split(",") for line in lines[1:]]
    # header.index(col) is looked up in the very list being iterated, so for
    # unique names the key is just the row's cells in their existing order.
    # list.sort then reorders whole rows, not the cells within a row.
    data.sort(key=lambda row: [row[header.index(col)] for col in header])
    # Join the sorted data and header into a single CSV string
    sorted_csv = "\n".join([",".join(header)] + [",".join(row) for row in data])
    return sorted_csv
# Try the function on a small sample: one header line and two data lines
csv_string = (
    "Beth,Charles,Danielle,Adam,Eric\n"
    "17945,10091,10088,3907,10132\n"
    "2,12,13,48,11"
)
sorted_csv = sort_csv_columns(csv_string)
print(sorted_csv)
Output
Adam,Beth,Charles,Danielle,Eric
17945,10091,10088,3907,10132
2,12,13,48,11
Expected Output
Adam,Beth,Charles,Danielle,Eric\n
3907,17945,10091,10088,10132\n
48,2,12,13,11
What am I doing wrong?
I am not able to sort the rows other than the top header.
CodePudding user response:
As `data` represents your lines, `data.sort` can only sort the lines relative to each other, not each line's content (the cells); you need to reorder each element of `data`.
Also, `[header.index(col) for col in header]` will always give `0,1,2,3,4`, because you look up each index in the same list you are iterating over.
Sort header then reorder
You need sorting, but not the `sort` method on the rows: you just need to reorder each row's values according to the new header order.
def sort_csv_columns(csv_string: str) -> str:
    """Reorder the columns of a CSV string so the header is sorted.

    The header names are sorted case-insensitively and every data row is
    rearranged so each cell stays under its own column name.

    Args:
        csv_string: Newline-separated lines of comma-separated cells; the
            first line holds the column names.

    Returns:
        The reordered header and rows, joined with "\n".

    Raises:
        IndexError: If a data row has fewer cells than the header.
    """
    lines = csv_string.strip().split("\n")
    initial_header = lines[0].split(",")
    # Sort column positions instead of names: computed once for all rows, and
    # duplicate names keep their own data (a name lookup with list.index
    # would always return the first matching column).
    order = sorted(
        range(len(initial_header)),
        key=lambda position: initial_header[position].lower(),
    )
    header = [initial_header[position] for position in order]
    data = [
        [row[position] for position in order]
        for row in (line.split(",") for line in lines[1:])
    ]
    sorted_csv = "\n".join([",".join(header)] + [",".join(row) for row in data])
    return sorted_csv
Sort by header but maintain row together
You can avoid the reordering step if you sort the data while it is stored by column instead of by row.
def sort_csv_columns(csv_string: str) -> str:
    """Sort the columns of a CSV string by header name, case-insensitively.

    The rows are transposed into columns, the columns are sorted by their
    first cell (the column name), and the result is transposed back.

    Args:
        csv_string: Newline-separated lines of comma-separated cells; the
            first line holds the column names.

    Returns:
        The lines with their columns reordered, joined with "\n".
    """
    data = [line.split(",") for line in csv_string.strip().split("\n")]
    # [['Beth', 'Charles', 'Danielle', 'Adam', 'Eric'], ['17945', '10091', '10088', '3907', '10132']
    # , ['2', '12', '13', '48', '11']]
    # Transpose rows into columns. zip stops at the shortest row, so cells
    # beyond that length are dropped when the rows are ragged.
    data = list(zip(*data))
    # [('Beth', '17945', '2'), ('Charles', '10091', '12'), ('Danielle', '10088', '13'),
    # ('Adam', '3907', '48'), ('Eric', '10132', '11')]
    # sort by first value : name
    data.sort(key=lambda column: column[0].lower())
    # Transpose back so each tuple is a line again
    sorted_csv = "\n".join([",".join(row) for row in zip(*data)])
    return sorted_csv