import os
import sys
valid_lines = []
corrupt_lines = []
'''
The validate_data function will check the students.csv line by line for corrupt data.
- Valid lines should be added to the valid_lines list.
- Invalid lines should be added to the corrupt_lines list.
Example input: 0896801,Kari,Wilmore,1970-06-18,INF
This data is valid and the line should be added to the valid_lines list unchanged.
Example input: 0773226,Junette,Gur_ry,1995-12-05,
This data is invalid and the line should be added to the corrupt_lines list in the following format:
0773226,Junette,Gur_ry,1995-12-05, => INVALID DATA: ['0773226', 'Gur_ry', '']
In the above example the studentnumber does not start with '08' or '09',
the last name contains a special character and the student program is empty.
Don't forget to put the students.csv file in the same location as this file!
'''
def validate_data(line):
# TYPE YOUR SOLUTION CODE HERE
#CSV STUDENT NUMBER
student_number_valid = True
index = 0
studentnumber, firstname, lastname, birthdate, studyprogram = line.split(",")
for element in line.split(","):
if index == 0:
student_number = element
if len(studentnumber) > 0:
csv_s_n = studentnumber[0]
csv_s_n2 = studentnumber[1]
if csv_s_n == '0' and csv_s_n2 == '8' or '9':
valid_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
#CSV NAME
if len(firstname) > 0:
if firstname.isalpha() == True:
valid_lines.append(firstname)
else:
corrupt_lines.append(firstname)
else:
corrupt_lines.append(firstname)
if len(lastname) > 0:
if lastname.isalpha() == True:
valid_lines.append(lastname)
else:
corrupt_lines.append(lastname)
else:
corrupt_lines.append(lastname)
#CSV BIRTHDAY
if len(birthdate.split()) == 3:
year1, month1, day1 = birthdate.split("-")
year1=int(year1)
valid_months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
valid_days = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10","11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
thirty_day_months = ["04", "06", "09", "11"]
if not year1 in range(1960, 2004 1):
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if not month1 in valid_months:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if month1 == "02":
valid_days.remove("31")
valid_days.remove("30")
valid_days.remove("29")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for february
if month1 in thirty_day_months:
valid_days.remove("31")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for 30-day months
else:
birthdate = (f"{year1}-{month1}-{day1}")
valid_lines.append(birthdate)
else:
corrupt_lines.append(birthdate)
#CSV STUDYPROGRAM
if len(studyprogram) > 1:
if studyprogram == "INF" or "TINF" or "CMD" or "AI":
valid_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
def main(csv_file):
with open(os.path.join(sys.path[0], csv_file), newline='') as csv_file:
# skip header line
next(csv_file)
for line in csv_file:
validate_data(line.strip())
print('### VALID LINES ###')
print("\n".join(valid_lines))
print('### CORRUPT LINES ###')
print("\n".join(corrupt_lines))
if __name__ == "__main__":
main('students.csv')
As you can read, the function validate_data should check the imported file for corrupt and valid lines, then append them to the correct list, and print them. It works, except that, as you can probably see, the lines will not print in a single line.
I'm sure I have to make two other lists to append the correct data into a single line, doing the same with the corrupt data, but when I try it fails.
CodePudding user response:
You're adding the line to each list depending on each validation. So you'll add it to each list multiple times, depending on which validations succeed and fail.
You should only add it to one list or the other -- if it fails any validation, add it to corrupt_list
, and only add to valid_list
if all validations succeed.
The simple way to do this is to add to corrupt_list
when each validation fails, and then return from the function. If you make it to the end of all the validations, add to valid_list
.
You shouldn't be appending individual fields to valid_lines
and corrupt_lines
, they're supposed to contain the entire line.
csv_s_n2 == '8' or '9'
is not the correct what to test if a variable is equvale to either value. See Why does "a == x or y or z" always evaluate to True? How can I compare "a" to all of those?
def validate_data(line):
# TYPE YOUR SOLUTION CODE HERE
#CSV STUDENT NUMBER
student_number_valid = True
index = 0
studentnumber, firstname, lastname, birthdate, studyprogram = line.split(",")
if len(studentnumber) > 0:
csv_s = studentnumber[0:2]
if csv_s not in ('08', '09'))
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV NAME
if len(firstname) > 0:
if not firstname.isalpha():
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
if len(lastname) > 0:
if not lastname.isalpha():
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV BIRTHDAY
if len(birthdate.split('-')) == 3:
year1, month1, day1 = birthdate.split("-")
year1=int(year1)
valid_months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
valid_days = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10","11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
thirty_day_months = ["04", "06", "09", "11"]
if not year1 in range(1960, 2004 1):
corrupt_lines.append(line)
return
if not month1 in valid_months:
corrupt_lines.append(line)
return
if month1 == "02":
# validate day for february
valid_days.remove("31")
valid_days.remove("30")
valid_days.remove("29")
elif month1 in thirty_day_months:
# validate day for 30-day months
valid_days.remove("31")
if not day1 in valid_days:
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV STUDYPROGRAM
if studyprogram not in ("INF", "TINF", "CMD", "AI"):
corrupt_lines.append(line)
return
valid_lines.append(line)