I am trying to find a way to add a function to my script that ignores or deletes the first line of my CSV files. I know this can be done with pandas, but is it possible without it?
Many thanks for your help.
Here is my code -
from os import mkdir
from os.path import join, splitext, isdir
from glob import iglob
from csv import DictReader
from collections import defaultdict
from urllib.request import urlopen
from shutil import copyfileobj
csv_folder = r"/Users/folder/PycharmProjects/pythonProject/CSVfiles/"
glob_pattern = "*.csv"

# Process every CSV file in the folder. Each file gets a sibling directory
# named after the file (minus its extension) that receives the downloads
# listed in the file's "link" column.
for file in iglob(join(csv_folder, glob_pattern)):
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files handed to a reader.
    with open(file, newline="") as csv_file:
        reader = DictReader(csv_file)
        save_folder, _ = splitext(file)
        if not isdir(save_folder):
            mkdir(save_folder)
        # Number of rows seen so far per title; the count becomes part of the
        # saved file name so that repeated titles do not collide.
        title_counter = defaultdict(int)
        for row in reader:
            url = row["link"]
            title = row["title"]
            # Increment instead of assigning 1: with a plain assignment every
            # repeated title got the same "_1" suffix and each download
            # overwrote the previous one.
            title_counter[title] += 1
            _, ext = splitext(url)
            # Slashes are replaced in the generated name only, so a title
            # containing "/" cannot be read as a sub-directory.
            save_filename = join(
                save_folder,
                f"{title}_{title_counter[title]}{ext}".replace('/', '-'),
            )
            print(f"'{save_filename}'")
            # Stream the response body straight into the target file.
            with urlopen(url) as req, open(save_filename, "wb") as save_file:
                copyfileobj(req, save_file)
CodePudding user response:
Use the next() function to skip the first row of your CSV.
with open(file) as csv_file:
reader = DictReader(csv_file)
# Skip the first data row: DictReader is given no fieldnames here, so it takes
# the file's first line as the column names and this call discards the row
# that follows it.
next(reader)
CodePudding user response:
You could just read the raw text from the file as normal and then split the text by new line and delete the first line:
# Read the whole file; the with-statement closes the handle even if the read
# fails, which a bare open() call does not.
with open(filename, 'r') as file:
    content = file.read()  # Read the file
lines = content.split("\n")  # Split the text by the newline character
del lines[0]  # Delete the first index from the resulting list, i.e. delete the first line.
However, this reads the whole file into memory, which may take a long time for larger CSV files, so it may not be the best solution.
Or you could simply skip the first row in your for loop. Instead of:
...
for row in reader:
...
Could you use:
...
# Iterate the reader directly: wrapping it in list() would load every row
# into memory before the loop starts, for no benefit.
for row_num, row in enumerate(reader):
    if row_num == 0:
        # Skip the first row yielded by the reader.
        continue
    ...
instead? I think that should skip the first row.