Home > Back-end >  Edit this code to run through all CSV files in a folder?
Edit this code to run through all CSV files in a folder?

Time:03-16

I want to preface this with the fact that I am brand new to Python and pandas. I created the code below to read a CSV file, parse out rows based on a column value, and then create and save 5 CSVs. The challenge I am facing now is that I have 50 files. I am hoping to find a way to keep what I have and add a loop that runs through the entire folder, instead of entering the path of each file individually. Thanks for any help.

import pandas as pd 
# Split one loan export into per-category CSVs, keyed on the pool code column.
# The code column is read as text because the category lists below hold
# strings; if pandas inferred integers (a file with only numeric codes),
# none of the isin() lookups would match.
df = pd.read_csv(
    r"C:\Users\Kris\Data\Loans 12-21.csv",
    dtype={'Segmentation/Pool Code': str},
)
df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

df_Auto = df.loc[df['Code'].isin(['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'])]

df_Mortgage = df.loc[df['Code'].isin(["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'])]

df_HELOC = df.loc[df['Code'].isin(["17","83","88","19","31","84","85"])]

df_CC = df.loc[df['Code'].isin(["116","118","119","120","121","122","123","125"])]

df_Other = df.loc[df['Code'].isin(["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"])]

# Save Files
df_Auto.to_csv(r"C:\Users\Kris\Data\Loans 12-21_auto.csv")
df_Mortgage.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Mortgage.csv")
df_HELOC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_HELOC.csv")
df_CC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_CC.csv")
# "Other" gets its own file; writing it to the source path would replace the
# full export with a subset of its rows.
df_Other.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Other.csv")

CodePudding user response:

Use this as a starting point. It will read each CSV in the csvs list, process it, and write the results to several new files:

import pandas as pd 
import os

csv_dir = r"C:\Users\Kris\Data"

# Output-file suffix -> pool codes that belong to that category.
# Defined once, outside the loop, because it is the same for every file.
CATEGORY_CODES = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgage": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}


def _is_generated(path):
    """Return True if *path* looks like a split file written by this script.

    The check is by name only: any file whose stem ends in one of the
    category suffixes is treated as output, so source files must not use
    those endings.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return stem.endswith(tuple(CATEGORY_CODES))


# The list is built before any output is written, so files created below are
# not picked up mid-run; outputs of an earlier run are skipped so that they
# are not split a second time.
csvs = [
    entry.path
    for entry in os.scandir(csv_dir)
    if entry.is_file()
    and entry.name.lower().endswith('.csv')
    and not _is_generated(entry.path)
]

for csv_path in csvs:
    # Read the code column as text: the category lists hold strings, and an
    # inferred integer column would match none of them.
    df = pd.read_csv(csv_path, dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save Files: one CSV per category, next to the source file. Every
    # category (including "Other") gets a suffix, so the source file is
    # never overwritten.
    file_name, ext = os.path.splitext(csv_path)
    for suffix, codes in CATEGORY_CODES.items():
        df.loc[df['Code'].isin(codes)].to_csv(f"{file_name}{suffix}{ext}")

CodePudding user response:

When the folder contains only CSV files:

import pandas as pd
import os

url = "C://Users//Kris//Data//" # insert your csv folder path
files = os.listdir(url)

# Output-file suffix -> pool codes that belong to that category.
CATEGORY_CODES = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgage": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}

for i in files:
    # os.listdir returns bare names of every entry in the folder. Keep only
    # CSVs, and skip split files left by an earlier run (recognised by their
    # category suffix) so they are not split again.
    base, ext = os.path.splitext(i)
    if ext.lower() != ".csv" or base.endswith(tuple(CATEGORY_CODES)):
        continue

    # Join the folder back on: the bare name alone would be resolved against
    # the current working directory. The code column is read as text because
    # the category lists hold strings.
    df = pd.read_csv(os.path.join(url, i), dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save Files: one CSV per category, next to the source file. "Other" is
    # suffixed as well, so the source file is never overwritten.
    for suffix, codes in CATEGORY_CODES.items():
        df.loc[df['Code'].isin(codes)].to_csv(os.path.join(url, base + suffix + ".csv"))
  • Related