I want to preface this by saying that I am brand new to Python and pandas. I wrote the code below to read a CSV file, select rows based on the value in one column, and save the results into 5 separate CSV files. The challenge I am facing now is that I have 50 input files. I am hoping to reuse what I have and add a loop that runs through the entire folder, instead of entering the path of each file individually. Thanks for any help.
import pandas as pd
# Split one loan export into five CSV files, one per product category,
# based on the value in the 'Segmentation/Pool Code' column.

# Read the pool code as text: every code listed below is a string, so a value
# that pandas parsed as the integer 21 would never match '21' in isin().
df = pd.read_csv(
    r"C:\Users\Kris\Data\Loans 12-21.csv",
    dtype={'Segmentation/Pool Code': str},
)
df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

df_Auto = df.loc[df['Code'].isin([
    '21', '94', '103', '105', '22', '82', '97', '104',
    '1', '71', '100', '2', '35', '62', '72', '101',
])]
df_Mortgage = df.loc[df['Code'].isin([
    "M000", "M001", "M003", "M004", "M005", "M006", "M007", "M008",
    "M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025",
    "M027", "M028", "M029", "M031", "M033", "M035", "M036", "M037",
    "M038", "M039", 'M040', 'M041', 'M042', 'M043', 'M044', 'M020',
    'M021', 'M022', 'M023', 'M026', 'M032', 'M034',
    '18', '28', '34', '87',
])]
df_HELOC = df.loc[df['Code'].isin(["17", "83", "88", "19", "31", "84", "85"])]
df_CC = df.loc[df['Code'].isin(["116", "118", "119", "120", "121", "122", "123", "125"])]
df_Other = df.loc[df['Code'].isin([
    "33", "41", "51", "52", "56", "57", "58", "59", "75", "76",
    "130", "131", "132", "133", "134", "135", "136", "140",
    "54", "55", "60", "77", "78", "79", "115",
    "4", "5", "6", "7", "13", "14", "16",
    "32", "44", "45", "46", "47", "67",
    "106", "107", "109", "110", "160",
    "3", "10", "11", "12", "25", "69", "95", "102",
])]

# Save files
df_Auto.to_csv(r"C:\Users\Kris\Data\Loans 12-21_auto.csv")
# File name corrected from the misspelled "_Mortgafe".
df_Mortgage.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Mortgage.csv")
df_HELOC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_HELOC.csv")
df_CC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_CC.csv")
# Give the "Other" subset its own suffix; writing it to "Loans 12-21.csv"
# would overwrite the input file that was just read.
df_Other.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Other.csv")
CodePudding user response:
Use this as a starting point. It will read each CSV in the csvs
list, process it, and write the results to several new files:
import pandas as pd
import os
# Split every loan CSV in csv_dir into one file per product category,
# based on the value in the 'Segmentation/Pool Code' column.
csv_dir = r"C:\Users\Kris\Data"

# Output file-name suffix -> pool codes that belong to that category.
# Defined once, outside the loop, because it is the same for every file.
CODE_GROUPS = {
    "_auto": [
        '21', '94', '103', '105', '22', '82', '97', '104',
        '1', '71', '100', '2', '35', '62', '72', '101',
    ],
    # Suffix corrected from the misspelled "_Mortgafe".
    "_Mortgage": [
        "M000", "M001", "M003", "M004", "M005", "M006", "M007", "M008",
        "M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025",
        "M027", "M028", "M029", "M031", "M033", "M035", "M036", "M037",
        "M038", "M039", 'M040', 'M041', 'M042', 'M043', 'M044', 'M020',
        'M021', 'M022', 'M023', 'M026', 'M032', 'M034',
        '18', '28', '34', '87',
    ],
    "_HELOC": ["17", "83", "88", "19", "31", "84", "85"],
    "_CC": ["116", "118", "119", "120", "121", "122", "123", "125"],
    # The "Other" subset gets its own suffix; writing it to the bare file
    # name would overwrite the input file.
    "_Other": [
        "33", "41", "51", "52", "56", "57", "58", "59", "75", "76",
        "130", "131", "132", "133", "134", "135", "136", "140",
        "54", "55", "60", "77", "78", "79", "115",
        "4", "5", "6", "7", "13", "14", "16",
        "32", "44", "45", "46", "47", "67",
        "106", "107", "109", "110", "160",
        "3", "10", "11", "12", "25", "69", "95", "102",
    ],
}

# Collect the input paths before writing anything. Files whose name already
# ends in one of the output suffixes are results of an earlier run and are
# skipped so they are not split a second time.
csvs = [
    entry.path
    for entry in os.scandir(csv_dir)
    if entry.is_file()
    and entry.name.lower().endswith('.csv')
    and not os.path.splitext(entry.name)[0].endswith(tuple(CODE_GROUPS))
]

for csv_path in csvs:
    # Read the pool code as text: all codes above are strings, so a value
    # parsed as the integer 21 would never match '21' in isin().
    df = pd.read_csv(csv_path, dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files next to the input: "<name>_auto.csv", "<name>_CC.csv", ...
    file_name, ext = os.path.splitext(csv_path)
    for suffix, codes in CODE_GROUPS.items():
        df.loc[df['Code'].isin(codes)].to_csv(f"{file_name}{suffix}{ext}")
CodePudding user response:
When the folder contains only CSV files:
import pandas as pd
import os
# Split every loan CSV in the folder into one file per product category,
# based on the value in the 'Segmentation/Pool Code' column.
url = "C://Users//Kris//Data//" # insert your csv folder path

# Output file-name suffix -> pool codes that belong to that category.
CODE_GROUPS = {
    "_auto": [
        '21', '94', '103', '105', '22', '82', '97', '104',
        '1', '71', '100', '2', '35', '62', '72', '101',
    ],
    # Suffix corrected from the misspelled "_Mortgafe".
    "_Mortgage": [
        "M000", "M001", "M003", "M004", "M005", "M006", "M007", "M008",
        "M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025",
        "M027", "M028", "M029", "M031", "M033", "M035", "M036", "M037",
        "M038", "M039", 'M040', 'M041', 'M042', 'M043', 'M044', 'M020',
        'M021', 'M022', 'M023', 'M026', 'M032', 'M034',
        '18', '28', '34', '87',
    ],
    "_HELOC": ["17", "83", "88", "19", "31", "84", "85"],
    "_CC": ["116", "118", "119", "120", "121", "122", "123", "125"],
    # The "Other" subset gets its own suffix; writing it to "<name>.csv"
    # would overwrite the input file.
    "_Other": [
        "33", "41", "51", "52", "56", "57", "58", "59", "75", "76",
        "130", "131", "132", "133", "134", "135", "136", "140",
        "54", "55", "60", "77", "78", "79", "115",
        "4", "5", "6", "7", "13", "14", "16",
        "32", "44", "45", "46", "47", "67",
        "106", "107", "109", "110", "160",
        "3", "10", "11", "12", "25", "69", "95", "102",
    ],
}

# os.listdir returns the names as they are now, so files written below are
# not picked up during this run.
files = os.listdir(url)
for i in files:
    # splitext keeps dots inside the name ("Loans 12.21.csv" -> "Loans 12.21"),
    # whereas split(".")[0] would cut the name at the first dot.
    stem, ext = os.path.splitext(i)
    if ext.lower() != ".csv":
        continue
    # Skip results of an earlier run so they are not split a second time.
    if stem.endswith(tuple(CODE_GROUPS)):
        continue

    # Read the current file (the loop variable holds only the file name).
    # The pool code is read as text so that it matches the string codes above.
    df = pd.read_csv(os.path.join(url, i), dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files
    for suffix, codes in CODE_GROUPS.items():
        df.loc[df['Code'].isin(codes)].to_csv(os.path.join(url, stem + suffix + ".csv"))