Home > other >  Loop current python code for entire directory
Loop current python code for entire directory

Time:03-12

What I am looking to do is take the code I currently have and adapt it (or something similar) so that it runs on an entire directory of CSV files instead of one file at a time. The problem is that I have 50 files, and it would be much simpler if I could just point the script at a directory and have it run on every file inside that directory.

Thanks in advance

import pandas as pd 
# Load the source file and give the pool-code column a shorter name.
df = pd.read_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21.csv")
df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

# Codes that belong to each category.
auto_codes = [
    '21', '94', '103', '105', '22', '82', '97', '104',
    '1', '71', '100', '2', '35', '62', '72', '101',
]
mortgage_codes = [
    "M000", "M001", "M003", "M004", "M005", "M006", "M007", "M008",
    "M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025",
    "M027", "M028", "M029", "M031", "M033", "M035", "M036", "M037",
    "M038", "M039", 'M040', 'M041', 'M042', 'M043', 'M044', 'M020',
    'M021', 'M022', 'M023', 'M026', 'M032', 'M034',
    '18', '28', '34', '87',
]
heloc_codes = ["17", "83", "88", "19", "31", "84", "85"]
cc_codes = ["116", "118", "119", "120", "121", "122", "123", "125"]
other_codes = [
    "33", "41", "51", "52", "56", "57", "58", "59", "75", "76",
    "130", "131", "132", "133", "134", "135", "136", "140",
    "54", "55", "60", "77", "78", "79", "115",
    "4", "5", "6", "7", "13", "14", "16",
    "32", "44", "45", "46", "47", "67",
    "106", "107", "109", "110", "160",
    "3", "10", "11", "12", "25", "69", "95", "102",
]

# Keep only the rows whose code falls in each category.
df_Auto = df[df['Code'].isin(auto_codes)]
df_Mortgage = df[df['Code'].isin(mortgage_codes)]
df_HELOC = df[df['Code'].isin(heloc_codes)]
df_CC = df[df['Code'].isin(cc_codes)]
df_Other = df[df['Code'].isin(other_codes)]

# Write one CSV per category next to the source file.
df_Auto.to_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21_auto.csv")
df_Mortgage.to_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21_mortgage.csv")
df_HELOC.to_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21_HELOC.csv")
df_CC.to_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21_CC.csv")
df_Other.to_csv(r"C:\Users\Kris\OneDrive - kris\SW\12-21_Other.csv")

CodePudding user response:

You should define a function that processes a single CSV file and then loop over all the files in your directory. For example:

import os
def process_csv(path_name):
    """Handle one CSV file; this placeholder only reports the path it was given."""
    message = f"Processing csv at {path_name}"
    print(message)

def loop_over_directory(directory_name):
    """Call process_csv on every entry found in directory_name.

    Every name returned by os.listdir is passed on, so sub-directories and
    non-CSV files are handed to process_csv as well.
    """
    for file_name in os.listdir(directory_name):
        # os.path.join builds the full path with the platform's separator.
        process_csv(os.path.join(directory_name, file_name))

CodePudding user response:

Use glob to find the CSV files and call a processing function on each one:

import pandas as pd
import os
from glob import glob

# Category name -> list of codes that belong to it. Keeping this in a dict
# lets the processing code loop over categories instead of repeating itself.
code_dict = {
    "Auto": [
        "21", "94", "103", "105", "22", "82", "97", "104",
        "1", "71", "100", "2", "35", "62", "72", "101",
    ],
    "Mortgage": [
        "M000", "M001", "M003", "M004", "M005", "M006", "M007", "M008",
        "M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025",
        "M027", "M028", "M029", "M031", "M033", "M035", "M036", "M037",
        "M038", "M039", "M040", "M041", "M042", "M043", "M044", "M020",
        "M021", "M022", "M023", "M026", "M032", "M034",
        "18", "28", "34", "87",
    ],
    "HELOC": ["17", "83", "88", "19", "31", "84", "85"],
    "CC": ["116", "118", "119", "120", "121", "122", "123", "125"],
    "Other": [
        "33", "41", "51", "52", "56", "57", "58", "59", "75", "76",
        "130", "131", "132", "133", "134", "135", "136", "140",
        "54", "55", "60", "77", "78", "79", "115",
        "4", "5", "6", "7", "13", "14", "16",
        "32", "44", "45", "46", "47", "67",
        "106", "107", "109", "110", "160",
        "3", "10", "11", "12", "25", "69", "95", "102",
    ],
}

def process_csv(file, codes_by_name=None):
    """Split one CSV file into a separate CSV per code category.

    The "Segmentation/Pool Code" column is renamed to "Code", and for every
    category the rows whose code is listed for it are written to
    "<input name>_<category in lower case>.csv" in the input file's directory.

    Args:
        file: Path of the CSV file to split.
        codes_by_name: Mapping of category name to a list of code strings.
            Defaults to the module-level code_dict.

    Raises:
        KeyError: If the file has no "Segmentation/Pool Code" column.
    """
    if codes_by_name is None:
        codes_by_name = code_dict

    # Splitting up the file name in to parts for use later
    base_name = os.path.splitext(os.path.basename(file))[0]
    save_dir = os.path.split(file)[0]

    print(f"Reading {base_name}")

    # Read the codes as text. Left to type inference, a file that contains
    # only numeric codes yields an integer column, and isin() against the
    # string codes would then match nothing.
    df = pd.read_csv(file, dtype={"Segmentation/Pool Code": str})
    df = df.rename(columns={"Segmentation/Pool Code": "Code"})

    # Looping over each of the items in the dictionary
    for name, codes in codes_by_name.items():
        sub_df = df.loc[df["Code"].isin(codes)]
        # Constructing the save file from the dictionary key
        save_file = os.path.join(save_dir, f"{base_name}_{name.lower()}.csv")
        sub_df.to_csv(save_file)

# Search for files in this directory:
search_dir = r"C:\Users\Kris\OneDrive - kris\SW"

# process_csv writes its "<name>_<category>.csv" outputs into this same
# directory, so on a second run the glob below would match them and split
# them again (producing "<name>_auto_auto.csv" and so on). Skip them.
# NOTE: an input file whose own name ends in one of these suffixes is
# skipped too.
output_suffixes = tuple(f"_{name.lower()}.csv" for name in code_dict)

# glob is a nice way to search in a directory
files = glob(os.path.join(search_dir, "*.csv"))
for file in files:
    if file.lower().endswith(output_suffixes):
        continue
    # Process each file one at a time
    process_csv(file)
  • Related