I am currently trying to write a code that is supposed to add mulitple dataframes into one, using the append method. However, with the code I currently use, it seems that only the first dataframe is read. I have tried locating the problem by adding a len(df) to my code and it seems to that the merged dataframe lenght does not change after appending a new one. I am also using a loop that reads threw the files in a folder and this is used in order to only use the top rows for the first dataframe and then skip them when it is not the first.
In my loop I get the output:
41568
READING FILE 2
lenght of added dataframe 1044
lenght of appended dataframe: 41568
READING FILE 3
lenght of added dataframe 9138
lenght of appended dataframe: 41568
I expected to get the results of a combined dataframe.
The expected output is:
41568
READING FILE 2
lenght of added dataframe 1044
lenght of appended dataframe: 42612
READING FILE 3
lenght of added dataframe 9138
lenght of appended dataframe: 51750
My current code is:
from calendar import Calendar
from cmath import exp
from curses.panel import bottom_panel
from datetime import date
from sqlite3 import DateFromTicks
import sys
sys.path.append(r'c:\users\a384663\appdata\local\programs\python\python39\lib\site-packages')
from cgitb import text
from distutils import command
from errno import ERANGE
from hashlib import new
from heapq import merge
from importlib.metadata import entry_points
from itertools import count
from operator import eq
from tkinter import Frame, Spinbox, StringVar, Toplevel, filedialog
import tkinter as tk
from tkinter.constants import HORIZONTAL, X
from tkinter.ttk import Combobox, Progressbar
from tkinter import *
from turtle import done
import pandas as pd
import math
import numpy as np
import os
from os import listdir
from os.path import isfile, join
import matplotlib
import matplotlib.path as mplPath
import matplotlib.pyplot as plt
from tkcalendar import Calendar
from datetime import datetime
import glob
path=(r'C:\Users\a384663\Desktop\Autofreight\COPIED FILES FOR SPECIAL STUDY')
os.chdir(r'C:\Users\a384663\Desktop\Autofreight\COPIED FILES FOR SPECIAL STUDY')
onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
print(onlyfiles)
filen=1
list_of_dataframes = []
for filename in onlyfiles:
print("READING FILE",filen)
if filen ==1:
#try:
#merged_df=pd.read_csv(filename, encoding='iso-8859-1',skiprows=43, usecols=[1, 19, 30, 31, 33, 83, 155, 157])
merged_df=pd.read_csv(filename, encoding='iso-8859-1',skiprows=43, usecols=[1, 155, 157])
print(len(merged_df))
#except:
# pass
if filen > 1:
try:
#merged_df.append(pd.read_csv(filename, encoding='iso-8859-1',skiprows=47, usecols=[1, 19, 30, 31, 33, 83, 155, 157]))
testcsv=pd.read_csv(filename, encoding='iso-8859-1',skiprows=47, usecols=[1, 155, 157])
print("lenght of added dataframe",len(testcsv))
merged_df.append(testcsv)
print("lenght of appended dataframe:",len(merged_df))
except:
print("EXCEPTION", filename)
filen =1
print(len(merged_df))
Does anyone know how to fix this problem?
Than you in advance!
CodePudding user response:
After appending the dataframe you need to asssign it to variable like below
merged_df = merged_df.append(testcsv)