How should we write a code in OOP that has to read a file and several libraries?-CodePudding

I have a requirement to write my code in an object-oriented (OOP) style. I watched a few videos on what OOP is, but from the examples I could find I am not able to learn how to convert my own code into OOP. All I could make out is that I have to create a class, say class Interpolate, and then somehow define methods for actual, forward fill, backward fill, linear and cubic. But I don't know exactly what I have to write in them, for example:

def forwardfill (?,?):
    ?? ( should I simple copy the stuff here?)

# # Generate dataset
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw series. 'LastUpdated' is parsed as datetimes and used as the
# index so that the frame can be resampled by time below.
df = pd.read_csv(r'C:\Users\Admin\OOP\TimeSeries\dataset.csv', parse_dates=['LastUpdated'], index_col='LastUpdated')

# One subplot per variant (actual + four gap-filling methods), sharing the x axis.
fig, axes = plt.subplots(5, 1, sharex=True, figsize=(20, 20))
plt.rcParams.update({'xtick.bottom' : False})

# Placeholder shown in every plot title; no MSE is actually computed here.
error = 0
# Average the readings into 15-minute bins; bins without readings become NaN.
df = df.resample('15min').mean()

## 1. Actual -------------------------------
df['Occupancy'].plot(title='Actual', ax=axes[0], label='Actual', color='green', style=".-")

## 2. Forward Fill --------------------------
df_ffill = df.ffill()  # df.ffill - pandas func to forward fill missing values
df_ffill['Occupancy'].plot(title='Forward Fill (MSE: ' + str(error) + ")", ax=axes[1], label='Forward Fill', style=".-")

## 3. Backward Fill -------------------------
df_bfill = df.bfill()  # df.bfill - pandas func to backward fill missing values
df_bfill['Occupancy'].plot(title="Backward Fill (MSE: " + str(error) + ")", ax=axes[2], label='Back Fill', color='purple', style=".-")

## 4. Linear Interpolation ------------------
# Interpolate over the row position rather than the timestamp: 'rownum' is the
# x axis, and the rows that do have an 'Occupancy' value are the known points.
df['rownum'] = np.arange(df.shape[0])  # df.shape[0] - gives number of row count
df_nona = df.dropna(subset=['Occupancy'])  # df.dropna - remove missing values
f = interp1d(df_nona['rownum'], df_nona['Occupancy'])
df['linear_fill'] = f(df['rownum'])
df['linear_fill'].plot(title="Linear Fill (MSE: " + str(error) + ")", ax=axes[3], label='Linear Fill', color='red', style=".-")

## 5. Cubic Interpolation --------------------
f2 = interp1d(df_nona['rownum'], df_nona['Occupancy'], kind='cubic')
df['cubic_fill'] = f2(df['rownum'])
df['cubic_fill'].plot(title="Cubic Fill (MSE: " + str(error) + ")", ax=axes[4], label='Cubic Fill', color='deeppink', style=".-")

CodePudding user response：

When someone asks you for OOP, they probably want your code organised as a class with methods. Why is that good? You get a reusable class for visualisations: just pass in different CSV paths and column names.

First of all, I have made a little df for further testing and saved it:

# Build a tiny eight-day price/volume table for testing and save it as CSV.
days = pd.date_range('1/1/2000', periods=8, freq='D')
sample_columns = {
    'price': [10, 11, 9, 13, 14, 18, 17, 19],
    'volume': [50, 60, 40, 100, 50, 100, 40, 50],
}
# The date column is appended last so the column order is price, volume, date.
df = pd.DataFrame(sample_columns).assign(date=days)
print(df)
df.to_csv('some_df.csv', index=False)

It just looks like this:

   price  volume       date
0     10      50 2000-01-01
1     11      60 2000-01-02
2      9      40 2000-01-03
3     13     100 2000-01-04
4     14      50 2000-01-05
5     18     100 2000-01-06
6     17      40 2000-01-07
7     19      50 2000-01-08

Then I can use my class:

class TimeSeriesOOP:
    """Read a time series from CSV, fill its gaps in several ways and plot them.

    Attributes:
        df: the frame read from the CSV, resampled to 15-minute means.
            ``make_interpolations`` adds ``rownum``, ``linear_fill`` and
            ``cubic_fill`` columns to it.
        df_ffill: ``df`` with missing values forward-filled (computed once,
            at construction time).
        df_bfill: ``df`` with missing values backward-filled (computed once,
            at construction time).
    """

    def __init__(self, path_to_csv, date_column=None, index_column=None):
        """Load the CSV, resample it and precompute the forward/backward fills.

        Args:
            path_to_csv: path (or file-like object) handed to ``pd.read_csv``.
            date_column: name of the column to parse as datetimes.
            index_column: name of the column to use as the index. The frame
                is resampled by time right after loading, so this should be
                the datetime column.
        """
        self.df = self.csv_reader(path_to_csv, date_column=date_column, index_column=index_column)
        self.process_dataframe()
        self.df_ffill = self.df.ffill()  # df.ffill - pandas func to forward fill missing values
        self.df_bfill = self.df.bfill()  # df.bfill - pandas func to backward fill missing values

    @staticmethod
    def csv_reader(path_to_csv, date_column=None, index_column=None):
        """Read a CSV into a DataFrame.

        Args:
            path_to_csv: path (or file-like object) handed to ``pd.read_csv``.
            date_column: column to parse as datetimes; ``None`` parses none.
            index_column: column to use as the index; ``None`` keeps the
                default index.

        Returns:
            The loaded ``pandas.DataFrame``.
        """
        # Only request date parsing when a column was actually named;
        # passing [None] would ask pandas to parse a column called None.
        parse_dates = [date_column] if date_column is not None else False
        dataframe = pd.read_csv(path_to_csv, parse_dates=parse_dates,
                                index_col=index_column)
        return dataframe

    def process_dataframe(self):  # make separate func if you need more processing
        """Replace ``self.df`` with its 15-minute mean resampling."""
        self.df = self.df.resample('15min').mean()

    def make_interpolations(self, column_of_interest):
        """Add ``linear_fill`` and ``cubic_fill`` columns to ``self.df``.

        The interpolation runs over the row position (stored in a new
        ``rownum`` column), using the rows where ``column_of_interest`` is
        present as the known points.

        Args:
            column_of_interest: name of the column whose gaps are filled.
        """
        # 4. Linear Interpolation ------------------
        self.df['rownum'] = np.arange(self.df.shape[0])  # df.shape[0] - gives number of row count
        df_nona = self.df.dropna(subset=[column_of_interest])  # df.dropna - remove missing values
        f = interp1d(df_nona['rownum'], df_nona[column_of_interest])
        self.df['linear_fill'] = f(self.df['rownum'])

        # 5. Cubic Interpolation --------------------
        f2 = interp1d(df_nona['rownum'], df_nona[column_of_interest], kind='cubic')
        self.df['cubic_fill'] = f2(self.df['rownum'])

    def draw_all(self, column_of_interest):
        """Compute the interpolations and show all five variants in one figure.

        Args:
            column_of_interest: name of the column to plot and fill.
        """
        self.make_interpolations(column_of_interest=column_of_interest)

        fig, axes = plt.subplots(5, 1, sharex=True, figsize=(20, 20))
        plt.rcParams.update({'xtick.bottom': False})
        # Placeholder shown in every plot title; no MSE is actually computed.
        error = 0

        # 1. Actual -------------------------------
        self.df[column_of_interest].plot(title='Actual', ax=axes[0], label='Actual', color='green', style=".-")

        # 2. Forward Fill --------------------------
        self.df_ffill[column_of_interest].plot(title='Forward Fill (MSE: ' + str(error) + ")", ax=axes[1],
                                               label='Forward Fill', style=".-")

        # 3. Backward Fill -------------------------
        self.df_bfill[column_of_interest].plot(title="Backward Fill (MSE: " + str(error) + ")", ax=axes[2],
                                               label='Back Fill',
                                               color='purple', style=".-")

        # 4. Linear Interpolation ------------------
        self.df['linear_fill'].plot(title="Linear Fill (MSE: " + str(error) + ")", ax=axes[3], label='Linear Fill',
                                    color='red',
                                    style=".-")

        # 5. Cubic Interpolation --------------------
        self.df['cubic_fill'].plot(title="Cubic Fill (MSE: " + str(error) + ")", ax=axes[4], label='Cubic Fill',
                                   color='deeppink',
                                   style=".-")
        plt.show()

We logically separate all processes into different methods:

read CSV
process it
make interpolations
draw everything

How to use it?

# Plot the 'price' column of the sample CSV, using its 'date' column as the
# parsed datetime index.
col_of_interest = 'price'
time_series_visualiser = TimeSeriesOOP(
    'some_df.csv',
    date_column='date',
    index_column='date',
)
time_series_visualiser.draw_all(column_of_interest=col_of_interest)

You can see output plot here

So, I just used the class we created with sample data. But you can pass other file and column names and reuse the whole class for your needs!

Try something like this:

# Same visualiser, pointed at the original dataset: 'LastUpdated' is both the
# parsed date column and the index, and 'Occupancy' is the series to plot.
col_of_interest = 'Occupancy'
time_series_visualiser = TimeSeriesOOP(
    r'C:\Users\Admin\OOP\TimeSeries\dataset.csv',
    date_column='LastUpdated',
    index_column='LastUpdated',
)
time_series_visualiser.draw_all(column_of_interest=col_of_interest)