How to take PickleType data and convert it to CSV files in a folder using Python - CodePudding

I am trying to take PickleType data from SQLAlchemy and convert it to CSV files in a folder.

Here is a snippet of my class with the PickleType data:

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column
from sqlalchemy.sql.sqltypes import PickleType 
Base = declarative_base()

class Sales(Base):
    # Declarative model for a sale; the rest of the class body is elided
    # in this snippet (the "....." placeholder below).
    .....
    # Required (nullable=False) column declared with SQLAlchemy's PickleType.
    data = Column(PickleType, nullable=False, comment="Sale Data for sale")

Here is the function with which I am trying to read that PickleType data and convert it to CSV files in a folder:

from data import Sales
import pandas as pd
import os.path
import pickle

def _to_csv(self, sales_data: "Sales", out_dir='/Desktop/sales', filename='car_sales.csv'):
    """
    Write the data held by a Sales row to a CSV file inside a folder.

    SQLAlchemy's PickleType unpickles the column value when the row is
    loaded, so ``sales_data.data`` is already the original Python object
    (not a file path and not raw pickle bytes). It is therefore handed
    straight to pandas instead of being opened or re-pickled.

    Args:
        self: Unused; kept so existing callers keep working.
        sales_data: Object whose ``data`` attribute holds anything that
            ``pd.DataFrame`` accepts (list of dicts, dict of lists, ...).
        out_dir: Folder that receives the CSV file; created if missing.
        filename: Name of the CSV file inside ``out_dir``.

    Returns:
        The path of the CSV file that was written.

    Raises:
        OSError: If the folder cannot be created or the file cannot be
            written.
    """
    df = pd.DataFrame(sales_data.data)
    os.makedirs(out_dir, exist_ok=True)
    # The file name must be a relative component: os.path.join throws away
    # every earlier component as soon as a later one starts with '/'.
    csv_path = os.path.join(out_dir, filename)
    df.to_csv(csv_path)
    return csv_path

CodePudding user response:

I don't know the structure of your class and data, but if you have the data you can just append the items to a list and call your _to_csv function in parallel to save them for you. Something like this:


from data import Sales
import pandas as pd
import os.path
import pickle
import multiprocessing as mp
import concurrent.futures


def _to_csv(self, data=None, out_dir='/Desktop/sales'):
    """
    Load one pickle file and save its contents as a CSV file in a folder.

    The function is handed to ``executor.map`` / ``executor.submit`` with a
    single argument, so it accepts the file name either as ``data`` or, when
    only one positional argument is given, in the ``self`` slot.

    Args:
        self: Ignored when ``data`` is given; otherwise used as the path of
            the pickle file (single-argument call).
        data: Path of the pickle file to read.
        out_dir: Folder that receives the CSV file; created if missing.

    Returns:
        The path of the CSV file that was written
        (``<out_dir>/<basename of data>.csv``).

    Raises:
        OSError: If the pickle file cannot be read or the CSV cannot be
            written.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    if data is None:
        # Called as _to_csv(name): the only positional argument landed in
        # the `self` slot.
        data = self

    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use it on files you trust.
    with open(data, 'rb') as file:
        records = pickle.load(file)

    os.makedirs(out_dir, exist_ok=True)
    # Use a relative file name (no leading '/'), otherwise os.path.join
    # drops the folder and the file ends up in the filesystem root.
    csv_path = os.path.join(out_dir, f'{os.path.basename(data)}.csv')
    pd.DataFrame(records).to_csv(csv_path)
    return csv_path

# Assuming you already have the names of the files that you want to read
# and save as CSV.
data_list = [f'car_sales{idx}' for idx in range(20)]

# Worker processes must be able to import this module without side effects,
# so the pool is only created when the file is run as a script.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor:
        # An exception raised in a worker only surfaces when its result is
        # retrieved from the iterator, so consume it instead of dropping it.
        list(executor.map(_to_csv, data_list))

Or, if you want to associate each data name with its result, you can do something like this:

# Worker processes must be able to import this module without side effects,
# so the pool is only created when the file is run as a script.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor:
        # Map each future back to the data name it was submitted with.
        futures = {executor.submit(_to_csv, data): data for data in data_list}
        # Handle the jobs in completion order, not submission order.
        for future in concurrent.futures.as_completed(futures):
            data = futures[future]
            try:
                result = future.result()
            except Exception as why:
                # Report which input failed and keep processing the rest.
                print('sth went wrong!', data, why)
            else:
                print(data, result)