Home > Enterprise >  converting text file to comma delimited csv file or json file
converting text file to comma delimited csv file or json file

Time:04-14

I have a text file that I want to convert to a comma-delimited CSV file, where the first row(headers) are the MySQL table fields. My text file looks like this

      id                   purchase-date               last-updated-date        status,         name   
     305-0847312-2761164      2022-04-11T22:23:27 00:00   2022-04-11T22:23:31 00:00  Pending    DE  DE Core Free Shipping
     028-3270261-2897162      2022-04-11T22:17:27 00:00   2022-04-11T22:17:30 00:00  Pending    
     028-8245400-1649940      2022-04-11T22:15:29 00:00   2022-04-11T22:15:32 00:00  Pending    
     028-2661715-2120359      2022-04-11T21:57:24 00:00   2022-04-11T21:57:28 00:00  Pending    
     303-9076983-4225163      2022-04-11T21:53:52 00:00   2022-04-11T21:53:55 00:00  Pending    
     304-7440363-0208337      2022-04-11T21:49:14 00:00   2022-04-11T21:49:17 00:00  Pending    
     302-2070657-8345128      2022-04-11T21:30:12 00:00   2022-04-12T01:32:20 00:00  Shipped     some names

What I want to achieve is a file like this

   id,                   purchase-date,               last-updated-date,        status   
     305-0847312-2761164,      2022-04-11T22:23:27 00:00,   2022-04-11T22:23:31 00:00,  Pending    
     028-3270261-2897162,      2022-04-11T22:17:27 00:00,   2022-04-11T22:17:30 00:00,  Pending    
    

I need this file to save in the database where the first row are the column names

I tried pandas

   read_file = pd.read_csv("reports/report.txt")
   read_file.to_csv("reports/report.csv", index=None, sep="\n")
exit()

BUt I got error

    pandas.errors.ParserError: Error tokenizing data. C error: Expected 2 fields in line 194, saw 4

Questions

  1. How do you convert this text file to a comma delimited csv file?

  2. Or a more preferable way, to convert this txt file to json file, saving this to db is much easier if its in json format, something like:

    [
      {
         id: 305-0847312-2761164,
         purchase_date: 2022-04-11T22:23:27 00:00
      },
      {
         id: 305-0847312-2761165,
         purchase_date: 2022-05-11T22:23:27 00:00,
         name: null( some names is empty)
      },
       {
         id: 305-0847312-2761165,
         purchase_date: 2022-05-11T22:23:27 00:00,
         name: 'some name'
      },
      .......................
    ]
    
    with open(file) as f:
         for i in f:
             myTable.objects.create(id=id,**i)
    

I tried to convert to json format and I think its pretty close

text = "reports/download/sellingpartner/report.txt"

l = 1
dict1 = []
with open(text) as fh:
    first_line = fh.readline()
    fields = [e.replace("-", "_") for e in first_line.split()]
    l = 1
    lines = fh.readlines()[1:]
    for line in lines:
        description = line
        print(description)
        i = 0
        dict2 = {}
        while i < len(fields):
            dict2[fields[i]] = description[i]
            i = i   1
        dict1.append(dict2)
        l = l   1
output = open("reports/download/sellingpartner/file.json", "w")
json.dump(dict1, output, indent=4)
output.close()

result

[
{
    "amazon_order_id": "3",
    "merchant_order_id": "0",
    "purchase_date": "4",
    "last_updated_date": "-",
    "order_status": "8",
    "fulfillment_channel": "5",
    "sales_channel": "9",
    "order_channel": "2",
    "ship_service_level": "7",
    "product_name": "7",
    "sku": "5",
    "asin": "-",
    "item_status": "1",
    "quantity": "5",
    "currency": "3",
    "item_price": "1",
    "item_tax": "5",
    "shipping_price": "3",
    "shipping_tax": "1",
    "gift_wrap_price": "\t",
    "gift_wrap_tax": "3",
    "item_promotion_discount": "0",
    "ship_promotion_discount": "4",
    "ship_city": "-",
    "ship_state": "8",
    "ship_postal_code": "5",
    "ship_country": "9",
    "promotion_ids": "2",
    "is_business_order": "7",
    "purchase_order_number": "7",
    "price_designation": "5",
    "is_iba": "-",
    "order_invoice_type": "1"
},
{
    "amazon_order_id": "3",
    "merchant_order_id": "0",
    "purchase_date": "5",
    "last_updated_date": "-",
    "order_status": "5",
    "fulfillment_channel": "4",
    "sales_channel": "1",
    "order_channel": "2",
    "ship_service_level": "3",
    "product_name": "9",
    "sku": "1",
    "asin": "-",
    "item_status": "1",
    "quantity": "7",
    "currency": "2",
    "item_price": "2",
    "item_tax": "7",
    "shipping_price": "1",
    "shipping_tax": "2",
    "gift_wrap_price": "\t",
    "gift_wrap_tax": "3",
    "item_promotion_discount": "0",
    "ship_promotion_discount": "5",
    "ship_city": "-",
    "ship_state": "5",
    "ship_postal_code": "4",
    "ship_country": "1",
    "promotion_ids": "2",
    "is_business_order": "3",
    "purchase_order_number": "9",
    "price_designation": "1",
    "is_iba": "-",
    "order_invoice_type": "1"
},
{
    "amazon_order_id": "3",
    "merchant_order_id": "0",
    "purchase_date": "5",
    "last_updated_date": "-",
    "order_status": "6",
    "fulfillment_channel": "5",
    "sales_channel": "4",
    "order_channel": "7",
    "ship_service_level": "1",
    "product_name": "4",
    "sku": "7",
    "asin": "-",
    "item_status": "8",
    "quantity": "7",
    "currency": "4",
    "item_price": "8",
    "item_tax": "3",
    "shipping_price": "6",
    "shipping_tax": "3",
    "gift_wrap_price": "\t",
    "gift_wrap_tax": "3",
    "item_promotion_discount": "0",
    "ship_promotion_discount": "5",
    "ship_city": "-",
    "ship_state": "6",
    "ship_postal_code": "5",
    "ship_country": "4",
    "promotion_ids": "7",
    "is_business_order": "1",
    "purchase_order_number": "4",
    "price_designation": "7",
    "is_iba": "-",
    "order_invoice_type": "8"
},
{
    "amazon_order_id": "3",
    "merchant_order_id": "0",
    "purchase_date": "5",
    "last_updated_date": "-",
    "order_status": "1",
    "fulfillment_channel": "9",
    "sales_channel": "4",
    "order_channel": "5",
    "ship_service_level": "9",
    "product_name": "1",
    "sku": "5",
    "asin": "-",
    "item_status": "3",
    "quantity": "8",
    "currency": "1",
    "item_price": "5",
    "item_tax": "5",
    "shipping_price": "2",
    "shipping_tax": "4",
    "gift_wrap_price": "\t",
    "gift_wrap_tax": "3",
    "item_promotion_discount": "0",
    "ship_promotion_discount": "5",
    "ship_city": "-",
    "ship_state": "1",
    "ship_postal_code": "9",
    "ship_country": "4",
    "promotion_ids": "5",
    "is_business_order": "9",
    "purchase_order_number": "1",
    "price_designation": "5",
    "is_iba": "-",
    "order_invoice_type": "3"
},
     

One thing I need to solve is how to separate the line by commas, because each values contains spaces, and commas too.Values are separated by multiple spaces, see below

 406-4733989-6345153    406-4733989-6345153 2022-04-12T05:37:01 00:00   2022-04-12T05:37:05 00:00   Pending Amazon  Amazon.it       Expedited   Impact Fixy - Supporto per cellulare a 360° per bicicletta e moto, estremamente stabile, con giunto sferico a 360° e gomma di sicurezza, supporto pe  4Z-LVBE-IGD9    B08MXYS4S3  Unshipped   1   EUR 16.95       milano  mi  20161   IT  PAWS-V2-27105377117-91  false           false   
     
       

I converted the .tx extension to csv file and opened it with libre office, see screenshotenter image description here

I tried another approach,

jsonArray = []
output = "reports/download/sellingpartner/file.json"
text = "reports/download/sellingpartner/report.txt"
# read csv file
with open(text, encoding="utf-8") as csvf:
    # load csv file data using csv library's dictionary reader
    csvReader = csv.DictReader(csvf)

    # convert each csv row into python dict
    for row in csvReader:
        # add this python dict to json array
        jsonArray.append(row)

# convert python jsonArray to JSON String and write to file
with open(output, "w", encoding="utf-8") as jsonf:
    jsonString = json.dumps(jsonArray, indent=4)
    jsonf.write(jsonString)

Here's the result:

 [
    {
       "amazon-order-id\tmerchant-order-id\tpurchase-date\tlast-updated-date\torder-status\tfulfillment-channel\tsales-channel\torder-channel\tship-service-level\tproduct-name\tsku\tasin\titem-status\tquantity\tcurrency\titem-price\titem-tax\tshipping-price\tshipping-tax\tgift-wrap-price\tgift-wrap-tax\titem-promotion-discount\tship-promotion-discount\tship-city\tship-state\tship-postal-code\tship-country\tpromotion-ids\tis-business-order\tpurchase-order-number\tprice-designation\tis-iba\torder-invoice-type": "304-3907519-7764337\t304-3907519-7764337\t2022-04-12T17:10:44 00:00\t2022-04-12T17:10:47 00:00\tPending\tAmazon\tAmazon.de\t\tExpedited\tImpact Ketten\u00c3\u00b6l Fahrrad 120ml - Einzigartiges 8 Komponenten Fahrradketten \u00c3\u0096l als Alternative zu Kettenfett & Kettenspray f\u00c3\u00bcr E-Bike",
      "null": [
          " Mountainbike\tL9-3CSN-J1GL\tB08DVJ95DS\tUnshipped\t1\tEUR\t9.95\t\t\t\t\t\t\t\tLichtenfels\t\t96215\tDE\t\tfalse\t\t\tfalse\t"
       ]
      },
      { 
         ........................
      }
  ]

CodePudding user response:

Assuming the headers and column data contain no whitespace and columns are separated by whitespace, a minimal pure Python example could be:

import csv

with open('data.txt', 'r') as f:
    data = f.read()

# Remove trailing whitespace and split by whitespace
lines = [line.strip() for line in data.splitlines()]
lines = [line.split() for line in lines]

# Use built-in csv module to handle escape chars etc
with open('data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for line in lines:
        writer.writerow(line)

See the CSV Writer documentation for more info.

CodePudding user response:

I solved it using delimiter and using pandas

 with open(f"{path}/report.txt", "w ") as output:
            output.write(res.payload.get("document")) // from amazon api
        in_txt = csv.reader(open(f"{path}/report.txt", "r"), delimiter="\t")
        out_csv = csv.writer(open(f"{path}/report.csv", "w"))

        out_csv.writerows(in_txt)
        text = f"{path}/report.csv"
        df = pd.read_csv(text)
        # df.fillna("", inplace=True)
        row_iter = df.iterrows()
        for index, row in row_iter:
               Mytable.objects.update_or_create(id=id, defaults={**row})
  • Related