Home > Mobile >  Python: How to convert text with columns to JSON file
Python: How to convert text with columns to JSON file

Time:07-21

I have a problem converting my text file to a JSON file. The text file looks like the example below:

Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily:           Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name:           OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name:     OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family:    Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily: Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name:  OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID:           4.000;LARA;OrangeKid-Regular

I tried this script:

import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload


import csv
import json
import pandas as pd
from pathlib import Path
#csv_file = pd.DataFrame(pd.read_csv("/users/krzysztofpaszta/CSVtoGD/build-a-bridge.csv", sep = ",", header = 0, index_col = False))
#csv_file.to_json("/users/krzysztofpaszta/CSVtoGD/build-a-bridge.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)

# Read the CSV file
def read_CSV(file, json_file):
    """Read ``file`` with ``csv.DictReader`` and hand the rows to ``convert_write_json``.

    The first line of ``file`` is taken as the header; every following line
    becomes one dict keyed by those header names. The resulting list of dicts
    is passed, together with ``json_file``, to ``convert_write_json``.

    Args:
        file: Path of the input file to open for reading.
        json_file: Destination path forwarded unchanged to ``convert_write_json``.
    """
    with open(file) as handle:
        parser = csv.DictReader(handle)
        # Accessing ``fieldnames`` makes the reader consume the header line.
        columns = parser.fieldnames
        records = [
            {column: record[column] for column in columns}
            for record in parser
        ]
        # The writer function is defined further down in the script.
        convert_write_json(records, json_file)

# Convert CSV to JSON
def convert_write_json(data, json_file):
    """Serialize ``data`` to JSON and write it to ``json_file``.

    The file is opened in write mode and receives two serializations
    back to back, with nothing between them: first an indented (4 spaces)
    rendering, then the default compact rendering of the same ``data``.

    Args:
        data: JSON-serializable object (the script passes a list of dicts).
        json_file: Path of the output file; it is overwritten.
    """
    indented_options = {"sort_keys": False, "indent": 4, "separators": (',', ': ')}
    compact_options = {}
    with open(json_file, "w") as out:
        # Same order as always: indented dump first, compact dump second.
        for options in (indented_options, compact_options):
            out.write(json.dumps(data, **options))

# Loop over the folder: every *.csv file found directly inside it is
# converted, and the output path is the same path with a .json suffix.
pliki = "/users/user/CSVtoGD/"

files = Path(pliki).glob('*.csv')

for f in files:
    json_target = f.with_suffix('.json')
    read_CSV(f, str(json_target))

But the results are not as expected. The data is mixed up and I have no idea how to fix that. I am pretty new to Python and JSON files. Could someone give me a hint? These are my script's results:

[
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily:           Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name:           OrangeKid-Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name:     OrangeKid-Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family:    Orange Kid"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily: Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name:  OrangeKid-Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID:           4.000;LARA;OrangeKid-Regular"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Description:         To see more fonts by Ray Larabie"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Designer:            Ray Larabie"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Designer URL:        http://www.typodermic.com"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Manufacturer:        Ray Larabie"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Vendor URL:          http://www.larabiefonts.com"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Trademark:           Orange Kid is a trademark of Ray Larabie."
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Copyright:           (c) 1999-2009 Ray Larabie. See attached license agreement for more information. If EULA is missing"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:License URL:         http://typodermicfonts.com/pages/license"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:License Description: http://typodermicfonts.com/pages/license"
    },
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Vendor ID:           TYPO"
    }
][{"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily:           Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name:           OrangeKid-Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name:     OrangeKid-Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family:    Orange Kid"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily: Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name:  OrangeKid-Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID:           4.000;LARA;OrangeKid-Regular"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Description:         To see more fonts by Ray Larabie"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": 
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Designer:            Ray Larabie"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Designer URL:        http://www.typodermic.com"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Manufacturer:        Ray Larabie"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Vendor URL:          http://www.larabiefonts.com"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Trademark:           Orange Kid is a trademark of Ray Larabie."}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Copyright:           (c) 1999-2009 Ray Larabie. See attached license agreement for more information. If EULA is missing"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:License URL:         http://typodermicfonts.com/pages/license"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:License Description: http://typodermicfonts.com/pages/license"}, {"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid": "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Vendor ID:           TYPO"}]

CodePudding user response:

The data looks like a fixed width format, i.e., there's no unique column separator, but each column has a certain width. csv is not the right tool for that, but pandas has a read_fwf method that seems to do the right thing:

df = pd.read_fwf("path/to/your_file.txt", header=None)

Result:

                     0                    1                    2
0  Assets/HighroadE...      kid.ttf:Family:           Orange Kid
1  Assets/HighroadE...   kid.ttf:Subfamily:              Regular
2  Assets/HighroadE...   kid.ttf:Full name:    OrangeKid-Regular
3  Assets/HighroadE...  kid.ttf:PostScri...    OrangeKid-Regular
4  Assets/HighroadE...  kid.ttf:Preferre...           Orange Kid
5  Assets/HighroadE...  kid.ttf:Preferre...              Regular
6  Assets/HighroadE...  kid.ttf:Mac font...    OrangeKid-Regular
7  Assets/HighroadE...     kid.ttf:Version:  OTF 4.000;PS 001...
8  Assets/HighroadE...   kid.ttf:Unique ID:  4.000;LARA;Orang...

Now it's just a matter of converting it via df.to_json() with the appropriate settings (esp. orient=...) but since you didn't specify what the expected format is, I'll leave that for you to figure out.

CodePudding user response:

I don't know what result you expect, but you could read it as a normal text file and split each line on the last `:` using rsplit(':', 1)

text = '''Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily:           Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name:           OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name:     OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family:    Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily: Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name:  OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID: '''

import io
import json

# To read from a real file instead, use: with open(filename) as source:
with io.StringIO(text) as source:
    # Strip the trailing newline and surrounding spaces from every line.
    entries = (raw.strip() for raw in source)
    # Blank lines are skipped. Each remaining line is cut at its LAST colon,
    # so everything before it (path and field name) becomes the key. Note that
    # a value which itself contains ':' would also be cut at that colon.
    pairs = (entry.rsplit(':', 1) for entry in entries if entry)
    record = {key: val.strip() for key, val in pairs}

# Wrap the single mapping in a list so the JSON output is an array.
data = [record]
print(json.dumps(data, indent=4))

Result

[
    {
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family": "Orange Kid",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily": "Regular",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name": "OrangeKid-Regular",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name": "OrangeKid-Regular",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family": "Orange Kid",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily": "Regular",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name": "OrangeKid-Regular",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version": "OTF 4.000;PS 001.001;Core 1.0.29",
        "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID": ""
    }
]

But if you have more than one .ttf in this file and you want every .ttf as a separate dictionary, then you need slightly more complex code

text = '''Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family:              Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily:           Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name:           OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name:     OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family:    Orange Kid
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily: Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name:  OrangeKid-Regular
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29
Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID:
other.ttf:Family:              Orange Kid
other.ttf:Subfamily:           Regular
other.ttf:Full name:           OrangeKid-Regular
other.ttf:PostScript name:     OrangeKid-Regular
other.ttf:Preferred family:    Orange Kid
other.ttf:Preferred subfamily: Regular
other.ttf:Mac font menu name:  OrangeKid-Regular
other.ttf:Version:             OTF 4.000;PS 001.001;Core 1.0.29
other.ttf:Unique ID: '''

import io
import json


def parse_font_lines(lines):
    """Group ``path:field:value`` lines into ``{path: {field: value}}``.

    Args:
        lines: Iterable of text lines (an open file works). Blank lines
            are ignored.

    Returns:
        A dict mapping each font path to a dict of its fields. Values are
        stripped of surrounding whitespace; an absent value becomes ``""``.

    Raises:
        ValueError: If a non-empty line contains fewer than two colons.
    """
    fonts = {}
    for line in lines:
        line = line.strip()  # remove `\n` at the end and spaces
        if not line:  # skip empty lines
            continue
        # Split on the first two colons only, so a value that itself
        # contains ':' (for example a URL such as http://...) stays in one
        # piece instead of breaking the three-way unpacking.
        ttf, field, value = line.split(':', 2)
        fonts.setdefault(ttf, {})[field] = value.strip()
    return fonts


#with open(filename) as fh_in:
with io.StringIO(text) as fh_in:
    data = parse_font_lines(fh_in)

print(json.dumps(data, indent=4))

Result:

{
    "Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf": {
        "Family": "Orange Kid",
        "Subfamily": "Regular",
        "Full name": "OrangeKid-Regular",
        "PostScript name": "OrangeKid-Regular",
        "Preferred family": "Orange Kid",
        "Preferred subfamily": "Regular",
        "Mac font menu name": "OrangeKid-Regular",
        "Version": "OTF 4.000;PS 001.001;Core 1.0.29",
        "Unique ID": ""
    },
    "other.ttf": {
        "Family": "Orange Kid",
        "Subfamily": "Regular",
        "Full name": "OrangeKid-Regular",
        "PostScript name": "OrangeKid-Regular",
        "Preferred family": "Orange Kid",
        "Preferred subfamily": "Regular",
        "Mac font menu name": "OrangeKid-Regular",
        "Version": "OTF 4.000;PS 001.001;Core 1.0.29",
        "Unique ID": ""
    }
}
  • Related