Home > OS >  How to convert following list into pandas dataframe?
How to convert the following list into a pandas DataFrame?

Time:07-28

lst = ['Hospital Name: ', 'Methodist LEADING MEDICINE', 'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133', 'Total Charges: ', 'Hospital Name: ', 'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133', 'Total Charges: ', 'Hospital Name: ', 'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133', 'Total Charges: ', '131,975.58', 'Hospital Name: ', 'Houston Methodist Sugar Land Hospital', 'Hospital Address: ', '16655 Southwest Frwy Sugar Land TX 77479', 'Total Charges: ']

I want output pandas dataframe like

Hospital Name:                   Hospital Address:                         Total Charges:
Methodist LEADING MEDICINE       PO Box 3133 Houston, TX 77253-3133        None
None                             PO Box 3133 Houston, TX 77253-3133        None
None                             PO Box 3133 Houston, TX 77253-3133        131,975.58
Houston Methodist Sugar          16655 Southwest Frwy Sugar Land TX 77479  None
Land Hospital

How can I do this using Python?

CodePudding user response:

Just use this code and you will get what you want:

import pandas as pd

# Flat input: each header label is optionally followed by its value.
lst = [
    "Hospital Name: ", "Methodist LEADING MEDICINE",
    "Hospital Address: ", "PO Box 3133 Houston, TX 77253-3133",
    "Total Charges: ",
    "Hospital Name: ",
    "Hospital Address: ", "PO Box 3133 Houston, TX 77253-3133",
    "Total Charges: ",
    "Hospital Name: ",
    "Hospital Address: ", "PO Box 3133 Houston, TX 77253-3133",
    "Total Charges: ", "131,975.58",
    "Hospital Name: ", "Houston Methodist Sugar Land Hospital",
    "Hospital Address: ", "16655 Southwest Frwy Sugar Land TX 77479",
    "Total Charges: ",
]
columns = ["Hospital Name: ", "Hospital Address: ", "Total Charges: "]

# One list of cell values per column; every header occurrence adds one cell.
data = {
    "Hospital Name: ": [],
    "Hospital Address: ": [],
    "Total Charges: ": [],
}

for i, item in enumerate(lst):
    if item not in columns:
        # Plain values are consumed when their preceding header is visited.
        continue
    following = lst[i + 1] if i + 1 < len(lst) else None
    # A header followed by another header (or by nothing) has no value.
    data[item].append(None if following in columns else following)

df = pd.DataFrame(data, columns=columns)

CodePudding user response:

import pandas as pd


# Flat input: each header label is optionally followed by its value.
lst = [
    'Hospital Name: ', 'Methodist LEADING MEDICINE',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ',
    'Hospital Name: ',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ',
    'Hospital Name: ',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ', '131,975.58',
    'Hospital Name: ', 'Houston Methodist Sugar Land Hospital',
    'Hospital Address: ', '16655 Southwest Frwy Sugar Land TX 77479',
    'Total Charges: ',
]
items, headers = [], ['Hospital Name: ', 'Hospital Address: ', 'Total Charges: ']

# Rebuild the list so that every header is followed by exactly one slot:
# its value, or None when the value is missing.
for index, item in enumerate(lst):
    items.append(item)
    if item in headers:
        is_last = index == len(lst) - 1
        # A header at the very end, or one directly followed by another
        # header, has no value of its own.
        if is_last or lst[index + 1] in headers:
            items.append(None)

# After padding, each record occupies six slots (header, value) x 3, so the
# values sit at offsets 1, 3 and 5 of every group of six. This relies on the
# three headers always appearing together and in this order.
print(pd.DataFrame({"Hospital Name: ": items[1::6], "Hospital Address: ": items[3::6], "Total Charges: ": items[5::6]}))

CodePudding user response:

Use this code:

First, make sure the list ends with a value rather than a header (append an empty string if it does not). Then build one list per column with a single O(n) for loop, and finally create the DataFrame.

import pandas as pd

# NOTE: `lst` must already be defined (the flat header/value list from the
# question); this snippet does not create it and appends to it in place.

# Pad the list so a trailing header still has a "next" element to inspect.
if lst and ':' in lst[-1]:
    lst.append('')

name, address, charge = [], [], []
# Map each header label to the column list that collects its values.
column_for = {
    'Hospital Name: ': name,
    'Hospital Address: ': address,
    'Total Charges: ': charge,
}

# Walk adjacent pairs: (element, element that follows it).
for label, following in zip(lst, lst[1:]):
    column = column_for.get(label)
    if column is None:
        # Not one of the three headers, so nothing to record for this pair.
        continue
    # Any string containing ':' is treated as a header, meaning the current
    # header has no value; record an empty string in that case.
    column.append('' if ':' in following else following)

data = {'hospital name': name, 'hospital address': address, 'total charge': charge}
df = pd.DataFrame(data)
print(df)

CodePudding user response:

or via dictionaries

import pandas as pd

# Flat input: each header label is optionally followed by its value.
lst = [
    'Hospital Name: ', 'Methodist LEADING MEDICINE',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ',
    'Hospital Name: ',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ',
    'Hospital Name: ',
    'Hospital Address: ', 'PO Box 3133 Houston, TX 77253-3133',
    'Total Charges: ', '131,975.58',
    'Hospital Name: ', 'Houston Methodist Sugar Land Hospital',
    'Hospital Address: ', '16655 Southwest Frwy Sugar Land TX 77479',
    'Total Charges: ',
]
# The first header marks the start of a new record (row).
ordered_headers = ['Hospital Name: ', 'Hospital Address: ', 'Total Charges: ']
items = []
prev_item_dict = None

for index, item in enumerate(lst):
    if item not in ordered_headers:
        # Values are picked up when their preceding header is processed.
        continue
    # Start a new row on the first header. Also start one if no row exists
    # yet, so input that begins with a later header does not crash.
    if item == ordered_headers[0] or prev_item_dict is None:
        if prev_item_dict is not None:
            # Flush the previously collected row.
            items.append(prev_item_dict)
        prev_item_dict = {}
    # The value is the next element unless it is missing or is itself a header.
    has_value = index + 1 < len(lst) and lst[index + 1] not in ordered_headers
    prev_item_dict[item] = lst[index + 1] if has_value else None

# Flush the last row, which no later first header will trigger.
if prev_item_dict is not None:
    items.append(prev_item_dict)

# The result is not stored; assign it to a variable to keep the frame.
pd.DataFrame(items)
  • Related