How to transpose the csv file data based on series of group data items-CodePudding

Need some help on python code related to transpose. Have one csv file looking below (a1&b1,a2&b2,c1&c2,d1&d2 are like groups)

r1;a1;b1;a2;b2;c1;c2;d1;d2
12;ec;em;ed;em;ec;ek;;
23;ec;ek;ec;rk;;;;
34;ec;rk;ec;jk;ek;45;;

Need to get the output file as

12;ec;em
12;ed;em
12;ec;ek
23;ec;ek
23;ec;rk
34;ec;rk
34;ec;jk
34;ek;45

CodePudding user response：

I'll attempt to suggest just a plain simple example. I tried to make it as explicit and modular as possible, such as it is easy to see and to change at your will.

import csv


def flatten(t):
    return [item for sublist in t for item in sublist]


def reformat_first_with_pairs(p_list):
    reformated = []
    for i in range(1, len(p_list), 2):
        reformated.append([p_list[0], p_list[i], p_list[i   1]])
    return reformated


def disqualify_if_contains(p_list, value):
    filtered = []
    for key, sub_list in enumerate(p_list):
        if not value in sub_list:
            filtered.append(sub_list)
    return filtered


def rows_from_csv_file(file_name, f_delimiter=';'):
    input_file_rows = []
    try:
        with open(file_name, newline='') as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                input_file_rows.append(row[0].split(f_delimiter))
    except EnvironmentError:
        raise
    return input_file_rows


def rows_to_csv_file(file_name, rows, f_delimiter=';'):
    try:
        with open(file_name, 'w ', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=f_delimiter,
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for row in rows:
                writer.writerow(row)
            return True
    except EnvironmentError:
        raise


def custom_csv_reformat_00(input_file, output_file):
    try:
        input_file_rows = rows_from_csv_file(input_file)
    except EnvironmentError as e:
        raise

    input_file_rows.pop(0)

    reformated_list = flatten(map(reformat_first_with_pairs, input_file_rows))
    reformated_list = disqualify_if_contains(reformated_list, '')

    try:
        rows_to_csv_file(output_file, reformated_list)
    except EnvironmentError as e:
        raise


IF_NAME = 'input.csv'
OF_NAME = 'output.csv'

try:
    custom_csv_reformat_00(IF_NAME, OF_NAME)
except Exception as e:
    print(e)

again just to point out, I don't know if your goal is to remove the first line, as with :

input_file_rows.pop(0)

CodePudding user response：

One option is to iterate through the dataframe, in pairs, based on the columns, and then concatenate into one dataframe:


temp = df.set_index('r1')
outcome = [temp.iloc[:, n:n 2]
               .set_axis(['left', 'right'], axis = 'columns')
           for n in range(0, len(temp.columns), 2)]

pd.concat(outcome).dropna().sort_index().reset_index()

   r1 left right
0  12   ec    em
1  12   ed    em
2  12   ec    ek
3  23   ec    ek
4  23   ec    rk
5  34   ec    rk
6  34   ec    jk
7  34   ek    45

A possibly faster option would be to dump everything into numpy, reshape before returning back to Pandas to clean up:

temp = df.set_index('r1')

# since the column reshape is in pairs, we use 2:
outcome = np.reshape(temp.to_numpy(), (-1, 2))

# index will be repeated for the pairs of columns:
index = temp.index.repeat(len(temp.columns)//2)

pd.DataFrame(outcome, index = index).sort_index().dropna().reset_index()

   r1   0   1
0  12  ec  em
1  12  ed  em
2  12  ec  ek
3  23  ec  ek
4  23  ec  rk
5  34  ec  rk
6  34  ec  jk
7  34  ek  45

One other option is pivot_longer from pyjanitor, where you can pass the patterns required for the reshape:

(
df
.pivot_longer(index = 'r1', 
              names_to = ['left', 'right'], 
              names_pattern = ["a1|a2|c1|d1", "b1|b2|c2|d2"], 
              sort_by_appearance = True)
.dropna()
)

    r1 left right
0   12   ec    em
1   12   ed    em
2   12   ec    ek
4   23   ec    ek
5   23   ec    rk
8   34   ec    rk
9   34   ec    jk
10  34   ek    45