using bash or python to compare two files content-CodePudding

I'm looking for a way to create a new file (c.txt) that combines the contents of a.txt with the values from the custom column of b.txt.

In shell or in python.

Here is an example; maybe it will make things clearer...

This is the a.txt file:

# Ansible managed
#########################################################
# date : 20220125T094050
# profile name : zoneA
# role version : v0.0.6
#
#########################################################
# security ID    |   default   |    custom   |   applied   |
# rule_a_10      |           1 |             |           1 |
# rule_a_12      |           1 |             |           1 |
# rule_a_13      |          -1 |             |          -1 |
# rule_b_01      |          -1 |             |          -1 |
# rule_c_01      |           1 |             |           1 |

This is the b.txt file:

# Ansible managed
#########################################################
# date : 20220125T114050
# profile name : zoneB
# role version : v0.0.7
#
#########################################################
# security ID    |   default   |    custom   |   applied   |
# rule_a_10      |          -1 |             |          -1 |
# rule_a_12      |          -1 |             |          -1 |
# rule_a_13      |          -1 |           0 |           0 |
# rule_b_01      |          -1 |           1 |           1 |
# rule_d_01      |          -1 |           1 |           1 |

I'd like to get a c.txt file with the following content, where a row from b.txt is used whenever it has a value in the custom column:

# Ansible managed
#########################################################
# date : 20220125T094050  => date from A file
# profile name : zoneA    => zone from A file 
# role version : v0.0.6   => version from B file
#
#########################################################
# security ID    |   default   |    custom   |   applied   |
# rule_a_10      |           1 |             |           1 |
# rule_a_12      |           1 |             |           1 |
# rule_a_13      |          -1 |           0 |           0 |
# rule_b_01      |          -1 |           1 |           1 |
# rule_c_01      |           1 |             |           1 |
# rule_d_01      |          -1 |           1 |           1 |

Any help will be appreciated.

Thanks

CodePudding user response：

Using pandas makes the code easier on the eyes.

from io import StringIO
import pandas as pd


# Sample content of a.txt (the reference profile).
a_txt = '''# Ansible managed
#########################################################
# date : 20220125T094050
# profile name : zoneA
# role version : v0.0.6
#
#########################################################
# security ID    |   default   |    custom   |   applied   |
# rule_a_10      |           1 |             |           1 |
# rule_a_12      |           1 |             |           1 |
# rule_a_13      |          -1 |             |          -1 |
# rule_b_01      |          -1 |             |          -1 |
# rule_c_01      |           1 |             |           1 |
'''

# Sample content of b.txt (the profile whose "custom" values must win).
b_txt = '''# Ansible managed
#########################################################
# date : 20220125T114050
# profile name : zoneB
# role version : v0.0.7
#
#########################################################
# security ID    |   default   |    custom   |   applied   |
# rule_a_10      |          -1 |             |          -1 |
# rule_a_12      |          -1 |             |          -1 |
# rule_a_13      |          -1 |           0 |           0 |
# rule_b_01      |          -1 |           1 |           1 |
# rule_d_01      |          -1 |           1 |           1 |
'''

# Line 7 (0-based) is the "# security ID | default | custom | applied |" row,
# so it is used as the header and the banner lines above it are skipped.
# dtype=object keeps the cells as text instead of converting them to numbers.
dfa = pd.read_csv(StringIO(a_txt), sep='|', header=7, dtype=object)
dfb = pd.read_csv(StringIO(b_txt), sep='|', header=7, dtype=object)

# Rows of B whose "custom" column (third column) contains a digit.
# The pattern is a raw string so that ``\d`` reaches the regex engine as-is
# instead of being parsed as an (invalid) string escape sequence.
t_b = dfb.loc[dfb.iloc[:, 2].str.match(r'.*\d.*')]

# Outer-merge the security IDs of A with those of the selected B rows; the
# indicator column tells where each ID came from (left_only/right_only/both).
dft = pd.merge(dfa.iloc[:, 0], t_b.iloc[:, 0], how='outer', indicator=True)

# IDs found only in A: keep the full row from A.
t_a = pd.merge(dft.loc[dft.iloc[:, 1] == 'left_only'].iloc[:, 0], dfa, how='inner', on=dft.columns[0])

# IDs found only in B, or in both A and B: keep the full row from B.
t_b = pd.merge(dft.loc[dft.iloc[:, 1] != 'left_only'].iloc[:, 0], dfb, how='inner', on=dft.columns[0])

# Stack the "A only" rows and the "B only / both" rows into one table.
df = pd.concat([t_a, t_b])

# Sort by security ID (first column).
df = df.sort_values(by=[df.columns[0]])

# The text after the last '|' of each line is parsed as an extra, unnamed
# column. It is kept (not dropped) and given an empty name so that the
# trailing '|' separator is written back on every output line.
df = df.rename({'Unnamed: 4': ''}, axis=1)

# Write to an in-memory buffer here; use a path instead to create the file:
# c_txt = '/path/c.txt'
c_txt = StringIO()
df.to_csv(c_txt, sep='|', index=False)
print(c_txt.getvalue())

CodePudding user response：

Thanks for your reply.

Here is what I did:

# Input files: f1 is the reference profile, f2 the profile whose custom
# values are extracted.
filename_f1 = 'a.txt'
filename_f2 = 'b.txt'


def _read_lines(path):
    """Return all lines of the UTF-8 text file *path*, line endings included."""
    with open(path, 'r', encoding='utf-8') as f:
        return f.readlines()


def get_lines_f1():
    """Return the lines of the first input file (``filename_f1``)."""
    return _read_lines(filename_f1)


def get_lines_f2():
    """Return the lines of the second input file (``filename_f2``)."""
    return _read_lines(filename_f2)


def _split_rule_row(line):
    """Return the first four ``|``-separated fields of a rule row, or None.

    A line is not a rule row when it has no ``|`` (banner/comment lines),
    when it contains "custom" (the column-title row), or when it has fewer
    than four fields. Fields are returned with their padding untouched so
    they can be written back with the original alignment.
    """
    if "|" not in line or "custom" in line:
        return None
    fields = line.split('|')
    if len(fields) < 4:
        return None
    return fields[:4]


def parse_and_extract():
    """Write to ``result.txt`` every rule of file 2 that has a custom value.

    For each such rule the output row is made of the security ID, custom and
    applied fields of file 2. The default field comes from file 1 when the
    rule also exists there; otherwise "Not found!" is printed and the default
    of file 2 is used, so rules that exist only in file 2 are no longer
    dropped. Each selected line of file 2 is also echoed to stdout.
    """
    lines_f1 = get_lines_f1()
    lines_f2 = get_lines_f2()

    # Index file 1 once (security ID -> default field) instead of rescanning
    # it for every row of file 2. IDs are compared without their padding.
    defaults_f1 = {}
    for line_f1 in lines_f1:
        fields_f1 = _split_rule_row(line_f1)
        if fields_f1 is not None:
            defaults_f1.setdefault(fields_f1[0].strip(), fields_f1[1])

    with open('result.txt', 'w', encoding='utf-8') as file:
        for line_f2 in lines_f2:
            fields_f2 = _split_rule_row(line_f2)
            if fields_f2 is None:
                continue
            sec_id_f2, default_f2, custom_f2, applied_f2 = fields_f2
            # Only rows with a non-blank custom value are of interest.
            if not custom_f2.strip():
                continue
            print(line_f2)

            default_f1 = defaults_f1.get(sec_id_f2.strip())
            if default_f1 is None:
                # Rule exists only in file 2: keep it, with its own default.
                print("Not found!")
                default_f1 = default_f2
            file.write(sec_id_f2 + "|" + default_f1 + "|" + custom_f2 + "|" + applied_f2 + "|\n")


if __name__ == "__main__":
    parse_and_extract()

And this is what I get:

cat result.txt
# rule_a_13      |          -1 |           0 |           0 |
# rule_b_01      |          -1 |           1 |           1 |

But the rule rule_d_01 is missing...

Working on it,

thanks