Home > database >  Matching Regex on new line Python
Matching Regex on new line Python

Time:03-09

The following Regex gives me this output (note that I am using Python):

enter image description here

Which is perfect and exactly how I want it to be. However, when I run this regex in Python it works, but it doesn't capture the following lines of vlans when I use groupdict (see the second entry):

{'port_name': 'Te1/0/1', 'description': 'CVH10 Mgt Clstr', 'duplex': 'Full', 'speed': '10000', 'neg': 'Off', 'link_state': 'Up', 'flow_control': 'On', 'mode': '    T', 'vlans': '(1),161-163'}
{'port_name': 'Te1/0/2', 'description': 'CVH10 VM 1', 'duplex': 'Full', 'speed': '10000', 'neg': 'Off', 'link_state': 'Up', 'flow_control': 'On', 'mode': '    T', 'vlans': '(1),11,101,110,'}
{'port_name': 'Fo2/1/1', 'description': None, 'duplex': 'N/A', 'speed': 'N/A', 'neg': 'N/A', 'link_state': 'Detach', 'flow_control': 'N/A', 'mode': None, 'vlans': None}
{'port_name': 'Te2/0/8', 'description': None, 'duplex': 'Full', 'speed': '10000', 'neg': 'Off', 'link_state': 'Down', 'flow_control': 'Off', 'mode': '   A', 'vlans': '1'}

As you can see in the Regex above the second entry matches 19 vlans, but the Python output only gives me 4. How can I fix this?

This is the code that I'm running:

from sys import argv
import re
import pprint
# Pretty-printer instance (not referenced by the rest of this script).
pp = pprint.PrettyPrinter()

# Requires exactly one command-line argument: the path of the file to parse.
# Unpacking raises ValueError if argv does not hold exactly two items.
script, filename = argv

# Pattern for one row of the "show interfaces status" table.
# Every field is one or more characters and the columns are separated by one
# or more whitespace characters, so each token needs its `+` quantifier.
# [^\S\r\n] means "whitespace other than CR/LF", i.e. it stays on one line.
# The trailing (?:\r?\n[^\S\r\n]+[\d(),-]+)* lets the vlans group continue
# onto indented continuation lines, but only when the string being matched
# actually contains those following lines.
interface_details = re.compile(r'^(?P<port_name>[\w\/]+)[^\S\r\n]+(?P<description>(?!Full\b|N\/A\b)\S+(?:[^\S\r\n]+\S+)*?)?\s+(?P<duplex>Full|N\/A)\b\s+(?P<speed>[\d\w\/]+)\s+(?P<neg>[\w\/]+)\s+(?P<link_state>[\w]+)\s+(?P<flow_control>[\w\/]+)(?:(?P<mode>[^\S\r\n]+\w+)(?:[^\S\r\n]+(?P<vlans>[\d(),-]+(?:\r?\n[^\S\r\n]+[\d(),-]+)*))?)?')

# Collects one dict of named regex groups for every line that matches.
local_list = []

def main():
    """Match each line of the input file against interface_details and print the results.

    The file is processed one line at a time, so a single match can never
    extend onto the line(s) that follow it.
    """
    with open(filename) as current_file:
        # enumerate() starts counting at 1; linenumber is not used below.
        for linenumber, line in enumerate(current_file, 1):
          # A fresh dict is created for every line.
          working_dict = {}
          # match() only sees the current line of the file.
          interface_details_result = interface_details.match(line)
          if interface_details_result is not None:
              working_dict.update(interface_details_result.groupdict())
              local_list.append(working_dict)

    # Print every collected dict, one per line.
    for each in local_list:
        print(each)

if __name__ == '__main__':
    main()

Note that I'm using argv, so the script is run as: python3 main.py test.txt

The data of the text file is listed below

>show interfaces status
Port      Description     Duplex Speed   Neg  Link   Flow  M  VLAN
                                              State  Ctrl
--------- --------------- ------ ------- ---- ------ ----- -- -------------------
Te1/0/1   CVH10 Mgt Clstr Full   10000   Off  Up     On    T  (1),161-163
Te1/0/2   CVH10 VM 1      Full   10000   Off  Up     On    T  (1),11,101,110,
                                                              120,130,140,150,
                                                              160,170,180,190,
                                                              200,210,230,240,
                                                              250,666,999
Fo2/1/1                   N/A    N/A     N/A  Detach N/A
Te2/0/8                   Full   10000   Off  Down   Off   A  1

CodePudding user response:

You are matching line by line.

Te1/0/2   CVH10 VM 1      Full   10000   Off  Up     On    T  (1),11,101,110,
                                                              120,130,140,150,
                                                              160,170,180,190,
                                                              200,210,230,240,
                                                              250,666,999

The first line which is-

Te1/0/2   CVH10 VM 1      Full   10000   Off  Up     On    T  (1),11,101,110,

passes your regex expression.

But the following lines don't. For example, the second line is-

                                                      120,130,140,150,

For this line, interface_details.match(" 120,130,140,150,") returns None because the line doesn't match the regex.

CodePudding user response:

Continuing @anirudh's answer,

test_str will hold the entire string data read from file and regex will be your regex

# Pattern for one row of the "show interfaces status" table.
# Every field is one or more characters and the columns are separated by one
# or more whitespace characters, so each token needs its `+` quantifier.
# [^\S\r\n] means "whitespace other than CR/LF", i.e. it stays on one line.
# The trailing (?:\r?\n[^\S\r\n]+[\d(),-]+)* lets the vlans group continue
# onto the indented continuation lines that follow a row.
regex = r"^(?P<port_name>[\w\/]+)[^\S\r\n]+(?P<description>(?!Full\b|N\/A\b)\S+(?:[^\S\r\n]+\S+)*?)?\s+(?P<duplex>Full|N\/A)\b\s+(?P<speed>[\d\w\/]+)\s+(?P<neg>[\w\/]+)\s+(?P<link_state>[\w]+)\s+(?P<flow_control>[\w\/]+)(?:(?P<mode>[^\S\r\n]+\w+)(?:[^\S\r\n]+(?P<vlans>[\d(),-]+(?:\r?\n[^\S\r\n]+[\d(),-]+)*))?)?"

# The whole table as ONE string, so a match may span several lines.
test_str = ("Port      Description     Duplex Speed   Neg  Link   Flow  M  VLAN\n"
    "                                              State  Ctrl\n"
    "--------- --------------- ------ ------- ---- ------ ----- -- -------------------\n"
    "Te1/0/1   CVH10 Mgt Clstr Full   10000   Off  Up     On    T  (1),161-163\n"
    "Te1/0/2   CVH10 VM 1      Full   10000   Off  Up     On    T  (1),11,101,110,\n"
    "                                                              120,130,140,150,\n"
    "                                                              160,170,180,190,\n"
    "                                                              200,210,230,240,\n"
    "                                                              250,666,999\n"
    "Fo2/1/1                   N/A    N/A     N/A  Detach N/A\n"
    "Te2/0/8                   Full   10000   Off  Down   Off   A  1")

# re.MULTILINE makes ^ match at the start of every line, not only at the
# start of the string.
for match in re.finditer(regex, test_str, re.MULTILINE):
    print(match.groupdict())

This will get you the result you need.

CodePudding user response:

Currently you are reading separate lines, so the pattern will not match for the lines that have only this:

                                                          120,130,140,150,

What you could do is read the whole file instead, and add re.M enabling multiline.

In your code you are using this, which will first update the dict, and then append the working_dict resulting in n times the same value.

working_dict.update(interface_details_result.groupdict())
local_list.append(working_dict)

If you want to gather all the groupdict's in a list, you can append it using local_list.append(m.groupdict())

import re
import pprint
pp = pprint.PrettyPrinter()

# Pattern for one row of the "show interfaces status" table.
# Every field is one or more characters and the columns are separated by one
# or more whitespace characters, so each token needs its `+` quantifier.
# [^\S\r\n] means "whitespace other than CR/LF", i.e. it stays on one line.
# The trailing (?:\r?\n[^\S\r\n]+[\d(),-]+)* lets the vlans group continue
# onto indented continuation lines. re.M makes ^ match at every line start,
# so the pattern can be applied to the whole file content at once.
interface_details = re.compile(r'^(?P<port_name>[\w\/]+)[^\S\r\n]+(?P<description>(?!Full\b|N\/A\b)\S+(?:[^\S\r\n]+\S+)*?)?\s+(?P<duplex>Full|N\/A)\b\s+(?P<speed>[\d\w\/]+)\s+(?P<neg>[\w\/]+)\s+(?P<link_state>[\w]+)\s+(?P<flow_control>[\w\/]+)(?:(?P<mode>[^\S\r\n]+\w+)(?:[^\S\r\n]+(?P<vlans>[\d(),-]+(?:\r?\n[^\S\r\n]+[\d(),-]+)*))?)?', re.M)

def main():
    """Read file.txt in one go and print the named groups of every table row.

    The complete file content is searched at once, which allows a single
    match to span several lines of the file.
    """
    filename = "file.txt"
    with open(filename) as current_file:
        all_lines = current_file.read()

    # One dict of named groups per match, in order of appearance.
    local_list = [found.groupdict() for found in interface_details.finditer(all_lines)]

    for entry in local_list:
        print(entry)


if __name__ == '__main__':
    main()
  • Related