simplify python code for reading data from files and store into numpy array


I have an .inp file, data.inp, that needs to be read from Python:

*Heading
** Job name: inp6_1 Model name: Model-1
*Node
      1,          50.,          20.,          40.
      2,         100.,          20.,          40.
      3,         100.,          20.,           0.
      4,          50.,          20.,           0.
      5,         100.,           0.,          40.
      6,         100.,           0.,           0.
      7,          50.,           0.,           0.
      8,          50.,           0.,          40.
      9,           0.,          40.,          40.
*Element, type=C3D8
  1,  82, 336, 712, 294,   1,  15, 168,  46
  2, 336, 337, 713, 712,  15,  16, 169, 168
  3, 337, 338, 714, 713,  16,  17, 170, 169
*Elset, elset=Set-1, instance=Part-1-1, generate
 321,  951,   10
*End Assembly

The goal is to store all the numbers between "*Node" and "*Element" in the .inp file. Below is my current code. It works, but it is pretty lengthy, because I used with open(filepath, 'r') as file: twice: the first time to get the line numbers of the markers, and the second time to read from those lines and store the values in a NumPy array. I tried to put both for loops under one with block, but that did not work; I only got one line of numbers from the file.

my working code:

import numpy as np
from io import StringIO

def findNodes(filepath):
    # First pass: find the line numbers of the *Node and *Element markers.
    with open(filepath, 'r') as file:
        for num, line in enumerate(file, 1):
            if '*Node' in line:
                nodeLineStart = num
            if '*Element' in line:
                nodeLineEnd = num

    # Second pass: parse every line strictly between the two markers.
    xx = np.empty(shape=[0, 4])  # zero rows, so no uninitialized row is kept
    with open(filepath, 'r') as file:
        for num, lin in enumerate(file, 1):
            if nodeLineStart + 1 <= num <= nodeLineEnd - 1:
                text = lin.replace(" ", "")
                lines = np.genfromtxt(StringIO(text), delimiter=",").reshape(1, 4)
                xx = np.append(xx, lines, axis=0)
    return xx

ndarray = findNodes('data.inp')

ndarray = findNodes('data.inp')

CodePudding user response:

You can just read the entire file into a single string and slice it, instead of reading it line by line:

# Read as single string
with open(filepath, 'r') as file:
    contents = file.read()

# find *Node and *Element and get substring in between
first = "*Node"
second = "*Element"
numbers = contents[contents.find(first) + len(first):contents.find(second)]

# Remove commas, split string at whitespace characters and convert numbers to floats
numbers = [float(x) for x in numbers.replace(',', '').split()]
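If you want the same (N, 4) shape that the original findNodes returned, the flat float list can be reshaped with NumPy. A minimal self-contained sketch, with a two-node sample of the file contents inlined (hypothetical data, so it runs without data.inp):

```python
import numpy as np

# Inline sample of the relevant part of the file (hypothetical, two nodes).
contents = """*Node
      1,          50.,          20.,          40.
      2,         100.,          20.,          40.
*Element, type=C3D8
"""

first = "*Node"
second = "*Element"
# Slice out everything between the two keywords.
numbers = contents[contents.find(first) + len(first):contents.find(second)]
# Remove commas, split on whitespace, convert to floats.
numbers = [float(x) for x in numbers.replace(',', '').split()]

# Reshape the flat list into one row per node (four values each).
arr = np.array(numbers).reshape(-1, 4)
print(arr.shape)  # (2, 4)
```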

Or using your basic structure and numpy array as return type:

import numpy as np

def findNodes(filepath, first="*Node", second="*Element"):
    with open(filepath, 'r') as file:
        contents = file.read()
    numbers = contents[contents.find(first) + len(first):contents.find(second)]
    return np.array([float(x) for x in numbers.replace(',', '').split()])

findNodes("data.inp")

CodePudding user response:

BernieD already gave a good answer, but if you do want to read the file line by line, you could use an indicator variable that keeps track of whether the current line is in between the start and stop keywords or not:

import numpy as np

def findNodes(filepath):
    datalist = []
    datastream = False
    
    with open(filepath, 'r') as file:
        for line in file:
            if '*Element' in line:
                datastream = False
            if datastream:
                datalist.append([float(n) for n in 
                                 line.replace(' ', '').replace('\n', '').split(',')])
            if '*Node' in line:
                datastream = True

    return np.array(datalist)


ndarray = findNodes('data.inp')
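Note that the order of the three if checks is what keeps the keyword lines themselves out of the result: '*Element' switches the flag off before the append, and '*Node' switches it on only after it. A small self-contained sketch of the same flag technique, using io.StringIO with hypothetical sample data in place of a real file:

```python
import io
import numpy as np

# Hypothetical two-node sample, wrapped so it reads like an open file.
sample = io.StringIO(
    "*Node\n"
    " 1, 50., 20., 40.\n"
    " 2, 100., 20., 40.\n"
    "*Element, type=C3D8\n"
)

datalist = []
datastream = False
for line in sample:
    if '*Element' in line:      # flag off BEFORE the append: skip this line
        datastream = False
    if datastream:
        datalist.append([float(n) for n in line.strip().split(',')])
    if '*Node' in line:         # flag on AFTER the append: skip this line too
        datastream = True

arr = np.array(datalist)
print(arr.shape)  # (2, 4)
```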

CodePudding user response:

This is from a script I wrote to parse inp files:

def read_input(ifn):
    """Read an Abaqus INP file, read its sections.
    Return the section headings and the lines.
    """
    with open(ifn) as inf:
        lines = [ln.strip() for ln in inf.readlines()]
    # Remove comments
    lines = [ln for ln in lines if not ln.startswith("**")]
    # Find section headers
    headings = [(ln[1:], n) for n, ln in enumerate(lines) if ln.startswith("*")]
    # Filter the headings so that every heading has a start-of-data and
    # end-of-data index.
    headings.append(("end", -1))
    ln = [h[1] for h in headings]
    headings = [
        (name, start + 1, end) for (name, start), end in zip(headings[:-1], ln[1:])
    ]
    return headings, lines


def retrieve_nodes(headings, lines):
    """Extract the nodes out of lines.
    Return a dict of nodes, indexed by the node number.
    A node is a 3-tuple of coordinate strings.
    The node coordinates are *not* converted to floats, so as to not lose precision.

    Arguments:
        headings (list): list of (name, start, end) tuples.
        lines (list): list of lines.

    Returns:
        A dict of nodes (x,y,z)-tuples indexed by the node number.
    """
    nodes = {}
    for h in headings:
        if h[0].lower().startswith("node"):
            for ln in lines[h[1]:h[2]]:
                idx, x, y, z = ln.split(",")
                nodes[int(idx)] = (x.strip(), y.strip(), z.strip())
            # Assuming there is only one NODE section.
            break
    return nodes
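The trickiest part of read_input is how each heading gets a start and end index: a section's data ends where the next heading begins, and a sentinel is appended so the last section also gets an end index. A self-contained sketch of just that pairing step, on a toy list of already-stripped, comment-free lines (hypothetical sample):

```python
# Toy input: stripped lines with two sections (hypothetical data).
lines = [
    "*Node",
    "1, 50., 20., 40.",
    "2, 100., 20., 40.",
    "*Element, type=C3D8",
    "1, 82, 336, 712, 294",
]

# Every line starting with "*" is a section header; record name and index.
headings = [(ln[1:], n) for n, ln in enumerate(lines) if ln.startswith("*")]
# Sentinel so the last real section gets an end index (-1, i.e. up to the
# last line).
headings.append(("end", -1))
starts = [h[1] for h in headings]
# Pair each heading with the index of the next one:
# (name, first data line, one-past-last data line).
headings = [
    (name, start + 1, end) for (name, start), end in zip(headings[:-1], starts[1:])
]
print(headings)
# [('Node', 1, 3), ('Element, type=C3D8', 4, -1)]
```

With these indices, lines[1:3] is exactly the two node lines, which is what retrieve_nodes then iterates over.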

CodePudding user response:

Try:

import re
import numpy as np

def findNodes(filepath):
    with open(filepath, 'r') as fr:
        alldata = fr.read()
    data = re.findall(r"(?<=\*Node\n)[\d.,\s\n]+(?=\n\*Element)", alldata)[0]
    nums = []
    for dataline in data.split('\n'):
        nums.extend([float(num) for num in dataline.split(',')])
    return np.array(nums).reshape((9, 4))

print(findNodes('data.inp'))

which prints the contents of the returned NumPy array:

[[  1.  50.  20.  40.]
 [  2. 100.  20.  40.]
 [  3. 100.  20.   0.]
 [  4.  50.  20.   0.]
 [  5. 100.   0.  40.]
 [  6. 100.   0.   0.]
 [  7.  50.   0.   0.]
 [  8.  50.   0.  40.]
 [  9.   0.  40.  40.]]

Is this what you wanted to achieve?
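One caveat with the code above: reshape((9, 4)) hard-codes the node count for this particular file. Letting NumPy infer the row count with -1 makes the same step work for any number of nodes. A small sketch with made-up values:

```python
import numpy as np

# Hypothetical flat list of parsed values, two nodes' worth (4 values each).
nums = [1.0, 50.0, 20.0, 40.0, 2.0, 100.0, 20.0, 40.0]

# -1 tells NumPy to infer the number of rows from the data length.
arr = np.array(nums).reshape((-1, 4))
print(arr.shape)  # (2, 4)
```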
