Home > Software design >  Describing a file in a certain area with a Dict
Describing a file in a certain area with a Dict

Time:02-15

I have a problem. I read a file and extract everything between \begin{acronym} and \end{acronym} (this is the data). Then I want to write my dict (sorted_output) back between \begin{acronym} and \end{acronym}. The old entries should be removed, so that only the entries from the dict remain in between. The order is crucial. When I execute my code, the output file is identical to the input file.

Code

# read the file
with open("./file.txt", 'r', encoding="utf-8") as file:
        data = file.read().rstrip()
# NOTE(review): the literal below rebinds `data`, so the text read from
# ./file.txt above is discarded. It is a raw string, so the LaTeX
# backslashes stay literal.
data = r"""\chapter*{Short}
\addcontentsline{toc}{chapter}{Short}
\markboth{Short}{Short}
\begin{acronym}[example]
    \acro{knmi}[KNMI]{Koninklijk Nederlands Meteorologisch Instituut}
    \acro{example}[e.g.]{For example}
\end{acronym}"""

# The transformation of the data is omitted here; its result is the dict
# below, whose insertion order is the order the entries should be written in.
# ...
sorted_output = {
    "abbreviation_eg": {
        "symbol": "eg",
        "shortform": "e.g.",
        "longform": "For example"
    }
    # NOTE(review): a comma is missing after the closing brace above, so this
    # dict literal is a syntax error as written.
    "abbreviation_fbi": {
        "symbol": "fbi",
        "shortform": "FBI",
        "longform": "Federal Bureau of Investigation"
    },   
    "abbreviation_knmi": {
        "symbol": "knmi",
        "shortform": "KNMI",
        "longform": "Koninklijk Nederlands Meteorologisch Instituut"
    },
}

    # Copy the input file to stats.txt line by line.
    # NOTE(review): this block is indented although no statement encloses it,
    # and abbreviationPathComplete is not defined anywhere in this snippet.
    with open(abbreviationPathComplete, encoding="utf-8") as fin, open('stats.txt','w', encoding="utf-8") as fout:
        for line in fin:
            fout.write(line)
            if(line.startswith(r'\begin{acronym}')):
                # Consume the line that directly follows \begin{acronym}.
                next_line = next(fin)
                # The new entries are only emitted when \end{acronym} comes
                # immediately after \begin{acronym}. Otherwise next_line and
                # all following old entry lines are copied through unchanged.
                if (next_line.startswith(r'\end{acronym}')):
                     for key in sorted_output:
                         print(sorted_output[key]["shortform"])
                         # NOTE(review): the "+" operators between the string
                         # pieces appear to be missing, and `file` is the
                         # handle of the first with-block (already closed
                         # here), not the output handle `fout`.
                         file.write(r'\acro{'  sorted_output[key]["symbol"]  '}[' sorted_output[key]["shortform"]  ']{' sorted_output[key]["longform"]  '}')
                fout.write(next_line)

Certain file

\chapter*{Short}
\addcontentsline{toc}{chapter}{Short}
\markboth{Short}{Short}
\begin{acronym}[example]
    \acro{knmi}[KNMI]{Koninklijk Nederlands Meteorologisch Instituut}
    \acro{example}[e.g.]{For example}
\end{acronym}

Desired file

\chapter*{Short}
\addcontentsline{toc}{chapter}{Short}
\markboth{Short}{Short}
\begin{acronym}[example]
    \acro{example}[e.g.]{For example}
    \acro{fbi}[FBI]{Federal Bureau of Investigation}
    \acro{knmi}[KNMI]{Koninklijk Nederlands Meteorologisch Instituut}
\end{acronym}

CodePudding user response:

I would use something like this:

# Rewrite `data` into stats.txt, replacing the run of \acro lines with the
# entries of `sorted_output` (both names are defined earlier in the script).

# If `data` came from a non-raw string literal, "\b" and "\a" were turned into
# the backspace and bell control characters; restore the LaTeX backslashes so
# the raw-string comparison below works. For raw text this is a no-op.
text = data.replace("\b", "\\b").replace("\a", "\\a")

replace = True  # True until the new entries have been written once
with open('stats.txt', 'w', encoding='utf-8') as file:
    for line in text.split('\n'):
        if line.strip().startswith(r'\acro{'):
            # Only the first old entry triggers the output of the new entries;
            # every old \acro line is dropped.
            if replace:
                for entry in sorted_output.values():
                    print(entry["shortform"])
                    # Doubled braces in an f-string are literal braces, single
                    # braces interpolate the value.
                    file.write(
                        f'\t\\acro{{{entry["symbol"]}}}'
                        f'[{entry["shortform"]}]'
                        f'{{{entry["longform"]}}}\n'
                    )
            replace = False
        else:
            file.write(line + '\n')

This replaces the first \acro line with all entries from sorted_output and skips the remaining \acro lines of the original data.

CodePudding user response:

Based on your comments and questions:

# Raw string, so the LaTeX backslashes (\b, \a, ...) stay literal instead of
# being interpreted as Python escape sequences.
data = r"""\chapter*{Short}
\addcontentsline{toc}{chapter}{Short}
\markboth{Short}{Short}
\begin{acronym}[Short]
    \acro{knmi}[KNMI]{Koninklijk Nederlands Meteorologisch Instituut}
    \acro{example}[e.g.]{For example}
\end{acronym}"""

data = data.split('\n')

# Entries to place inside the acronym environment. Dicts keep insertion
# order, so this is also the order in which the entries are written.
sorted_output = {
    "abbreviation_eg": {
        "symbol": "eg",
        "shortform": "e.g.",
        "longform": "For example"
    },
    "abbreviation_fbi": {
        "symbol": "fbi",
        "shortform": "FBI",
        "longform": "Federal Bureau of Investigation"
    },
    "abbreviation_knmi": {
        "symbol": "knmi",
        "shortform": "KNMI",
        "longform": "Koninklijk Nederlands Meteorologisch Instituut"
    }
}


def format_acro(entry):
    r"""Return the indented LaTeX line ``\acro{symbol}[shortform]{longform}``.

    ``entry`` is a mapping with the keys "symbol", "shortform" and "longform".
    The result carries no trailing newline.
    """
    # Doubled braces in an f-string are literal braces, single braces
    # interpolate the value.
    return (
        f'    \\acro{{{entry["symbol"]}}}'
        f'[{entry["shortform"]}]'
        f'{{{entry["longform"]}}}'
    )


# Copy every line through, except that the old body of the acronym
# environment is replaced by the entries from sorted_output.
output_lines = []
write = True  # False while we are between \begin{acronym} and \end{acronym}
for line in data:
    if line.startswith(r'\begin{acronym}'):
        write = False
        output_lines.append(line)
        output_lines.extend(format_acro(entry) for entry in sorted_output.values())
    elif write:
        output_lines.append(line)
    elif line.startswith(r'\end{acronym}'):
        output_lines.append(line)
        write = True
    # Any other line inside the environment is an old entry and is dropped.

# Every line, including \end{acronym}, is terminated with a newline so that
# text following the environment is never glued onto it.
with open('stats.txt', 'w', encoding='utf-8') as file:
    file.writelines(line + '\n' for line in output_lines)

CodePudding user response:

for line in data:

In your example, data is a string, so the loop iterates over its individual characters. A single character never starts with r'\begin{acronym}'.

So the if condition is never True, and nothing happens.

  • Related