Creating hierarchically numbered section headings counts subsections incorrectly-CodePudding

a_lst = [
    '# title',
    '## subtitle',
    '## s2  ',
    '## S3',
    '# t2',
    '# t4',
    '## s1',
    '## s2'
]

I want to convert the above list into this list:

req_lst = [
    '1. title',
    '1.1. subtitle',
    '1.2. s2  ',
    '1.3. S3',
    '2. t2',
    '3. t4',
    '3.1. s1',
    '3.2. s2'
]

I have written the following code in order to do so:

# Asker's attempt at numbering the headings in a_lst.
# Note: sh_no is never reset when a new '# ' heading starts, so the
# subsection counter keeps growing across sections.
modified_lst = []
h_no = 0   # current top-level heading number
sh_no = 0  # current subheading number

for i in range(len(a_lst)):
    # If string starts with '# ' it is a heading
    if a_lst[i][:2] == '# ':
        h_no += 1
        temp_hno = h_no
        modified_lst.append(a_lst[i].replace('#', str(h_no) + '.', 1))
    # If the string starts with '## ' it is a subheading
    elif a_lst[i][:3] == '## ':
        # temp_hno is always assigned from h_no above, so this is always true
        if temp_hno == h_no:
            sh_no += 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))
        else:
            sh_no = 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))

But this gives me this modified_lst:

modified_lst = [
    '1. title',
    '1.1. subtitle',
    '1.2. s2  ',
    '1.3. S3',
    '2. t2',
    '3. t4',
    '3.4. s1',
    '3.5. s2'
]

How can I create the correct numbering for subsections s1 and s2 under t4?

CodePudding user response：

You should reset sh_no every time you see a new '# ' heading, so modify the first if block to this:

if a_lst[i][:2] == '# ':
    sh_no = 0  # restart subsection numbering under every new top-level heading
    h_no += 1
    temp_hno = h_no
    modified_lst.append(a_lst[i].replace('#', str(h_no) + '.', 1))

CodePudding user response：

A nice trick for this kind of problem is to use an (infinite) iterator, such as count from the itertools module:

from itertools import count

a_lst = ['# title', '## subtitle', '## s2  ', '## S3', '# t2', '# t4', '## s1', '## s2']

# Numbered headings, built in input order.
out_lst = []

# Endless 1, 2, 3, ... supply for top-level heading numbers.
major_numbers = count(1)

for heading in a_lst:
    if not heading.startswith('# '):
        # Anything that is not a top-level heading is treated as a
        # subheading of the most recent one ('## ' prefix is 3 chars).
        out_lst.append(f'{major}.{next(minor_numbers)}. {heading[3:]}')
        continue

    major = next(major_numbers)
    # A fresh counter restarts subheading numbering for this section.
    minor_numbers = count(1)
    out_lst.append(f'{major}. {heading[2:]}')

output:

>>> out_lst
['1. title',
 '1.1. subtitle',
 '1.2. s2  ',
 '1.3. S3',
 '2. t2',
 '3. t4',
 '3.1. s1',
 '3.2. s2']

CodePudding user response：

Here's a simple but dynamic approach that works whether there is 1x # or there are 10x #, and whether a # suddenly jumps to ##### or vice versa.

Maintain a list that will hold the numbers e.g. [1], [1, 1], [1, 2], [1, 3], [2], and so on. Then, based on the length of each of the next # in the list, we will either increment that last digit in the list, or append a new 1, or remove the last and then increment.

a_lst = [
    '# title',
    '## subtitle',
    '## s2',
    '## S3',
    '# t2',
    '# t4',
    '## s1',
    '## s2',
    '### a',
    '#### b',
    '#### c',
    '### d',
    '# e',
    '### f',
    '### g',
    '# h',
    '##### i',
    '##### j',
    '##### k',
    '### l',
    '# m',
]

req_lst = []
# Current position in the outline: one counter per nesting level,
# e.g. [3, 2, 1] is rendered as "3.2.1.".
numbering = []

for item in a_lst:
    # Everything before the first space is the '#' run; its length is the depth.
    num, _, text = item.partition(" ")

    if len(num) == len(numbering):
        # Same depth as the previous heading: bump the last counter.
        numbering[-1] += 1
    elif len(num) > len(numbering):
        # Deeper heading: open each new level (possibly several) at 1.
        numbering.extend([1] * (len(num) - len(numbering)))
    elif len(num) < len(numbering):
        # Shallower heading: drop the deeper levels, then bump the counter
        # at the level we returned to.
        numbering = numbering[:len(num)]
        numbering[-1] += 1

    req_lst.append(".".join(map(str, numbering)) + ". " + text)

print(req_lst)

Output

[
    "1. title",
    "1.1. subtitle",
    "1.2. s2",
    "1.3. S3",
    "2. t2",
    "3. t4",
    "3.1. s1",
    "3.2. s2",
    "3.2.1. a",
    "3.2.1.1. b",
    "3.2.1.2. c",
    "3.2.2. d",
    "4. e",
    "4.1.1. f",
    "4.1.2. g",
    "5. h",
    "5.1.1.1.1. i",
    "5.1.1.1.2. j",
    "5.1.1.1.3. k",
    "5.1.2. l",
    "6. m"
]

CodePudding user response：

You have to reset sh_no after you get #.

Try this:

a_lst = ['# title', '## subtitle', '## s2  ', '## S3', '# t2', '# t4', '## s1', '## s2']
modified_lst = []
h_no = 0   # current top-level heading number
sh_no = 0  # current subheading number within the current heading

for i in range(len(a_lst)):

    if a_lst[i][:2] == '# ':
        h_no += 1
        temp_hno = h_no
        sh_no = 0  # restart subheading numbering for the new heading
        modified_lst.append(a_lst[i].replace('#', str(h_no) + '.', 1))
    elif a_lst[i][:3] == '## ':
        # temp_hno is always assigned from h_no above, so this is always true
        if temp_hno == h_no:
            sh_no += 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))
        else:
            sh_no += 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))
print(modified_lst)

Output:

['1. title', '1.1. subtitle', '1.2. s2  ', '1.3. S3', '2. t2', '3. t4', '3.1. s1', '3.2. s2']

CodePudding user response：

Here's a fixed code snippet:

modified_lst = []
h_no = 0   # current top-level heading number
sh_no = 0  # current subheading number within the current heading

a_lst = ['# title', '## subtitle', '## s2  ', '## S3', '# t2', '# t4', '## s1', '## s2']

for i in range(len(a_lst)):

    if a_lst[i][:2] == '# ':
        sh_no = 0  # restart subheading numbering for the new heading
        h_no += 1
        temp_hno = h_no
        modified_lst.append(a_lst[i].replace('#', str(h_no) + '.', 1))
    elif a_lst[i][:3] == '## ':
        # temp_hno is always assigned from h_no above, so this is always true
        if temp_hno == h_no:
            sh_no += 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))
        else:
            sh_no = 1
            modified_lst.append(a_lst[i].replace('##', str(h_no) + '.' + str(sh_no) + '.', 1))

print(modified_lst)

You forgot to assign sh_no = 0 when getting in the if a_lst[i][:2] == '# ': condition.

CodePudding user response：

Here is a generic method using collections.Counter.

It works for an arbitrary level of nesting and allows dummy (non-title) values in the list:

from collections import Counter

a_lst = ['# A', '## B', '## C', 'D', '## E', '# F', '# G', '## H', '### I']

out_lst = [None]*len(a_lst)

# counts[k] holds the current number at nesting level k (0 = top level)
counts = Counter()

for i,s in enumerate(a_lst):
    # check if dummy title and calculate level
    try:
        pre, post = s.split(' ', 1) # split string on first space
    except ValueError:
        # no space in the string, so unpacking into two names failed
        out_lst[i] = s
        continue   # not a title, skip
    level = Counter(pre)['#']-1 # number of "#" in the prefix, 0-based

    # level == -1: no "#" at all; otherwise the prefix must be only "#"
    if level == -1 or len(pre)-1 != level:
        out_lst[i] = s
        continue   # not a title, skip

    # increase current level
    counts[level] += 1

    # drop deeper levels; rebuilding over range() also keeps the keys in
    # level order and fills any skipped intermediate level with 0
    counts = Counter({k: counts[k] for k in range(level+1)})

    # format output
    pre = '.'.join(map(str, counts.values()))
    out_lst[i] = pre+'. '+post

input:

a_lst = ['# A', '## B', '## C', 'D', '## E', '# F',
         '# G', '## H', '### I', '######## J']

output:

['1. A',
 '1.1. B',
 '1.2. C',
 'D',
 '1.3. E',
 '2. F',
 '3. G',
 '3.1. H',
 '3.1.1. I',
 '3.1.1.0.0.0.0.1. J']

CodePudding user response：

Here's a generator based solution:

def chapn(sa):
    """Yield numbered versions of the '#'-prefixed heading strings in *sa*.

    A string with one leading '#' advances the main number and is yielded
    as '<main>.<rest>'. A string with two leading '#' advances the sub
    number and is yielded as '<main>.<sub><rest>' (no period after the sub
    number). <rest> is the string with its leading '#' characters removed,
    so it keeps the space that followed them.
    """
    main, sub = 0, 0
    for s in sa:
        ss = s.lstrip('#')  # strip leading #'s
        n = len(s) - len(ss)  # count the stripped #'s
        # determine numbers for main and sub: sub restarts at 0 whenever
        # the line is not a two-'#' heading
        sub = sub + 1 if n == 2 else 0
        main = main + 1 if n == 1 else main
        yield str(main) + '.' + (str(sub) if sub > 0 else '') + ss

Use:

list(chapn(a_lst))

Output:

['1. title',
 '1.1 subtitle',
 '1.2 s2  ',
 '1.3 S3',
 '2. t2',
 '3. t4',
 '3.1 s1',
 '3.2 s2']

CodePudding user response：

All the answers already mention the need to reset sh_no, so I will just add one more solution. It resembles queue operations: popping, inserting, and updating the last element.

# str_pre_hash stores the previous number in an editable form
# (1.1.1. is kept as ['1', '1', '1']).
# For each line, compare its number of '#' with len(str_pre_hash):
#   more '#'   -> append a '1' for every extra level
#   same '#'   -> increment the last entry
#   fewer '#'  -> truncate to the new depth, then increment the last entry
str_pre_hash = []
target = []
for x in a_lst:
    hashes, rest = x.split(' ', 1)
    new_hashes = len(hashes) - len(str_pre_hash)
    if new_hashes > 0:
        str_pre_hash += ['1'] * new_hashes
    else:
        # new_hashes is <= 0 here, so this keeps the first len(hashes) entries
        str_pre_hash = str_pre_hash[:len(str_pre_hash) + new_hashes]
        str_pre_hash[-1] = str(int(str_pre_hash[-1]) + 1)
    target.append('.'.join(str_pre_hash) + '. ' + rest)
print(target)

CodePudding user response：

Try this:

a_lst = ['# title', '## subtitle', '## s2  ', '## S3', '# t2', '# t4', '## s1', '## s2']

# a: current top-level number; b: number the next subheading will receive
a, b = 0, 1
for idx, al in enumerate(a_lst):
    # number of '#' in the first whitespace-separated token
    hashes = al.split()[0].count('#')
    if hashes == 1:
        a += 1
        b = 1  # subheading numbering restarts under each new heading
        # replace only the first '#' so a '#' inside the title text survives
        al = al.replace('#', f'{a}.', 1)
    elif hashes == 2:
        al = al.replace('##', f'{a}.{b}.', 1)
        b += 1
    # the list is rewritten in place
    a_lst[idx] = al

print(a_lst)

Output:

['1. title', '1.1. subtitle', '1.2. s2  ', '1.3. S3', '2. t2', '3. t4', '3.1. s1', '3.2. s2']