Home > Mobile >  how to return a new string with different possibilities if the original string included a forward slash
how to return a new string with different possibilities if the original string included a forward slash

Time:08-29

here is an example of what I want to achieve:

input:

example = "bear a/little/no resemblance to sth/sb/whatever"

output:

alternatives = ['bear a resemblance to sth',
                'bear a resemblance to sb',
                'bear a resemblance to whatever',
                "bear little resemblance to sth",
                "bear little resemblance to sb",
                "bear little resemblance to whatever",
                "bear no resemblance to sth",
                "bear no resemblance to sb",
                "bear no resemblance to whatever",
]

here is another example:

input:

example = "beat about/around the bush"

output:

alternatives = ['beat about the bush',
                'beat around the bush'
]

and one more

input:

example = "become available/rich/a writer, etc."

output:

alternatives = ['become available',
                'become rich',
                'become a writer']

I'm given an English sentence that could contain a forward slash; a forward slash in this case means OR. So if I find a forward slash in the example string, I need to return new strings, one for each of the words on either side of the /.

The sentence could contain any number of / characters, or none at all.

EDIT

I was able to achieve the desired outcome with the following code, but I feel like this is far from being Pythonic. I would appreciate it if someone could suggest a more Pythonic approach to this problem.

# Expand the slash-separated alternatives in `example` ("x/y" or "x/y/z")
# into separate sentences collected in `alt`, then print them numbered.
# `example` must already be defined as a string before this script runs.
import re

alt = []  # short for alternatives

# multiple cleaning stages for every baseword

##1## remove ', etc.'
a = example.replace(', etc.', '')

##2## does this string have / / in it
regex = re.compile(r'(\w+)\/(\w+)\/(\w+)')

match = regex.search(a)

delete_later = []  # sentences that were expanded and must be removed from alt during clean up

if match:
    # partition() yields (text before, matched "x/y/z", text after); each
    # alternative is spliced back in between the untouched outer parts.
    part = a.partition(match.group(0))
    s1 = part[0] + match.group(1) + part[2]
    s2 = part[0] + match.group(2) + part[2]
    s3 = part[0] + match.group(3) + part[2]
    alt.append(s1)
    alt.append(s2)
    alt.append(s3)

#check again:
for _ in range(10):
    # alt grows while it is being iterated, so sentences appended here are
    # themselves visited (and expanded) later in the same pass.
    for item in alt:
        match = regex.search(item)
        if match:
            delete_later.append(item)
            part = item.partition(match.group(0))
            s1 = part[0] + match.group(1) + part[2]
            s2 = part[0] + match.group(2) + part[2]
            s3 = part[0] + match.group(3) + part[2]
            alt.append(s1)
            alt.append(s2)
            alt.append(s3)

    #clean up
    for i in delete_later:
        try:
            alt.remove(i)
        except ValueError:
            # i was already removed from alt in an earlier pass
            pass


##3## does this string have / in it
if alt:
    regex = re.compile(r'(\w+)\/(\w+)')
    for _ in range(10):
        for item in alt:
            match = regex.search(item)
            if match:
                delete_later.append(item)
                part = item.partition(match.group(0))
                s1 = part[0] + match.group(1) + part[2]
                s2 = part[0] + match.group(2) + part[2]
                alt.append(s1)
                alt.append(s2)

        #clean up
        for i in delete_later:
            try:
                alt.remove(i)
            except ValueError:
                # i was already removed from alt in an earlier pass
                pass

#else:
#check for the 1st time
# NOTE(review): because the `else` above is commented out, this pass always
# runs on the original sentence `a`, even when the three-way pass already
# expanded it; in that case alt can end up containing duplicate sentences.
regex = re.compile(r'(\w+)\/(\w+)')
match = regex.search(a)

delete_later = []  # reset: sentences expanded from here on, removed from alt during clean up

if match:
    part = a.partition(match.group(0))
    s1 = part[0] + match.group(1) + part[2]
    s2 = part[0] + match.group(2) + part[2]
    alt.append(s1)
    alt.append(s2)

#check again:
for _ in range(10):
    for item in alt:
        match = regex.search(item)
        if match:
            delete_later.append(item)
            part = item.partition(match.group(0))
            s1 = part[0] + match.group(1) + part[2]
            s2 = part[0] + match.group(2) + part[2]
            alt.append(s1)
            alt.append(s2)

    #clean up
    for i in delete_later:
        try:
            alt.remove(i)
        except ValueError:
            # i was already removed from alt in an earlier pass
            pass


# print the resulting sentences, numbered from 1
for i, e in enumerate(alt, 1):
    print(i, e)

CodePudding user response:

You can use this regexp to catch your cases: "(\w+\/)+(.+?, etc\.)|(\w+\/)+\w+":

  • (\w+\/)+ is for the first part of the options string: one or more words, each ending with /
  • the ending is covered by two separate cases, either (.+?, etc\.) or \w+

Full code:

import re
from pprint import pprint


def get_options(s):
    """Return the list of alternatives contained in the option group *s*.

    Every literal ", etc." is dropped from *s* first; what remains is cut
    at each "/" and the pieces are returned in their original order.
    """
    # splitting on the marker and re-joining removes every occurrence of it
    without_etc = "".join(s.split(", etc."))
    return without_etc.split("/")


def split(s):
    """Expand the first slash-separated group of alternatives found in *s*.

    A group is either a run of "word/" prefixes followed by text that ends
    in ", etc." (e.g. "available/rich/a writer, etc."), or a plain
    "word/word[/word...]" run (e.g. "about/around").

    Returns one copy of *s* per alternative, with that group replaced by
    the alternative.  When *s* contains no group, returns ``[s]``.
    """
    # Raw string with the "+" quantifiers in place: one or more "word/"
    # prefixes, then either a lazy tail ending in ", etc." or a final word.
    match = re.search(r"(\w+\/)+(.+?, etc\.)|(\w+\/)+\w+", s)
    if match is None:
        return [s]

    # Splice by position so only the matched occurrence is replaced; a
    # str.replace() would also rewrite later identical groups and lose
    # combinations (e.g. "a/b and a/b").
    head, tail = s[:match.start()], s[match.end():]
    return [head + option + tail for option in get_options(match.group())]


# Sentences to expand; each "/" separates alternatives of one group.
examples = [
    "bear a/little/no resemblance to sth/sb/whatever",
    "beat about/around the bush",
    "become available/rich/a writer, etc.",
    "(the) most attractive/important/popular, etc. a dance/language/riding, etc. school",
]

# Keep expanding until a whole pass leaves the number of sentences unchanged.
n = 0
while n < len(examples):
    n = len(examples)
    examples = [variant for sentence in examples for variant in split(sentence)]

pprint(examples)
  • Related