How to match a string across multiple lines in Python? - CodePudding

I have a logic problem with matching a string across multiple lines. Can anyone help?

The goal is to match the words of search_sentence against the sentence on every line.

# Phrase whose words are looked for, in order, in each line below.
search_sentence = "for learning distinctive features among"

# Sample input lines; the trailing "# NN" comments are 1-based line numbers.
# Some lines end in a hyphenated word fragment ("at-", "co-", "mul-", "ar-")
# that continues on the following line.
raw_sentences = ["methods it is possible to among recognize Instagram filters and at-", # 01
                 "tenuate the sensor pattern noise signal in images. Amerini", # 02
                 "et al. [10] introduced a CNN for learning distinctive features", # 03
                 "among social networks. for learning distinctive features among from the histogram of the discrete co-", # 04
                 "sine transform (DCT) coefficients and the noise residual of", # 05
                 "the images. Phan et al. [11] proposed a method to track mul-", # 06
                 "tiple image sharing on social networks by using a CNN for ar-", # 07
                 "chitecture able to learn", # 08
                 "et al. [10] introduced a CNN for learning distinctive features among it is possible to among recognize Instagram filters", # 09
                 "and at- tenuate xx"] # 10


def longest_intersection(string1, string2):
    """Return the leading words of ``string2`` found, in order, in ``string1``.

    ``string1`` is scanned left to right. Each time the current word equals
    the next not-yet-matched word of ``string2`` it is collected and the
    cursor into ``string2`` advances. Words of ``string1`` that do not match
    are skipped, so the collected words form an in-order subsequence of
    ``string1`` and need not be adjacent there.

    Args:
        string1: The line to scan.
        string2: The phrase whose leading words are being looked for.

    Returns:
        A list of the matched words (always a prefix of ``string2.split()``);
        an empty list when nothing matches.
    """
    remaining = string2.split()
    matched = []
    position = 0  # index of the next word of string2 still to be matched
    for word in string1.split():
        if position < len(remaining) and word == remaining[position]:
            matched.append(word)
            position += 1
    # The result depends only on the two arguments and is always a list, so
    # callers can pass it straight to ' '.join(...).
    return matched


# Walk the lines in order, printing what each one contributes to the phrase.
# search_sentence shrinks to the still-unmatched remainder after a hit and is
# restored to the full phrase after a miss or once it has been used up.
for line in raw_sentences:
    matched_words = longest_intersection(line.strip(), search_sentence)
    one_line_match = ' '.join(matched_words)

    # A hit needs a non-empty match whose first character equals the first
    # character of the phrase currently being searched for.
    is_hit = bool(one_line_match) and one_line_match[0] == search_sentence[0]
    if not is_hit:
        print("[no matched sentences!]")
        search_sentence = "for learning distinctive features among"
        continue

    print(one_line_match)
    search_sentence = search_sentence.replace(one_line_match, "").strip()
    if not search_sentence:
        search_sentence = "for learning distinctive features among"

For now, my output is:

[no matched sentences!]
[no matched sentences!]
for learning distinctive features
among
[no matched sentences!]
[no matched sentences!]
for
[no matched sentences!]
for learning distinctive features among
[no matched sentences!]

But my desired output looks like this:

[no matched sentences!]
[no matched sentences!]
for learning distinctive features
among for learning distinctive features among
[no matched sentences!]
[no matched sentences!]
[no matched sentences!]
[no matched sentences!]
for learning distinctive features among
[no matched sentences!]

CodePudding user response：

I hope this works as a solution. I use the regex module (re) to search for search_sentence inside each raw sentence.

import re
# Phrase to look for as one contiguous run of text inside each line.
search_sentence = "for learning distinctive features among"
raw_sentences = ["methods it is possible to among recognize Instagram filters and at-", # 01
                 "tenuate the sensor pattern noise signal in images. Amerini", # 02
                 "et al. [10] introduced a CNN for learning distinctive features", # 03
                 "among social networks. for learning distinctive features among from the histogram of the discrete co-", # 04
                 "sine transform (DCT) coefficients and the noise residual of", # 05
                 "the images. Phan et al. [11] proposed a method to track mul-", # 06
                 "tiple image sharing on social networks by using a CNN for ar-", # 07
                 "chitecture able to learn", # 08
                 "et al. [10] introduced a CNN for learning distinctive features among it is possible to among recognize Instagram filters", # 09
                 "and at- tenuate xx"] # 10

# Compiled once, outside the loop. The phrase is escaped so it is matched
# literally. The whitespace lookarounds (instead of consuming "\s+" on both
# sides) require the phrase to be delimited by whitespace or by the
# start/end of the line, so a phrase at either end of a line still matches.
phrase_pattern = re.compile(
    rf'(?<!\S){re.escape(search_sentence)}(?!\S)',
    re.IGNORECASE,
)

for sentence in raw_sentences:
    result = phrase_pattern.search(sentence)
    if result:
        # group(0) is the phrase exactly as it is written in the line.
        print(result.group(0))
    else:
        print('no matched sentences!')

CodePudding user response：

def find_sentences(search_sentence, raw_sentences):
    """Keep each sentence that contains every word of ``search_sentence``.

    Words are whitespace-separated tokens. A sentence qualifies when each
    token of ``search_sentence`` occurs somewhere among its own tokens; the
    order and adjacency of the tokens are not checked.

    Args:
        search_sentence: The phrase whose words must all be present.
        raw_sentences: The sentences to test, in order.

    Returns:
        A list with one entry per input sentence: the sentence itself when
        it qualifies, otherwise the string "[no matched sentences!]".
    """
    required = search_sentence.split()
    placeholder = "[no matched sentences!]"

    def contains_all(sentence):
        tokens = sentence.split()
        return all(word in tokens for word in required)

    return [
        sentence if contains_all(sentence) else placeholder
        for sentence in raw_sentences
    ]

CodePudding user response：

Based on the initial code, here is a better (but not the best) solution, with 3 more edge cases:

# Phrase to look for; this name is rebound by the matching loop as parts of
# the phrase are found.
search_sentence = "for learning distinctive features among"
# Unmodified copy of the full phrase; longest_intersection reads it to
# restart matching and to identify the phrase's final word.
initial_sentence = "for learning distinctive features among"

# Sample input lines; the trailing "# NN" comments are 1-based line numbers.
# Lines 11-15 are extra cases added on top of the original ten lines.
raw_sentences = ["methods it is possible to among recognize Instagram filters and at-", # 01
                 "tenuate the sensor pattern noise signal in images. Amerini", # 02
                 "et al. [10] introduced a CNN for learning distinctive features", # 03
                 "among social networks. for learning distinctive features among from the histogram of the discrete co-", # 04
                 "sine transform (DCT) coefficients and the noise residual of", # 05
                 "the images. Phan et al. [11] proposed a method to track mul-", # 06
                 "tiple image sharing on social networks by using a CNN for ar-", # 07
                 "chitecture able to learn", # 08
                 "et al. [10] introduced a CNN for learning distinctive features among it is possible to among recognize Instagram filters", # 09
                 "and at- tenuate xx", # 10
                 "for learning", # 11
                 "distinctive", # 12
                 "features among", # 13
                 "for learning TEST distinctive features among", # 14
                 "for learning"] # 15

def longest_intersection(string1, string2, full_sentence=None):
    """Collect the words of ``string1`` that continue the searched phrase.

    ``string1`` is scanned word by word against the words of ``string2``.
    Once every word of ``string2`` has been matched, matching starts over
    against the complete phrase, so one line can finish a phrase begun on an
    earlier line and then contain the whole phrase again.

    A non-matching word discards what has been collected so far, unless the
    collection is empty or already ends with the final word of the complete
    phrase. The cursor into the expected words is not rewound when the
    collection is discarded, so matching resumes at the next expected word.

    Args:
        string1: The line to scan.
        string2: The part of the phrase still being looked for.
        full_sentence: The complete phrase. Defaults to the module-level
            ``initial_sentence``.

    Returns:
        The list of collected words; empty when the line contributes nothing.
    """
    if full_sentence is None:
        full_sentence = initial_sentence
    full_words = full_sentence.split()  # split once instead of on every word
    final_word = full_words[-1] if full_words else None

    expected = string2.split()
    intersection = []
    index = 0
    for word in string1.split():
        if index == len(expected):
            # Every expected word has been matched: start over against the
            # complete phrase.
            index = 0
            expected = full_words
            if not expected:
                break  # an empty phrase leaves nothing to match against
        if word == expected[index]:
            intersection.append(word)
            index += 1
        elif intersection and intersection[-1] != final_word:
            # The run was interrupted before reaching the end of the phrase.
            intersection = []
    return intersection

# Collect one entry per input line: either the words that line contributes to
# the phrase or the "[no matched sentences!]" placeholder.
all_matches = []
for line in raw_sentences:
    one_line_match = ' '.join(longest_intersection(line.strip(), search_sentence))
    # A hit needs a non-empty match whose first character equals the first
    # character of the phrase currently being searched for.
    if one_line_match != "" and one_line_match[0] == search_sentence[0]:
        all_matches.append(one_line_match)
        # Continue with whatever is left of the phrase on the next line.
        search_sentence = search_sentence.replace(one_line_match, "").strip()
        if search_sentence == "":
            # Phrase fully consumed: start again from the complete phrase.
            search_sentence = initial_sentence
    else:
        all_matches.append("[no matched sentences!]")
        search_sentence = initial_sentence

# Print the collected entries in order. The last entry is printed as-is only
# when it ends with the final word of the phrase; otherwise the placeholder
# is printed in its place.
final_word = initial_sentence.split()[-1]
last_position = len(all_matches) - 1
for index, match in enumerate(all_matches):
    if index == last_position and match.split()[-1] != final_word:
        print("[no matched sentences!]")
    else:
        print(match)

with the result :

[no matched sentences!]
[no matched sentences!]
for learning distinctive features
among for learning distinctive features among
[no matched sentences!]
[no matched sentences!]
[no matched sentences!]
[no matched sentences!]
for learning distinctive features among
[no matched sentences!]
for learning
distinctive
features among
[no matched sentences!]
[no matched sentences!]