Is there a way to split the text I'm inputting into different strings that the dictionary looks up?
Is there a way to split the text I'm inputting into different strings that the dictionary looks up?


I am trying to create software that reads out the definition of every word you type into the text box. Right now it only works if there is one word, and it crashes if there is more than one. How would I go about fixing this?

import wolframalpha
# Wolfram|Alpha API client.
# NOTE(review): the app ID is hard-coded in source — consider loading it from an
# environment variable or config file. `client` is not used in the visible code.
client = wolframalpha.Client('8QR2WG-628657K83Q')

from multiprocessing import Process

import wikipedia

import PySimpleGUI as sg

import cv2

import random

import sys
import threading
import time

import nltk

# from oxforddictionaries.words import OxfordDictionaries
# Oxford = OxfordDictionaries('b4170561','f32687e0ecbc219cfd723bb220dad34e')
# o = OxfordDictionaries('b4170561','f32687e0ecbc219cfd723bb220dad34e')
# relax = o.get_synonyms("Apple").json()
# synonyms = relax

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# English stop words from the NLTK corpus, stored as a set.
# NOTE(review): presumably requires the NLTK "stopwords" data to be downloaded — confirm.
stop_words = set(stopwords.words("english"))
# Starts empty; nothing in the visible code appends to it or reads it.
filtered_list = []

from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Porter stemmer instance; not referenced elsewhere in the visible code.
stemmer = PorterStemmer()

# Haar-cascade classifiers used by the webcam scanner. The XML file names are
# relative paths, so they are resolved against the current working directory.
trained_face_data = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
trained_body_data = cv2.CascadeClassifier('haarcascade_upperbody.xml')
trained_eye_data = cv2.CascadeClassifier('haarcascade_eye.xml')
# Capture device 0.
webcam = cv2.VideoCapture(0)

# The main loop reads the typed text as values[0], so the layout needs an input
# element without an explicit key (PySimpleGUI numbers unkeyed inputs from 0).
layout = [  [sg.Text("Enter Test Text")],
            [sg.InputText()],
            [sg.Button('Ok')] ]
window = sg.Window('You', layout)

sg.Popup('About Me','Hello I am an AI devolped by Garrett Provence. I will be using your webcam to scan your suroundings for a quick few seconds and will open a text box where you will be able to ask me questions. By clicking ok below you agree to letting me acess everyhting said before. I am still in beta so please be patient.')

# Deadline (seconds since the epoch) after which the webcam scanner stops:
# ten seconds from now.
timeout = time.time() + 10

import json
import os
import pprint

import requests

import Oxfordwords
from Oxfordwords import Word


##Webcam scanner
def infiniteloop1():
    """Scan webcam frames for faces, upper bodies and eyes until `timeout` passes.

    Each detected region is outlined on the frame with a rectangle. The loop
    also stops as soon as a frame cannot be read.
    """
    # NOTE(review): the annotated frame is never displayed or saved in the
    # visible code — add a preview/save step if one is wanted.
    while time.time() <= timeout:
        successful_frame_read, frame = webcam.read()
        if not successful_frame_read:
            # No frame available: stop rather than hand None to cvtColor.
            break

        grayscaled_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        face_coordinates = trained_face_data.detectMultiScale(grayscaled_img)
        body_coordinates = trained_body_data.detectMultiScale(grayscaled_img)
        eye_coordinates = trained_eye_data.detectMultiScale(grayscaled_img)

        for (x, y, w, h) in face_coordinates:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, random.randrange(255), 0), 2)
        for (x, y, w, h) in body_coordinates:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
        for (x, y, w, h) in eye_coordinates:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (random.randrange(255), 0, 0), 2)
##end OF webcam scanner


def define_words(words):
    """Look up each token of `words` on its own and show its definitions.

    The dictionary page is fetched per word, so a sentence has to be split
    before it is looked up. Tokens without any letter (punctuation, numbers)
    are skipped. A failed lookup shows the error popup and the loop carries on
    with the next word.
    """
    for word in words:
        if not any(character.isalpha() for character in word):
            continue
        try:
            Word.get(word)
            sg.Popup('Def', word, Word.definitions())
        except (Oxfordwords.WordNotFound, requests.RequestException, IndexError):
            sg.Popup('There seems to be a error processing what you have said')


# The scanner runs in the background so the text window can be used meanwhile.
thread1 = threading.Thread(target=infiniteloop1, daemon=True)
thread1.start()

#img = cv2.imread('Rdj.png')
while True:
    # Block until the user presses a button or closes the window.
    event, values = window.read()
    if event == sg.WIN_CLOSED or event == 'Cancel':
        break

    # `values` maps element keys to their contents; unkeyed inputs start at 0.
    Text = values.get(0, '') if values else ''
    InputText = Text
    if not Text.strip():
        continue

    ##Test Text Scanner --
    try:
        words_in_excerpt = word_tokenize(Text)
        tagged_words = nltk.pos_tag(words_in_excerpt)
    except LookupError:
        # NLTK raises LookupError when its tokenizer/tagger data is missing.
        sg.Popup('There seems to be a error processing what you have said')
        continue

    print('Hello', tagged_words, "")
    sg.Popup('Test', tagged_words)
    define_words(words_in_excerpt)
    ##End of test Text Scanner --

# Let the scanner finish (at most until `timeout`) before releasing the camera.
thread1.join()
webcam.release()
window.close()

The Oxford dictionary code:

#!/bin/env python3

""" oxford dictionary api """

from http import cookiejar

import requests
from bs4 import BeautifulSoup as soup

class WordNotFound(Exception):
    """Raised when the dictionary site answers with HTTP 404 for a word."""

class BlockAll(cookiejar.CookiePolicy):
    """Cookie policy that refuses every cookie, both incoming and outgoing."""

    netscape = True
    rfc2965 = False
    hide_cookie2 = False

    def return_ok(self, *args, **kwargs):
        """Never send a cookie back to the server."""
        return False

    def set_ok(self, *args, **kwargs):
        """Never accept a cookie from the server."""
        return False

    def domain_return_ok(self, *args, **kwargs):
        """Reject cookies for every domain."""
        return False

    def path_return_ok(self, *args, **kwargs):
        """Reject cookies for every path."""
        return False

class Word(object):
    """Retrieve word info from the Oxford Learner's Dictionaries website.

    All state lives on the class: `get()` downloads the page of ONE word and
    stores its parsed HTML in `soup_data`; every other classmethod reads from
    that parsed page. Call `Word.get(word)` before each lookup, once per word.
    """
    entry_selector = '#entryContent > .entry'
    header_selector = '.top-container'

    title_selector = header_selector + ' .headword'
    wordform_selector = header_selector + ' .pos'
    property_global_selector = header_selector + ' .grammar'

    br_pronounce_selector = '[geo=br] .phon'
    am_pronounce_selector = '[geo=n_am] .phon'
    br_pronounce_audio_selector = '[geo=br] [data-src-ogg]'
    am_pronounce_audio_selector = '[geo=n_am] [data-src-ogg]'

    definition_body_selector = '.senses_multiple'
    namespaces_selector = '.senses_multiple > .shcut-g'
    examples_selector = '.senses_multiple .sense > .examples .x'
    definitions_selector = '.senses_multiple .sense > .def'

    extra_examples_selector = '.res-g [title="Extra examples"] .x-gs .x'
    phrasal_verbs_selector = '.phrasal_verb_links a'
    idioms_selector = '.idioms > .idm-g'

    other_results_selector = '#rightcolumn #relatedentries'

    # Parsed page of the most recently fetched word; None until get() succeeds.
    soup_data = None

    @staticmethod
    def _first_text(tag, selector):
        """Return the text of the first match of `selector` under `tag`, or None."""
        matches = tag.select(selector)
        return matches[0].text if matches else None

    @classmethod
    def _collect_optional(cls, tag, fields):
        """Return {key: text} for every (key, selector) pair that matches under `tag`."""
        found = {}
        for key, selector in fields:
            text = cls._first_text(tag, selector)
            if text is not None:
                found[key] = text
        return found

    @classmethod
    def _parse_senses(cls, container_tag):
        """Parse every '.sense' element under `container_tag` into a definition dict."""
        return [cls._parse_definition(sense_tag)
                for sense_tag in container_tag.select('.sense')]

    @classmethod
    def get_url(cls, word):
        """ get url of word definition """
        baseurl = 'https://www.oxfordlearnersdictionaries.com/definition/english/'
        return baseurl + word

    @classmethod
    def delete(cls, selector):
        """ remove tag with specified selector in cls.soup_data """
        for tag in cls.soup_data.select(selector):
            tag.decompose()

    @classmethod
    def get(cls, word):
        """Download the page of `word` and store its parsed HTML in cls.soup_data.

        Raises:
            WordNotFound: the site answered 404 for `word`.
        """
        # Forget the previous word first, so a failed lookup can never leave
        # stale data behind for the other accessors to return.
        cls.soup_data = None

        # The context manager closes the session's connections when done.
        with requests.Session() as req:
            req.cookies.set_policy(BlockAll())
            page_html = req.get(cls.get_url(word), timeout=5,
                                headers={'User-agent': 'mother animal'})

        if page_html.status_code == 404:
            raise WordNotFound(word)

        cls.soup_data = soup(page_html.content, 'html.parser')

        # remove some unnecessary tags to prevent false positive results
        cls.delete('[title="Oxford Collocations Dictionary"]')
        cls.delete('[title="British/American"]')  # edge case: 'phone'
        cls.delete('[title="Express Yourself"]')
        cls.delete('[title="Word Origin"]')

    @classmethod
    def other_results(cls):
        """Get similar words, idioms, phrases... listed beside the entry.

        Return: None when no page is loaded or the page has no related-entries
        box, otherwise one single-key dict per table found in that box:
            [
                {'All matches': [
                    {'name': name1, 'id': id1, 'wordform': wordform1},
                    {'name': name2, 'id': id2, 'wordform': wordform2},
                ]},
                {'Phrasal verbs': [...]},
            ]
        """
        if cls.soup_data is None:
            return None

        related_boxes = cls.soup_data.select(cls.other_results_selector)
        if not related_boxes:
            return None
        rightcolumn_tags = related_boxes[0]

        # there can be multiple other results table like All matches, Phrasal verbs, Idioms,...
        header_tags = rightcolumn_tags.select('dt')
        other_results_tags = rightcolumn_tags.select('dd')

        info = []
        # loop each other result table
        for header_tag, other_results_tag in zip(header_tags, other_results_tags):
            other_results = []
            for item_tag in other_results_tag.select('li'):
                # direct text children only; the last slot always holds the wordform
                names = item_tag.select('span')[0].find_all(text=True, recursive=False)
                wordform_tag = item_tag.select('pos')
                names.append(wordform_tag[0].text if wordform_tag else '')
                other_results.append(names)

            ids = [cls.extract_id(tag.attrs['href'])
                   for tag in other_results_tag.select('li a')]

            results = [
                {
                    'name': ' '.join(part.strip() for part in other_result[:-1]),
                    'id': word_id,
                    'wordform': other_result[-1].strip(),
                }
                for other_result, word_id in zip(other_results, ids)
            ]

            info.append({header_tag.text: results})

        return info

    @classmethod
    def name(cls):
        """ get word name """
        if cls.soup_data is None:
            return None
        return cls.soup_data.select(cls.title_selector)[0].text

    @classmethod
    def id(cls):
        """ get id of a word. if a word has definitions in 2 seperate pages
        (multiple wordform) it will return 'word_1' and 'word_2' depend on
        which page it's on """
        if cls.soup_data is None:
            return None
        return cls.soup_data.select(cls.entry_selector)[0].attrs['id']

    @classmethod
    def wordform(cls):
        """ return wordform of word (verb, noun, adj...) """
        if cls.soup_data is None:
            return None
        return cls._first_text(cls.soup_data, cls.wordform_selector)

    @classmethod
    def property_global(cls):
        """ return global property (apply to all definitions) """
        if cls.soup_data is None:
            return None
        return cls._first_text(cls.soup_data, cls.property_global_selector)

    @classmethod
    def get_prefix_from_filename(cls, filename):
        """ get prefix (NAmE or BrE) from audio name when prefix is null """
        if '_gb_' in filename:
            return 'BrE'
        if '_us_' in filename:
            return 'NAmE'
        return None

    @classmethod
    def pronunciations(cls):
        """Get the British and American pronunciations.

        Return: [britain, america], each {'prefix': ..., 'ipa': ..., 'url': ...};
        a value stays None when the page does not provide it. Returns None when
        no page is loaded.
        """
        if cls.soup_data is None:
            return None

        britain = {'prefix': None, 'ipa': None, 'url': None}
        america = {'prefix': None, 'ipa': None, 'url': None}

        # Each accent is filled in independently, so a page that only has one
        # of them still yields that one.
        # NOTE(review): 'nAmE' here differs in case from the 'NAmE' returned by
        # get_prefix_from_filename — kept as-is for existing callers.
        accents = (
            (britain, 'BrE', cls.br_pronounce_selector, cls.br_pronounce_audio_selector),
            (america, 'nAmE', cls.am_pronounce_selector, cls.am_pronounce_audio_selector),
        )
        for accent, prefix, ipa_selector, audio_selector in accents:
            ipa = cls._first_text(cls.soup_data, ipa_selector)
            if ipa is not None:
                accent['ipa'] = ipa
                accent['prefix'] = prefix

            audio_tags = cls.soup_data.select(audio_selector)
            if audio_tags:
                accent['url'] = audio_tags[0].attrs['data-src-ogg']

            # no IPA on the page: fall back to the accent encoded in the audio file name
            if accent['prefix'] is None and accent['url'] is not None:
                accent['prefix'] = cls.get_prefix_from_filename(accent['url'])

        return [britain, america]

    @classmethod
    def extract_id(cls, link):
        """ get word id from link
        Argument: https://abc/definition/id
        Return: id
        """
        return link.split('/')[-1]

    @classmethod
    def get_references(cls, tags):
        """ get info about references to other page
        Argument: soup.select(<selector>)
        Return: [{'id': <id>, 'name': <word>}, {'id': <id2>, 'name': <word2>}, ...]
        """
        if cls.soup_data is None:
            return None

        return [{'id': cls.extract_id(tag.attrs['href']), 'name': tag.text}
                for tag in tags.select('.xrefs a')]  # see also <external link>

    @classmethod
    def references(cls):
        """ get global references """
        if cls.soup_data is None:
            return None

        header_tag = cls.soup_data.select(cls.header_selector)[0]
        return cls.get_references(header_tag)

    @classmethod
    def definitions(cls, full=False):
        """Return the definitions of the loaded word.

        With full=False: a flat list of definition strings. With full=True: the
        structure produced by definition_full(). None when no page is loaded.
        """
        if cls.soup_data is None:
            return None

        if not full:
            return [tag.text for tag in cls.soup_data.select(cls.definitions_selector)]
        return cls.definition_full()

    @classmethod
    def examples(cls):
        """ List of all examples (not categorized in seperate definitions) """
        if cls.soup_data is None:
            return None
        return [tag.text for tag in cls.soup_data.select(cls.examples_selector)]

    @classmethod
    def phrasal_verbs(cls):
        """ get phrasal verbs list (verb only) """
        if cls.soup_data is None:
            return None

        return [
            {
                'name': tag.select('.xh')[0].text,
                'id': cls.extract_id(tag.attrs['href']),  # https://abc/definition/id -> id
            }
            for tag in cls.soup_data.select(cls.phrasal_verbs_selector)
        ]

    @classmethod
    def _parse_definition(cls, parent_tag):
        """ return word definition + corresponding examples

        A word can have a single (None) or multiple namespaces
        Each namespace can have one or many definitions
        Each definitions can have one, many or no examples

        Some words can have specific property
        A verb can have phrasal verbs

        Optional keys ('property', 'label', 'refer', 'references',
        'description') are only present when the page provides them.
        """
        if cls.soup_data is None:
            return None

        definition = cls._collect_optional(parent_tag, (
            ('property', '.grammar'),  # countable, transitive, plural,...
            ('label', '.labels'),      # (old-fashioned), (informal), (saying)...
            ('refer', '.dis-g'),       # refer to something (of people, of thing,...)
        ))

        references = cls.get_references(parent_tag)
        if references:
            definition['references'] = references

        # sometimes, it just refers to other page without having a definition
        description = cls._first_text(parent_tag, '.def')
        if description is not None:
            definition['description'] = description

        definition['examples'] = [example_tag.text
                                  for example_tag in parent_tag.select('.examples .x')]

        definition['extra_example'] = [
            example_tag.text
            for example_tag in parent_tag.select('[unbox=extra_examples] .examples .unx')
        ]

        return definition

    @classmethod
    def definition_full(cls):
        """ return word definition + corresponding examples

        A word can have a single (None) or multiple namespaces
        Each namespace can have one or many definitions
        Each definitions can have one, many or no examples

        Some words can have specific property
        A verb can have phrasal verbs

        Return: [{'namespace': <name or None>, 'definitions': [...]}, ...]; a
        page without namespaces yields one '__GLOBAL__' entry.
        """
        if cls.soup_data is None:
            return None

        info = []
        for namespace_tag in cls.soup_data.select(cls.namespaces_selector):
            # some word have similar definitions grouped in a multiple namespaces (time)
            # some do not, and only have one namespace (woman)
            namespace = cls._first_text(namespace_tag, 'h2.shcut')
            info.append({'namespace': namespace,
                         'definitions': cls._parse_senses(namespace_tag)})

        # no namespace. all definitions is global
        if not info:
            def_body_tags = cls.soup_data.select(cls.definition_body_selector)
            # A page without a definition body gives an empty list, not an IndexError.
            definitions = cls._parse_senses(def_body_tags[0]) if def_body_tags else []
            info.append({'namespace': '__GLOBAL__', 'definitions': definitions})

        return info

    @classmethod
    def idioms(cls):
        """ get word idioms

        Idioms dont have namespace like regular definitions
        Each idioms have one or more definitions
        Each definitions can have one, many or no examples

        Return: [{'name': ..., 'summary': {...}, 'definitions': [...]}, ...],
        or None when no page is loaded.
        """
        if cls.soup_data is None:
            return None

        optional_fields = (
            ('label', '.labels'),  # (old-fashioned), (informal), (saying)...
            ('refer', '.dis-g'),   # refer to something (of people, of thing,...)
        )

        idioms = []
        for idiom_tag in cls.soup_data.select(cls.idioms_selector):
            # sometimes idiom is in multiple idm classes inside
            # one idm-l class instead of a single idm class
            idiom = cls._first_text(idiom_tag, '.idm-l')
            if idiom is None:
                idiom = idiom_tag.select('.idm')[0].text

            global_definition = cls._collect_optional(idiom_tag, optional_fields)
            references = cls.get_references(idiom_tag)
            if references:
                global_definition['references'] = references

            definitions = []
            # one idiom can have multiple definitions, each can have multiple examples or no example
            for definition_tag in idiom_tag.select('.sense'):
                # sometimes, it just refers to other page without having a definition
                definition = cls._collect_optional(
                    definition_tag, (('description', '.def'),) + optional_fields)

                references = cls.get_references(definition_tag)
                if references:
                    definition['references'] = references

                definition['examples'] = [example_tag.text
                                          for example_tag in definition_tag.select('.x')]
                definitions.append(definition)

            idioms.append({'name': idiom, 'summary': global_definition, 'definitions': definitions})

        return idioms

    @classmethod
    def info(cls):
        """ return all info about a word """
        if cls.soup_data is None:
            return None

        word = {
            'id': cls.id(),
            'name': cls.name(),
            'wordform': cls.wordform(),
            'pronunciations': cls.pronunciations(),
            'property': cls.property_global(),
            'definitions': cls.definitions(full=True),
            'idioms': cls.idioms(),
            'other_results': cls.other_results(),
        }

        # drop the optional entries the page did not provide
        if not word['property']:
            word.pop('property', None)

        if not word['other_results']:
            word.pop('other_results', None)

        if word['wordform'] == 'verb':
            word['phrasal_verbs'] = cls.phrasal_verbs()

        return word

Any help will be appreciated. Thank you :)

CodePudding user response:

Just split `values[0]` into words and call `Word.get(...)` on each one:

import re

while True:
    ##Test Text Scanner --
    # Wait for the next button press; the typed text arrives as values[0].
    event, values = window.read()
    if event == sg.WIN_CLOSED or event == 'Cancel':
        # values is None once the window is closed, so stop before reading it.
        break

    Text = values[0]
    words_in_excerpt = word_tokenize(Text)
    print('Hello', nltk.pos_tag(words_in_excerpt), "")
    sg.Popup('Test', nltk.pos_tag(words_in_excerpt))

    # "\w+" matches whole words; without the "+" only single characters match.
    words = re.findall(r"\w+", Text.strip()) # can also use nltk.word_tokenize
    for word in words:
        try:
            # The dictionary page holds one word, so fetch it per word.
            Word.get(word)
            sg.Popup('Def', Word.definitions())
        except Exception as error:
            # UI boundary: report the failure and continue with the next word.
            print(error)
            sg.Popup('There seems to be a error processing what you have said')
