Home > other >  Create a nested dictionary for every distinct words in a list
Create a nested dictionary for every distinct words in a list

Time:01-27

I have a nested list, and for each list inside I want to create a dictionary that will contain another dictionary with the words related to a certain word as a key and the times they appear as the value. For example:

from

sentences = [["i", "am", "a", "sick", "man"],
             ["i", "am", "a", "spiteful", "man"],
             ["i", "am", "an", "unattractive", "man"],
             ["i", "believe", "my", "liver", "is", "diseased"],
             ["however", "i", "know", "nothing", "at", "all", "about", "my",
              "disease", "and", "do", "not", "know", "for", "certain", "what", "ails", "me"]]

part of the dictionary returned would be:

{ "man": {"i": 3, "am": 3, "a": 2, "sick": 1, "spiteful": 1, "an": 1, "unattractive": 1}, "liver": {"i": 1, "believe": 1, "my": 1, "is": 1, "diseased": 1}...} 

with as many keys as there are distinct words in the passage.

I've tried this:

d = {}
    for row in sentences:
        for words in rows:
            if words not in d:
                d[words] = 1
            else:
                d[words]  = 1

But is only the way to count them, how could I use d as a value for another dictionary?

CodePudding user response:

from collections import defaultdict

data = {}

for sentence in sentences:
    for word in sentence:
        data[word] = defaultdict(lambda: 0)

for sentence in sentences:
    length = len(sentence)

    for index1, word1 in enumerate(sentence):
        for num in range(0, length - 1):
            index2 = (index1   1   num) % length
            word2 = sentence[index2]
            
            data[word1][word2]  = 1
            
print(data)

CodePudding user response:

sentences = [["i", "am", "a", "sick", "man"],
             ["i", "am", "a", "spiteful", "man"],
             ["i", "am", "an", "unattractive", "man"],
             ["i", "believe", "my", "liver", "is", "diseased"],
             ["however", "i", "know", "nothing", "at", "all", "about", "my",
              "disease", "and", "do", "not", "know", "for", "certain", "what", "ails", "me"]]

# "as many keys as there are distinct words in the passage"
# Well then we need to start by finding the distinct words.
# sets always help for this.

# first we flatten the list. If you don't know what this is doing, 
# search "flatten nested list Python". This is a common pattern:

flat_list = [term for group in sentences for term in group]

# now use set to find distinct words
distinct_words = set(flat_list)

# variable for final dictionary
result = {}

# define this function first. See invocation below
def find_related_counts(word):
    # a nice way to do counts us with 
    # setdefault. If the term has already 
    # been counted, then it just increments. 
    # otherwise, it will create the key and 
    # initialise it to the default 
    related_counts = {}
    for group in sentences:
        # is "word" related to the terms in this group?
        if word in group:
            # yes it is! add the other terms:
            for other in group:
                # except, presumably, the word itself 
                if other != word:
                    related_counts.setdefault(other, 0) 
                    related_counts[other]  = 1
    return related_counts

# for each word we have a key, and must find the value 
for word in distinct_words:
    # when dealing with nested anythings, it helps to 
    # make a function, so you don't have so much 
    # nesting in one place and separate things out 
    # nicely instead 
    value = find_related_counts(word)
    result[word] = value 


print(result)
print(result["man"])

OUTPUT:

{'spiteful': {'i': 1, 'am': 1, 'a': 1, 'man': 1}, 'and': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'unattractive': {'i': 1, 'am': 1, 'an': 1, 'man': 1}, 'nothing': {'however': 1, 'i': 1, 'know': 2, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'diseased': {'i': 1, 'believe': 1, 'my': 1, 'liver': 1, 'is': 1}, 'sick': {'i': 1, 'am': 1, 'a': 1, 'man': 1}, 'man': {'i': 3, 'am': 3, 'a': 2, 'sick': 1, 'spiteful': 1, 'an': 1, 'unattractive': 1}, 'do': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'believe': {'i': 1, 'my': 1, 'liver': 1, 'is': 1, 'diseased': 1}, 'i': {'am': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1, 'believe': 1, 'my': 2, 'liver': 1, 'is': 1, 'diseased': 1, 'however': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'certain': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'what': 1, 'ails': 1, 'me': 1}, 'an': {'i': 1, 'am': 1, 'unattractive': 1, 'man': 1}, 'my': {'i': 2, 'believe': 1, 'liver': 1, 'is': 1, 'diseased': 1, 'however': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'a': {'i': 2, 'am': 2, 'sick': 1, 'man': 2, 'spiteful': 1}, 'am': {'i': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1}, 'however': {'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'about': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'not': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'for': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'liver': {'i': 1, 'believe': 1, 'my': 1, 'is': 1, 'diseased': 1}, 'know': {'however': 1, 'i': 1, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'at': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'all': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'disease': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'ails': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'me': 1}, 'me': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1}, 'what': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'ails': 1, 'me': 1}, 'is': {'i': 1, 'believe': 1, 'my': 1, 'liver': 1, 'diseased': 1}}
{'i': 3, 'am': 3, 'a': 2, 'sick': 1, 'spiteful': 1, 'an': 1, 'unattractive': 1}
  •  Tags:  
  • Related