How to solve "string indices must be integers" (error while retrieving replies on tweet us-CodePudding

I am using TwitterAPI to extract replies on tweets using conversation_id by following example code mentioned below: The idea is to extract all replies against a list of thousands of conversation_ids, and I do have academic track credentials so the archive search should not be a problem.

from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRequestError, TwitterConnectionError, TwitterPager

# NOTE: If conversation is over a week old then it will not get returned.
CONVERSATION_ID = '1369393783482236933'

class TreeNode:
    def __init__(self, data):
        """data is a tweet's json object"""
        self.data = data
        self.children = []
        self.replied_to_tweet = None
        if 'referenced_tweets' in self.data:
            for tweet in self.data['referenced_tweets']: 
                if tweet['type'] == 'replied_to':
                    self.replied_to_tweet = tweet['id']
                    break

    def id(self):
        """a node is identified by its tweet id"""
        return self.data['id']

    def parent(self):
        """the reply-to tweet is the parent of the node"""
        return self.replied_to_tweet

    def find_parent_of(self, node):
        """append a node to the children of it's parent tweet"""
        if node.parent() == self.id():
            self.children.append(node)
            return True
        for child in self.children:
            if child.find_parent_of(node):
                return True
        return False

    def print_tree(self, level):
        """level 0 is the root node, then incremented for subsequent generations"""
        created_at = self.data['created_at']
        username = self.data['author_id']['username']
        text_80chars = self.data['text'][0:80].replace('\n', ' ')
        print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')
        level  = 1
        for child in reversed(self.children):
            child.print_tree(level)

try:
    o = TwitterOAuth.read_file()
    api = TwitterAPI(o.consumer_key, o.consumer_secret, auth_type='oAuth2', api_version='2')

    # GET ROOT OF THE CONVERSATION

    r = api.request(f'tweets/:{CONVERSATION_ID}',
        {
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)

    for item in r:
        root = TreeNode(item)
        print(f'ROOT {root.id()}')

    # GET ALL REPLIES IN CONVERSATION
    # (RETURNED IN REVERSE CHRONOLOGICAL ORDER)

    pager = TwitterPager(api, 'tweets/search/recent', 
        {
            'query':f'conversation_id:{CONVERSATION_ID}',
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)

    # "wait=2" means wait 2 seconds between each request.
    # The rate limit is 450 requests per 15 minutes, or
    # 15*60/450 = 2 seconds. 

    orphans = []

    for item in pager.get_iterator(wait=2):
        node = TreeNode(item)
        print(f'{node.id()} => {node.parent()}', item['author_id']['username'])
        # COLLECT ANY ORPHANS THAT ARE CHILDREN OF THE NEW NODE
        orphans = [orphan for orphan in orphans if not node.find_parent_of(orphan)]
        # IF THE NEW NODE CANNOT BE PLACED IN TREE, ORPHAN IT UNTIL ITS PARENT IS FOUND
        if not root.find_parent_of(node):
            orphans.append(node)

    print('\nTREE...')
    root.print_tree(0)
    assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'

except TwitterRequestError as e:
    print(e.status_code)
    for msg in iter(e):
        print(msg)

except TwitterConnectionError as e:
    print(e)

except Exception as e:
    print(e)

The error is displayed in detail if I comment the last two lines.

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
C:\Users\ANEESB~1\AppData\Local\Temp/ipykernel_18696/4104024841.py in <module>
     88 
     89         print('\nTREE...')
---> 90         root.print_tree(0)
     91         assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'
     92 

C:\Users\ANEESB~1\AppData\Local\Temp/ipykernel_18696/4104024841.py in print_tree(self, level)
     37                 """level 0 is the root node, then incremented for subsequent generations"""
     38                 created_at = self.data['created_at']
---> 39                 username = self.data['author_id']['username']
     40                 text_80chars = self.data['text'][0:80].replace('\n', ' ')
     41                 print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')

TypeError: string indices must be integers

The code is supposed to work, I don't know what is causing the error. Any help please?

CodePudding user response：

Self.data looks like this:

{
    'author_id': '3420477195', 
    'conversation_id': '1369393783482236933', 
    'created_at': '2021-03-09T21:04:54.000Z', 
    'text': "Happy one year anniversary to everyone working from home! Do you feel like if you have one more Zoom meeting you’ll rip your hair out? First of all, please don't do that. Second, we're here to save you from Zoom boredom with these new backgrounds!", 
    'id': '1369393783482236933', 
    'author_id_hydrate': {'id': '3420477195', 'name': 'Full Frontal', 'username': 'FullFrontalSamB'}
}

author_id is just a string, the details about the author are in author_id_hydrate. So self.data['author_id']['username'] should be self.data['author_id_hydrate']['username']