IndexError: list index out of range in if else when condition matches-CodePudding

This code scrapes Oddsortal website:

import pandas as pd
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import threading
from multiprocessing.pool import ThreadPool
import os
import re
from math import nan


class Driver:
    def __init__(self):
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        # Un-comment next line to supress logging:
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        self.driver = webdriver.Chrome(options=options)

    def __del__(self):
        self.driver.quit()  # clean up driver when we are cleaned up
        # print('The driver has been "quitted".')


threadLocal = threading.local()


def create_driver():
    the_driver = getattr(threadLocal, 'the_driver', None)
    if the_driver is None:
        the_driver = Driver()
        setattr(threadLocal, 'the_driver', the_driver)
    return the_driver.driver


class GameData:
    def __init__(self):
        self.date = []
        self.time = []
        self.game = []
        self.score = []
        self.home_odds = []
        self.draw_odds = []
        self.away_odds = []
        self.country = []
        self.league = []


def generate_matches(table):
    tr_tags = table.findAll('tr')
    for tr_tag in tr_tags:
        if 'class' in tr_tag.attrs and 'dark' in tr_tag['class']:
            th_tag = tr_tag.find('th', {'class': 'first2 tl'})
            a_tags = th_tag.findAll('a')
            country = a_tags[0].text
            league = a_tags[1].text
        else:
            td_tags = tr_tag.findAll('td')
            yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
                   td_tags[4].text, td_tags[5].text, country, league]

I am getting a List Error:

yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
IndexError: list index out of range

How can I resolve it?

This is my current entire code

When I checked, the list index was 1 while I am getting the country and league values correctly. If there are no values in the list, how can I modify

yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
       td_tags[4].text, td_tags[5].text, country, league]

to get correct value?

CodePudding user response：

You can check if td_tags is not empty

td_tags = tr_tag.findAll('td')
if len(td_tags) > 0: # or just if td_tags
    yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
           td_tags[4].text, td_tags[5].text, country, league]

CodePudding user response：

You can unpack td_tags so if it is empty you will have just county and league:

yield [*[td_tag.text for td_tag in td_tags], country, league]

Or you can check with a simple if it is not empty:

if td_tags:
    yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
           td_tags[4].text, td_tags[5].text, country, league]

Another option if you don't care to suppress IndexError:

with suppress(IndexError):
    yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
           td_tags[4].text, td_tags[5].text, country, league]