How can I obtain the dates?-CodePudding

   import pandas as pd
   from bs4 import BeautifulSoup as soup
   from urllib.request import urlopen as uReq

   # Download the CBA events page and parse it. The `with` block closes the
   # HTTP response as soon as the body has been read.
   cba_url = "https://www.cbabbq.com/events.cfm"
   with uReq(cba_url) as client:
       page_html = client.read()
   page_soup = soup(page_html, features='lxml')

   # CBA Event Names: text of every <td class="th1"> cell.
   cba_event_tags = page_soup.find_all("td", {"class": "th1"})
   cba_event_names = [tag.text for tag in cba_event_tags]
   df_cba_event_names = pd.DataFrame(cba_event_names)

   # CBA Event Location: text of every <td class="th2"> cell.
   cba_location_tags = page_soup.find_all("td", {"class": "th2"})
   cba_event_locations = [tag.text for tag in cba_location_tags]
   df_cba_event_locations = pd.DataFrame(cba_event_locations)

   # CBA Event Date
   # NOTE: this matches every <td> on the page, not only the date cells, so
   # the printed list also contains the text of the name and location cells.
   cba_date_tags = page_soup.find_all("td")
   cba_dates = [tag.text for tag in cba_date_tags]
   print(cba_dates)

I am trying to obtain the dates, but they are in plain "td" tags like the other fields, with no class or other attribute to tell them apart. So I'm unsure how to select those specific cells in the HTML.

CodePudding user response：

Here is how to get everything from that page:

from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq

# Fetch the events page. The `with` block closes the HTTP response once the
# body has been read and parsed.
url = "https://www.cbabbq.com/events.cfm"
with uReq(url) as page:
    page_soup = soup(page.read(), 'lxml')

# Every <td> nested under an element with class "table2", in document order.
data = page_soup.select('.table2 td')

# Walk the cells three at a time. zip() stops at the shortest slice, so a
# trailing incomplete group is skipped.
# NOTE(review): assumes the cells repeat in event, date, location order;
# confirm against the page markup.
for event, date, location in zip(data[0::3], data[1::3], data[2::3]):
    print(f'{event.text} -- {date.text} -- {location.text}')

Another way to iterate over found rows:

# Step through `data` three cells at a time so each slice is one complete
# event/date/location group. Stopping at len(data) - 2 guarantees the final
# slice still holds three cells.
for i in range(0, len(data) - 2, 3):
    event, date, location = data[i:i + 3]
    print(f'{event.text} -- {date.text} -- {location.text}')

If you want to just get the dates:

# Select each <td> inside `.table2` that is the third <td> among its siblings.
# NOTE(review): presumably this is the date column; confirm against the
# page markup.
dates = page_soup.select('.table2 td:nth-of-type(3)')

# Print the text of each selected cell on its own line.
date_texts = (cell.text for cell in dates)
for date_text in date_texts:
    print(date_text)