I am using BeautifulSoup to create two dataframes of unique classes with text.
The first dataframe has a few missing values, which messes up the row alignment when I join the two. I tried to use an `if not` statement, but I still get an error: get_text() is empty.
# Scrape two tables from one page and join them side by side.
# NOTE(review): indentation was lost in this listing; the statements after each
# `for` / `if` / `else` are presumably the body of that statement — confirm.
# `bs` is presumably the BeautifulSoup class; `response`, `df`, `df3`, `fac`
# and `pd` are defined outside this snippet.
soup = bs(response.text, 'html5lib')
# First pass: one df3 row per <div class="details_table">.
for x in soup.find_all("div", {"class": "details_table"}):
S = x.find("span", {"class": "search_line_s details_table_data"}).get_text(strip=True)
# NOTE(review): find() returns None when the div has no matching span, so the
# .get_text() call in this condition fails before "N/A" can be substituted.
# Test the result of find() itself rather than its text.
if not x.find("span", {"class": "search_line_a__ details_table_data"}).get_text(strip=True):
A = "N/A"
else:
A = x.find("span", {"class": "search_line_a__ details_table_data"}).get_text(strip=True)
App = x.find("span", {"class": "search_line_app details_table_data"}).get_text(strip=True)
# Append the three values as a new row at the next integer label.
df3.loc[len(df3.index)] = [S, A, App]
# Second pass: one df row per <a class="e_link">, built from the tag's attributes.
for items in soup.find_all("a", {"class": "e_link"}):
item_at = items.attrs
list_of_dict_values = item_at.values()
# Positional mask over the attribute values: keeps positions 0, 2, 4, 6, 7 and 9.
# zip() stops at the shorter input, so the result depends on how many
# attributes the tag has and on their order.
good_objects = [True, False, True, False, True, False, True, True, False, True, False, False, False]
property_asel = [val for is_good, val in zip(good_objects, list_of_dict_values) if is_good]
link = property_asel[0]
# NOTE: this name shadows the builtin `type` in the enclosing scope.
type = property_asel[1]
name = property_asel[2]
category = property_asel[3]
sub_category = property_asel[4]
price = property_asel[5]
df.loc[len(df.index)] = [name, category, sub_category, type, price, link]
# Also collect the anchor's visible text.
fac.append(items.get_text(strip=True))
# Column-wise join (axis=1): the two frames are placed side by side.
result = pd.concat([df, df3], axis=1)
CodePudding user response:
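Avoid calling .get_text(strip=True) in your condition, because you first have to check whether the element itself is available. find() returns None when nothing matches, and calling .get_text() on None raises an error: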
# Test the result of find() itself: it is None (falsy) when no span matches.
if not x.find("span", {"class": "search_line_a__ details_table_data"}):
# Placeholder for the fallback branch.
...
or
# Conditional expression: take the span's text when the span exists, otherwise "N/A".
# The span is looked up twice, once for the test and once for the text.
A = x.find("span", {"class": "search_line_a__ details_table_data"}).get_text(strip=True) if x.find("span", {"class": "search_line_a__ details_table_data"}) else "N/A"
or with the walrus operator (requires Python 3.8 or higher):
# The walrus operator binds the find() result to `e` inside the condition,
# so the span is looked up only once and reused for get_text().
A = e.get_text(strip=True) if (e:=x.find("span", {"class": "search_line_a__ details_table_data"})) else "N/A"