Home > Net >  It's tricky, can you solve the issue?
It's tricky, can you solve the issue?

Time:02-24

Output I want

I want the list of authors for each book joined together in one cell. I can already get that, but not every author has a role listed on the website, so in a second cell I want only the authors that have a role, each shown together with its role. The output I want is attached above (see link). It's tricky for me, but someone may be able to tackle it. I'm looking forward to the answers and would appreciate any help. Thank you.

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import pandas as pd

# Scrape a Goodreads search-result page into a two-column table with one row
# per <tr>: every author name joined into one cell, and a second cell holding
# "<name> <role>" for only those authors that have a role.

# NOTE(review): the attribute tests in the original XPath expressions were
# lost (they read `[@]`, which is not valid XPath). The class names below are
# reconstructed -- confirm them against the live page markup.
CONTAINER_XPATH = './/div[@class="authorName__container"]'
NAME_XPATH = './/a[@class="authorName"]/span'
# Token match on the class list, so the span is found even when it carries
# additional classes besides "role".
ROLE_XPATH = './/span[contains(concat(" ", normalize-space(@class), " "), " role ")]'

driver = webdriver.Chrome()
site = 'https://www.goodreads.com/search?q=chughtai&qid=WzdWh5nG8z'
driver.get(site)
driver.maximize_window()
roles = []
authors = []

main = driver.find_elements_by_tag_name('tr')

for i in main:
    # Collected per row so that both output cells describe the same row.
    role = []
    author = []
    con = i.find_elements_by_xpath(CONTAINER_XPATH)
    for n in con:
        try:
            auth = n.find_element_by_xpath(NAME_XPATH).text
        except NoSuchElementException:
            # Container without a name link: nothing to record for it.
            continue
        author.append(auth)

        # A missing role element must not abort the rest of the row, so it
        # is handled per author instead of around the whole loop.
        try:
            rol = n.find_element_by_xpath(ROLE_XPATH).text
        except NoSuchElementException:
            rol = ''
        if rol:
            role.append(auth + ' ' + rol)

    # Exactly one entry in each list per row keeps the columns aligned.
    authors.append(', '.join(author))
    roles.append(', '.join(role))

a = {'Author Name': authors, 'Role': roles}
df = pd.DataFrame(a)
df.to_csv("only_roles.csv", index=False)
print(df)

CodePudding user response:

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
from time import sleep

# Scrape a Goodreads search-result page into a table with one row per author:
# the author's name and the role text (empty when the author has no role).

# NOTE(review): the attribute tests in the original XPath expressions were
# lost (they read `[@]`, which is not valid XPath). The class names below are
# reconstructed -- confirm them against the live page markup.
CONTAINER_XPATH = './/div[@class="authorName__container"]'
NAME_XPATH = './/a[@class="authorName"]/span'
# Token match on the class list, so the span is found even when it carries
# additional classes besides "role".
ROLE_XPATH = './/span[contains(concat(" ", normalize-space(@class), " "), " role ")]'

driver = webdriver.Chrome()
site = 'https://www.goodreads.com/search?q=chughtai&qid=WzdWh5nG8z'
driver.get(site)
sleep(5)
driver.maximize_window()
sleep(5)

main = driver.find_elements_by_tag_name('tr')
role = []
author = []
for i in main:
    con = i.find_elements_by_xpath(CONTAINER_XPATH)
    for n in con:
        try:
            auth = n.find_element_by_xpath(NAME_XPATH).text
        except NoSuchElementException:
            # Container without a name link: nothing to record for it.
            continue

        # An author without a role element is kept with an empty role rather
        # than being dropped together with the rest of the row.
        try:
            rol = n.find_element_by_xpath(ROLE_XPATH).text
        except NoSuchElementException:
            rol = ''

        # Appended together so the two lists always stay the same length.
        author.append(auth)
        role.append(rol)


cols = ['Author Name', 'Role']
df = pd.DataFrame(data=list(zip(author, role)), columns=cols)
print(df)
# Uncomment to also write the table to disk:
# df.to_csv('only_roles.csv',index=False)

CodePudding user response:

Somehow I couldn't get your code to run through all the books, so I modified it; please take whichever parts of my version are useful and bring them into yours. My explanations are in the code's comments.

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import pandas as pd

# Build one table row per book on the search-result page: a cell with all
# author names and a cell with "<name> <role>" for the authors that have one.
driver = webdriver.Chrome('...')
site = 'https://www.goodreads.com/search?q=chughtai&qid=WzdWh5nG8z'
driver.get(site)
driver.maximize_window()

# A list of dicts converts directly to a DataFrame; the keys become columns.
data = []

for row in driver.find_elements_by_tag_name('tr'):
    # Only rows marked up as a schema.org Book are treated as books.
    if row.get_attribute('itemtype') == 'http://schema.org/Book':
        names = []
        names_with_role = []

        # Each container element describes a single author.
        for box in row.find_elements_by_class_name('authorName__container'):
            try:
                name_link = box.find_element_by_class_name('authorName')
                name = name_link.find_element_by_tag_name('span').text
                names.append(name)

                # Raises NoSuchElementException when the author has no role,
                # which leaves the name recorded but adds no role entry.
                role_text = box.find_element_by_class_name('role').text
                names_with_role.append(f'{name} {role_text}')
            except NoSuchElementException:
                continue  # expected for authors without a role
            except Exception as err:
                print(err)  # anything else is surfaced for inspection

        # Collapse each list into a single comma-separated cell.
        data.append({
            'Author Names': ','.join(names),
            'Role': ','.join(names_with_role),
        })

df = pd.DataFrame(data)
print(df)

                                       Author Names  \
0                       Ismat Chughtai,M. Asaduddin   
1                                    Ismat Chughtai   
2   Muhammad Umar Memon,M. Asaduddin,Ismat Chughtai   
3                       Ismat Chughtai,Tahira Naqvi   
4                        Ismat Chughtai,Amar Shahid   
5       Ismat Chughtai,Tahira Naqvi,Syeda S. Hameed   
6                                    Ismat Chughtai   
7                                  Hephaestus Books   
8                       Ismat Chughtai,Tahira Naqvi   
9                                  Rakhshanda Jalil   
10                                   Ismat Chughtai   
11                                   Ismat Chughtai   
12                                   Ismat Chughtai   
13                              Azeem Baig Chughtai   
14                                   Ismat Chughtai   
15                                   Ismat Chughtai   
16                                   Ismat Chughtai   
17                                   Ismat Chughtai   
18                      Ismat Chughtai,Tahira Naqvi   
19                                 Hephaestus Books   

                                                 Role  
0                           M. Asaduddin (Translator)  
1                                                      
2   Muhammad Umar Memon (Translator),M. Asaduddin ...  
3                           Tahira Naqvi (Translator)  
4                              Amar Shahid (Compiler)  
5   Tahira Naqvi (Translator),Syeda S. Hameed (Tra...  
6                                                      
7                                                      
8                           Tahira Naqvi (Translator)  
9                           Rakhshanda Jalil (Editor)  
10                                                     
11                                                     
12                                                     
13                                                     
14                                                     
15                                                     
16                                                     
17                                                     
18                          Tahira Naqvi (Translator)  
19                                                     
  • Related