I am building a scraper that should scrape TikTok hashtags, collect the usernames found under each hashtag, and then scrape each of those usernames. When it finishes, I want the information organized in a CSV file. This is what I came up with, but it is not working as I expected. I am a beginner and I am trying to learn a new language. What am I missing or doing wrong?
import requests
import json
import pandas as pd
# Original attempt from the question, kept exactly as posted.
# NOTE(review): the paste lost its indentation, so the intended extent of the
# two `for` loops below cannot be read from the code — confirm with the author.

# Scrape the hashtag: GET the TikAPI hashtag endpoint (count=30, id=9261).
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"
payload={}
headers = {
'X-API-KEY': 'xxxxxx'
}
response = requests.request("GET", url, headers=headers, data=payload)
hashtag_response = response.text
# Parse the response body as JSON.
hashtag_json = json.loads (hashtag_response)
# Write the raw hashtag response text to data.json.
results_json = open("data.json", "w")
L = [response.text]
results_json.writelines(L)
results_json.close()
# Result lists; each one becomes a column of the CSV built at the end.
influencer = []
followerCount = []
bioLink = []
signature = []
# NOTE(review): this looks up 'uniqueId' at the top level of the parsed
# response — confirm against the actual response shape.
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
# Scrape the influencer's profile by username.
# NOTE: the URL is a plain string (no f-prefix, no .format call), so the text
# "{influencer}" is sent literally rather than being replaced by a username.
url = "https://api.tikapi.io/public/check?username={influencer}"
payload={}
headers = {
'X-API-KEY': 'xxxxx'
}
influencerresponse = requests.request("GET", url, headers=headers, data=payload)
infl_response = influencerresponse.text
influencer_json = json.loads (infl_response)
# Write the raw profile response text to infl_data.json.
results_json = open("infl_data.json", "w")
I = [influencerresponse.text]
results_json.writelines(I)
results_json.close()
# NOTE: the subscript 'followerCount','bioLink','signature' is a single tuple
# key, and the loop unpacks only two names. The loop targets also rebind
# followerCount and bioLink, shadowing the lists created above, and
# signature.append(signature) appends the list to itself.
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
followerCount.append(followerCount)
bioLink.append(bioLink)
signature.append(signature)
# Build a DataFrame from the four lists and write it as CSV without the index.
influencer_df = pd.DataFrame({
'Influencer' : influencer,
'Follower Count' : followerCount,
'Link' : bioLink,
'Signature' : signature,
})
influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)
CodePudding user response:
The first mistake is in this part:
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
It should be:
influencer.append(hashtag_json["itemList"][0]['author']['uniqueId'])
The second mistake is in this part:
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
It should be:
followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
bioLink.append(influencer_json['userInfo']['user']['bioLink']['link'])
signature.append(influencer_json['userInfo']['user']['signature'])
Here is the full script with both corrections applied:
import requests
import json
import pandas as pd
import os

# Scrape a TikTok hashtag, look up every author found under it, and export
# one CSV row per author.
HASHTAG_URL = "https://api.tikapi.io/public/hashtag"
CHECK_URL = "https://api.tikapi.io/public/check"
CSV_PATH = '/Users/john/Repos/TikTok/influencer.csv'
CSV_COLUMNS = ['Influencer', 'Follower Count', 'Link', 'Signature']


def api_headers():
    """Return the request headers carrying the TikAPI key.

    The key is read from the TIKAPI_API_KEY environment variable so that it
    is never committed alongside the script.

    Raises:
        SystemExit: if the environment variable is not set.
    """
    try:
        return {'X-API-KEY': os.environ['TIKAPI_API_KEY']}
    except KeyError:
        raise SystemExit(
            "Set the TIKAPI_API_KEY environment variable to your TikAPI key."
        ) from None


def fetch_text(url, params, headers):
    """GET *url* with query *params* and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # `params` lets requests URL-encode the values (usernames may contain
    # characters that are not safe to paste into a query string).
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    return response.text


def extract_usernames(hashtag_json):
    """Return the unique author usernames of a hashtag response, in order.

    Reads ``itemList[*]['author']['uniqueId']``; a response without
    ``itemList`` yields an empty list.
    """
    authors = (item['author']['uniqueId'] for item in hashtag_json.get('itemList', []))
    # dict.fromkeys de-duplicates while keeping first-seen order, so an author
    # with several posts under the hashtag is only looked up once.
    return list(dict.fromkeys(authors))


def extract_profile(influencer_json):
    """Return ``(follower_count, bio_link, signature)`` from a profile response.

    Raises:
        KeyError: if ``userInfo``, its ``stats.followerCount`` or ``user``
            entries are missing.
    """
    user_info = influencer_json['userInfo']
    user = user_info['user']
    # NOTE(review): bioLink is presumably absent for profiles without a link;
    # fall back to an empty string instead of failing the whole run.
    bio_link = (user.get('bioLink') or {}).get('link', '')
    return user_info['stats']['followerCount'], bio_link, user.get('signature', '')


def build_dataframe(rows):
    """Build the result table from ``(username, followers, link, signature)`` rows."""
    return pd.DataFrame(rows, columns=CSV_COLUMNS)


def main():
    """Run the scrape and write data.json, infl_data.json and the CSV file."""
    headers = api_headers()

    # scrape hashtag
    hashtag_text = fetch_text(HASHTAG_URL, {'count': 30, 'id': 9261}, headers)
    # write data to hashtag json file
    with open("data.json", "w", encoding="utf-8") as hashtag_file:
        hashtag_file.write(hashtag_text)

    rows = []
    # write data to influencer json file: one JSON document per line, because
    # every username produces its own response.
    with open("infl_data.json", "w", encoding="utf-8") as profile_file:
        for username in extract_usernames(json.loads(hashtag_text)):
            # scrape influencer username
            try:
                influencer_json = json.loads(
                    fetch_text(CHECK_URL, {'username': username}, headers)
                )
                profile = extract_profile(influencer_json)
            except (requests.RequestException, KeyError, ValueError) as err:
                # One unavailable profile should not discard the other rows.
                print("Skipping {}: {!r}".format(username, err))
                continue
            profile_file.write(json.dumps(influencer_json) + "\n")
            rows.append((username, *profile))

    # create csv file of results
    build_dataframe(rows).to_csv(CSV_PATH, index=False)


if __name__ == "__main__":
    main()