Referring to the answer to my previous question: how can I automate getting the payload from https://findamortgagebroker.com instead of copying it by hand while inspecting the site with the browser's developer tools? Also, how can I change pages? I think changing pages will require a new CaptchaToken.
import requests
from bs4 import BeautifulSoup
# Fetch a single page of search results from the SearchContacts endpoint and
# print the profile links found in the returned HTML.
api_url = 'https://findamortgagebroker.com/home/SearchContacts/'
headers = {
    "content-type": "application/x-www-form-urlencoded",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
}

# The form fields are given as a mapping so that requests URL-encodes the
# keys and values (the space in "San Diego", the square brackets) instead of
# sending a hand-concatenated string verbatim.
# NOTE(review): the CaptchaToken was copied from a browser session and is
# presumably short-lived -- confirm it is still accepted before relying on it.
body = {
    "searchModel[SearchText]": "San Diego",
    "searchModel[PageNumber]": 2,
    "searchModel[Radius]": 50,
    "searchModel[ResultsPerPage]": 20,
    "searchModel[CaptchaToken]": "03AEkXODDG8q9JqC--gCpxJK_Kevp506iB5o5Z7ilzY3Ge6GbYQaoX9jcOJqEyC6TG159L5KSvPoE43UlBxGMYW2jlNcnc0ING0sFeQO2RZIOui0YnNAaByRIVrjaluwaNi7WCE2FykjJNI0B5FNLB7nJjnr9N7YEeUkY13km0wRN3vfyqPh-bVdpahCir00GzE-pQyXU_o84bY1dCWRNQten7O_cnmdcA0ucEPxFeO3WIbMkUkUqqMC5vpAUiz_VttmYMyRETidTuaI6rHE2_AjGbUr6Z61vXFr-dXAC63alA15gGu8ypGRljtHS2wmfNSSySrtegnFxD3txZZ4d2KDk4ugBXLfh3jNUHM_KcKF6Rkp0WOHx7-D-4CEfMf-mC9zJ6FnVqJx3FTZiOrwcelQ0dW1OxdHuHlCVPPQlzIzcFMfsTJOsCLj3JNZTEgkQ6Eicl6dkVV-F-CRPd4fQZ2D_u3dDmrIaCIQJJ4LlQuSYXhLt-6QMcnFXceygadkKGqeiGQZcdUeagF6c8zz9OUg5g2ppXkCu-WsH08e-ei7sRHspA3Rdwh6sylcr8fqFlxDNmEXTI4CH1nRgLvJMuXr6KdcY3AWNhwA",
    "searchModel[IsVendorRequest]": "false",
    "searchModel[VendorIdentifier]": 0,
    "searchModel[CaptchaV2]": "false",
}

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() turns an HTTP error into an exception instead of letting
# an error page be parsed into an empty result list.
res = requests.post(api_url, data=body, headers=headers, timeout=30)
res.raise_for_status()

soup = BeautifulSoup(res.text, 'lxml')
# One {'href': ...} record per element matching the result-tile selector.
data = [
    {'href': item.get('href')}
    for item in soup.select('.clickable-tile-contact')
]
print(data)
CodePudding user response:
To get the payload data, open the request in the browser's developer tools, go to Payload > Form Data, and click "view source" to see the raw form body that you can copy.
Code:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Request search-result pages 1 through 41 from the SearchContacts endpoint,
# collect every profile link found in the returned HTML, and load the links
# into a pandas DataFrame.
api_url = 'https://findamortgagebroker.com/home/SearchContacts/'
headers = {
    "content-type": "application/x-www-form-urlencoded",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
}

# Loop-invariant, so it is defined once instead of being rebuilt per page.
# NOTE(review): the token was copied from a browser session and is presumably
# short-lived -- confirm the server still accepts it for every page.
captcha_token = "03AEkXODDG8q9JqC--gCpxJK_Kevp506iB5o5Z7ilzY3Ge6GbYQaoX9jcOJqEyC6TG159L5KSvPoE43UlBxGMYW2jlNcnc0ING0sFeQO2RZIOui0YnNAaByRIVrjaluwaNi7WCE2FykjJNI0B5FNLB7nJjnr9N7YEeUkY13km0wRN3vfyqPh-bVdpahCir00GzE-pQyXU_o84bY1dCWRNQten7O_cnmdcA0ucEPxFeO3WIbMkUkUqqMC5vpAUiz_VttmYMyRETidTuaI6rHE2_AjGbUr6Z61vXFr-dXAC63alA15gGu8ypGRljtHS2wmfNSSySrtegnFxD3txZZ4d2KDk4ugBXLfh3jNUHM_KcKF6Rkp0WOHx7-D-4CEfMf-mC9zJ6FnVqJx3FTZiOrwcelQ0dW1OxdHuHlCVPPQlzIzcFMfsTJOsCLj3JNZTEgkQ6Eicl6dkVV-F-CRPd4fQZ2D_u3dDmrIaCIQJJ4LlQuSYXhLt-6QMcnFXceygadkKGqeiGQZcdUeagF6c8zz9OUg5g2ppXkCu-WsH08e-ei7sRHspA3Rdwh6sylcr8fqFlxDNmEXTI4CH1nRgLvJMuXr6KdcY3AWNhwA"

data = []
# A Session reuses the underlying connection across the 41 requests.
with requests.Session() as session:
    for page in range(1, 42):
        # The form fields are given as a mapping so that requests URL-encodes
        # the keys and values instead of sending a hand-built string verbatim.
        body = {
            "searchModel[SearchText]": "San Diego",
            "searchModel[PageNumber]": page,
            "searchModel[Radius]": 50,
            "searchModel[ResultsPerPage]": 20,
            "searchModel[CaptchaToken]": captcha_token,
            "searchModel[IsVendorRequest]": "false",
            "searchModel[VendorIdentifier]": 0,
            "searchModel[CaptchaV2]": "false",
        }
        # The timeout prevents an unresponsive server from stalling the whole
        # crawl; raise_for_status() stops on an HTTP error instead of silently
        # parsing an error page into zero results.
        res = session.post(api_url, data=body, headers=headers, timeout=30)
        res.raise_for_status()

        soup = BeautifulSoup(res.text, 'lxml')
        # One {'href': ...} record per element matching the tile selector.
        data.extend(
            {'href': item.get('href')}
            for item in soup.select('.clickable-tile-contact')
        )

print(data)
df = pd.DataFrame(data)
Output:
href
0 https://findamortgagebroker.com/Profile\Andres...
1 https://findamortgagebroker.com/Profile\DavidS...
2 https://findamortgagebroker.com/Profile\Daniel...
3 https://findamortgagebroker.com/Profile\DavidH...
4 https://findamortgagebroker.com/Profile\Evbeni...
.. ...
795 https://findamortgagebroker.com/Profile\Benjam...
796 https://findamortgagebroker.com/Profile\DiegoS...
797 https://findamortgagebroker.com/Profile\IvanLa...
798 https://findamortgagebroker.com/Profile\KevinM...
799 https://findamortgagebroker.com/Profile\DarenG...
[800 rows x 1 columns]