I am trying to download a CSV file generated from a report on this website.
Below is the request I am trying to replicate.
Here is an example of the API call I am trying to make.
I copied the request as cURL, parsed it, and got the following code for the request:
import requests
# Cookies copied from the browser request; they are sent with the POST below
# through the ``cookies=`` argument.
# NOTE(review): several of these look session-bound (e.g. cf_clearance,
# session_id, plack_session) and presumably expire -- confirm before reuse.
cookies = {
    "_ga": "GA1.2.938840467.1566745600",
    "terminal": "174224916994986694513353793024390053397",
    "__utmc": "1",
    "request_timing~11947199": "2~2237~74~91~2402",
    "_gid": "GA1.2.89702438.1657551717",
    "cf_clearance": "RDmwUSB_b6JmRSJpvrM76reZifV_m6cHjCJ0kmUkAS8-1657566551-0-250",
    "GSG_SESSION_ID": "322919708739472562779456661040511933493",
    "is_session_valid": "1",
    "plack_session": "27e03cd7d13a440955626dbc574adef85a619f88",
    "__utma": "1.938840467.1566745600.1657299681.1657567143.4",
    "__utmz": "1.1657567143.4.4.utmcsr=leetc.com|utmccn=(referral)|utmcmd=referral|utmcct=/",
    "__utmt": "1",
    "session_id": "53da3a29-e3d5-4dd4-96c3-7562a0fb7715",
    "_gat": "1",
    "request_id~1513632513": "6441F026-014E-11ED-9669-AEADB5E8FA7B",
    "__utmb": "1.5.9.1657567169673",
    "request_timing~1513632513": "1~4217~79~105~4401",
}
# Request headers copied from the browser's XHR call to the report endpoint.
# No "cookie" header is set here: the cookie values are passed separately via
# the ``cookies=`` argument (Requests sorts cookies= alphabetically).
headers = {
    "authority": "lee.county-taxes.com",
    "accept": "application/xml, text/xml, */*; q=0.01",
    "accept-language": "en-US,en;q=0.9",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "origin": "https://lee.county-taxes.com",
    "referer": "https://lee.county-taxes.com/public/reports/real_estate",
    # Values that themselves contain double quotes stay single-quoted.
    "sec-ch-ua": '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "x-requested-with": "XMLHttpRequest",
}
# Form body for the report-download POST, kept as one pre-built string of
# `name=value` pairs joined by `&`.
# Fix: three parameter separators had been corrupted by HTML-entity decoding
# ("&quot" -> '"' and "&curren" -> U+00A4), which mangled the `quoting`,
# `current_view` and `current_display` fields. They are restored here.
data = 'base_url=public/reports/real_estate&parent_request_id=5955C82C-014E-11ED-A791-8C0D896639A2&report_search=cert_status /|/ tax_year >2014 roll_year * cert_sale_date /|/ exemption /|/ deed_status /|/ standard_flag /|/ &report_search_current_user_inputs=&session_id=322919708739472562779456661040511933493&app_url=/tcb/app&page_url=public/reports/real_estate&report_name=Certificate Information by Year&selected_report=624&filetype=csv&delimiter=comma&quoting=on&pdf_report_title=Certificate Information by Year Report (Certificate Search)&add_run_by_to_header=on&paper_size=letter&page_orientation=portrait&page_break=auto&break_selector=tax_year&shade_alternate_rows=on&pdf_report_description=&report_download_email_address=&report_took_too_long_download_email=&displayed_columns=certificate_number|cert_status|tax_year|roll_year|account_number|situs_address|cert_sale_date|issued_date|purchased_date|certificate_rate|interest_start_date|face_amount|redeemed_date|redemption_amount_paid|transferred_date|exemption|deed_status|bidder_number|certificate_buyer|standard_flag&hide_results=&sort_by_displayed=&hide_public=&display_name=&current_view=certs&current_display=data&select_view=certs&last_sort_col=1&sbgb_boundary=2&search_order_column1=tax_year&search_order_column2=&select_display=data&search_order_direction1=desc&report_search_dummy=&report_search_dummy=/|/&report_search_dummy=>2014&report_search_dummy=&report_search_dummy=&report_search_dummy=/|/&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=/|/&report_search_dummy=/|/&report_search_dummy=&report_search_dummy=&report_search_dummy=/|/&rows_per_page=50&go_to_report_page=&report_page=1&backend=warehouse&print_cover_page=0&download_now=browser&ajax_request=0.4428397794570913&cookie_id=2025887579'
# POST the report-download form, sending the captured cookies, headers and
# form body. An explicit timeout is passed because Requests does not time out
# on its own, so a stalled server would otherwise hang the script.
response = requests.post(
    'https://lee.county-taxes.com/tcb/app/public/reports/real_estate/download',
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=60,
)
Unfortunately when I run this code I get the following output in response.content:
b'<ajax-response><response type=\'element_errors\'></response> <response type="element" id="message_container" > </response> <response type="element" id="report_download_util"><![CDATA[ ]]></response> <response type="javascript"><![CDATA[\n file_download(d_location() \'/download_pending_report?pending_report_file=pwHy_qVA5b\'); ]]></response>\n</ajax-response>'
I'm not sure what to make of this. Is it possible to download the CSV via an API call? Am I using the wrong call?
TIA.
CodePudding user response:
The following works:
from bs4 import BeautifulSoup
from httpx import Client

# Form fields posted to the `report_results` endpoint; here they select the
# "Active Certificate Vendors" report (selected_report 623).
data = {
    'base_url': 'public/reports/real_estate',
    'parent_request_id': '4C4ACC20-0155-11ED-9D24-CAB03D8B3709',
    'session_id': 296334053076598741934874852698924119209,
    'app_url': '/tcb/app',
    'page_url': 'public/reports/real_estate',
    'report_name': 'Active Certificate Vendors',
    'selected_report': 623,
    'filetype': 'csv',
    'delimiter': 'comma',
    'quoting': 'on',
    'pdf_report_title': 'Active Certificate Vendors Report (Certificate Sale Bidders)',
    'add_run_by_to_header': 'on',
    'paper_size': 'letter',
    'page_orientation': 'portrait',
    'page_break': 'auto',
    'break_selector': 'bidder_number',
    'shade_alternate_rows': 'on',
    'displayed_columns': 'vendor_number|bidder_name|full_name|bidder_number|address_lines|business_telephone|email',
    'current_view': 'cert_sale_bidders',
    'current_display': 'data',
    'select_view': 'cert_sale_bidders',
    'last_sort_col': 1,
    'sbgb_boundary': 2,
    'search_order_column1': 'bidder_number',
    'select_display': 'data',
    'search_order_direction1': 'asc',
    'rows_per_page': 50,
    'report_page': 1,
    'backend': 'mysql',
    'print_cover_page': 0,
    'reset_report_name': 0,
    'preserve_messages': 0,
    'preserve_backend': 0,
    'preserve_collapse': 0,
    'ajax_request': 0.6517064905478597,
    'cookie_id': 1982672363,
}

# `headers` is the request-header dict defined in the question's code.
with Client(headers=headers, timeout=60.0, follow_redirects=True) as client:
    r = client.post(
        'https://lee.county-taxes.com/tcb/app/public/reports/real_estate/report_results',
        data=data,
    )

# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(r.text, 'html.parser')

# The report rows are taken from the element with id `report_results__results`.
profit = soup.select_one('#report_results__results')
if profit is None:
    # select_one returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.text`.
    raise SystemExit('report results element not found in the response')
print(profit.text)
This returns:
Lee County Lee County 1 c/o County LandsPO Box 398 [email protected] Venus 1 LLC Venus 1 LLC 70 P O Box 25177 305-913-3333 [email protected] .....