I want to get the session id & csfrtoken from the url:
Also, I tried the below code in vba, but not getting response. please guide, Note the account is a dummy account only to fetch the details.
Sub test()
Dim user As String
Dim pwd As String
Dim path As String
user = "[email protected]"
pwd = "Rain@123"
path = "https://www.screener.in/api/company/6596449/quick_ratios/"
Debug.Print httpGET(path, user, pwd)
End Sub
Public Function httpGET(fn As String, _
Optional authUser As String = vbNullString, _
Optional authPass As String = vbNullString) As String
pHtml = fn
Dim oHttp As Object
Set oHttp = CreateObject("Microsoft.XMLHTTP")
Call oHttp.Open("GET", pHtml, False)
If (authUser <> vbNullString) Then
' need to do basic authentication
' acknowledgement to http://pastie.org/1192157
oHttp.setRequestHeader "Content-Type", "application/json"
oHttp.setRequestHeader "Accept", "application/json"
oHttp.setRequestHeader "Authorization", "Basic " _
EncodeBase64(authUser ":" authPass)
End If
Call oHttp.send("")
Debug.Print oHttp.getAllResponseHeaders
httpGET = oHttp.responseText
Set oHttp = Nothing
End Function
Function EncodeBase64(text As String) As String
Dim arrData() As Byte
arrData = StrConv(text, vbFromUnicode)
Dim objXML As MSXML2.DOMDocument60
Dim objNode As MSXML2.IXMLDOMElement
Set objXML = New MSXML2.DOMDocument60
Set objNode = objXML.createElement("b64")
objNode.DataType = "bin.base64"
objNode.nodeTypedValue = arrData
EncodeBase64 = Application.Clean(objNode.text)
Set objNode = Nothing
Set objXML = Nothing
End Function
CodePudding user response:
You don't need the value of sessionid to login to that site. Csrftoken is necessary, though. However, the following is how you can log in to that site and access the content available in there.
Sub ScrapeAfterLogin()
Const Url$ = "https://www.screener.in/api/company/6596449/quick_ratios/"
Const Link$ = "https://www.screener.in/login/"
Dim Html As HTMLDocument, sParams As String
Dim oHttp As Object, sToken$
Set Html = New HTMLDocument
Set oHttp = CreateObject("MSXML2.serverXMLHTTP")
With oHttp
.Open "GET", Url, True
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
.send
While .readyState < 4: DoEvents: Wend
Html.body.innerHTML = .responseText
sToken = Html.querySelector("input[name='csrfmiddlewaretoken']").getAttribute("value")
sParams = "csrfmiddlewaretoken=" & sToken & "&next=/api/company/6596449/quick_ratios/&[email protected]&password=Rain@123"
.Open "POST", Link, True
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Referer", "https://www.screener.in/login/"
.setRequestHeader "Origin", "https://www.screener.in"
.send (sParams)
While .readyState < 4: DoEvents: Wend
MsgBox .responseText
End With
End Sub
To get the session id, you have to create a session, reuse the same session throughout all the requests and then find that session id in the cookies. I don't know how to reuse the same session in vba. However, the following is a python version of achieving the same.
import requests
from bs4 import BeautifulSoup
start_url = 'https://www.screener.in/api/company/6596449/quick_ratios/'
login_url = 'https://www.screener.in/login/'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
}
params = {
'csrfmiddlewaretoken': '',
'next': '/api/company/6596449/quick_ratios/',
'username': '[email protected]',
'password': 'Rain@123',
}
with requests.Session() as s:
s.headers.update(headers)
res = s.get(start_url)
soup = BeautifulSoup(res.text,"lxml")
params['csrfmiddlewaretoken'] = soup.select_one("input[name='csrfmiddlewaretoken']")['value']
s.headers['Origin'] = 'https://www.screener.in'
s.headers['Referer'] = 'https://www.screener.in/login/'
resp = s.post(login_url,data=params)
print(s.cookies.items())
Output:
[('csrftoken', 'y64L6KeQfJiDvTMcsRQNlLCGYtfEVj82'), ('sessionid', 'e06akkv5yodjag6nf9tqrz5hex9wx99w')]