I'm trying to scrape certain information from a webpage using xmlhttp requests. The information I'm interested in are javascript encrypted and loaded dynamically. However, they are available in page source (CTRL U).
When I scoop out that portion from page source using regex and process the same using JsonConverter
, I get the following error:
Run-time error `10001`:
Error parsing JSON:
"text":{"payload":{"
I've tried with:
Sub GrabRedfinInfo()
Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
Dim HTML As HTMLDocument, Http As Object
Dim jsonObject As Object, jsonStr As Object
Dim itemStr As Variant, sResp As String
Set HTML = New HTMLDocument
Set Http = CreateObject("MSXML2.XMLHTTP")
With Http
.Open "Get", siteLink, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
.send
HTML.body.innerHTML = .responseText
sResp = .responseText
End With
With CreateObject("VBScript.RegExp")
.Global = True
.Pattern = "reactServerState\.InitialContext = (.*);"
.MultiLine = True
Set jsonStr = .Execute(sResp)
End With
itemStr = jsonStr(0).submatches(0)
Set jsonObject = JsonConverter.ParseJson(Replace(itemStr, "\", ""))
MsgBox jsonObject("ReactServerAgent.cache")("dataCache")("/stingray/api/home/details/belowTheFold")("res")
End Sub
Expected output:
Active Under Contract
Active
Pending - Taking Backups
Active
The following image shows their whereabouts:
CodePudding user response:
I would instead alter the regex to be more restrictive and target only the events governing string. I would additionally alter the string replacement to ensure I was exchanging \"
with "
.
You then end up with the timeline of events as an array/collection. See here
Example:
Code:
Option Explicit
Public Sub GrabRedfinInfo()
Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
Dim HTML As HTMLDocument, Http As Object
Dim jsonObject As Object, jsonStr As Object
Dim itemStr As Variant, sResp As String
Set HTML = New HTMLDocument
Set Http = CreateObject("MSXML2.XMLHTTP")
With Http
.Open "Get", siteLink, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
.send
HTML.body.innerHTML = .responseText
sResp = .responseText
End With
With CreateObject("VBScript.RegExp")
.Global = True
.Pattern = """events\\"".(\[.*?\])"
.MultiLine = True
Set jsonStr = .Execute(sResp)
End With
itemStr = jsonStr(0).SubMatches(0)
Set jsonObject = JsonConverter.ParseJson(Replace$(itemStr, "\" & Chr$(34), Chr$(34))) 'Array (collection)
Dim evt As Object
For Each evt In jsonObject
Debug.Print evt("mlsDescription")
Next
End Sub