I'm trying to create a movie database in Excel using VBA and HTML coding (I'm a newbie on this area). My code is the following:
Dim req As New MSXML2.XMLHTTP60
Dim reqURL As String
Dim pelicula As String
Dim Contador As Long
Dim UltimaRow As Long
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim WolTiles As MSHTML.IHTMLElementCollection
Dim WolTile As MSHTML.IHTMLElement
Dim Temporal As Variant
Dim QueSpace As Variant
UltimaRow = Cells(Rows.Count, 1).End(xlUp).Row
For Contador = 2 To UltimaRow
pelicula = Trim(Range("A" & Contador).Value)
reqURL = "https://www.filmaffinity.com/us/search.php?stext=" & WorksheetFunction.EncodeURL(pelicula)
req.Open "GET", reqURL, False
req.send
HTMLDoc.body.innerHTML = req.responseText
'********* alternative 2
If req.Status = 200 Then
Set WolTile = HTMLDoc.getElementById("movie-rat-avg")
If Not WolTile Is Nothing Then Range("B" & Contador).Value = WolTile.innerText
Set WolTile = HTMLDoc.getElementById("movie-count-rat")
If Not WolTile Is Nothing Then Range("C" & Contador).Value = Left(WolTile.innerText, InStr(1, WolTile.innerText, " ") - 1)
Set WolTiles = HTMLDoc.getElementsByClassName("movie-info")
If WolTiles.Length = 0 Then
Range("D" & Contador).Value = 0
Else
Temporal = InStr(1, WolTiles.Item(0).innerText, "Year")
Range("D" & Contador).Value = Mid(WolTiles.Item(0).innerText, Temporal 4, 4)
Temporal = InStr(1, WolTiles.Item(0).innerText, "Running time")
QueSpace = InStr(1, Mid(WolTiles.Item(0).innerText, Temporal 13, 6), " ")
Range("E" & Contador).Value = Mid(WolTiles.Item(0).innerText, Temporal 12, QueSpace)
End If
Set WolTiles = HTMLDoc.getElementsByClassName("card-genres")
If WolTiles.Length = 0 Then
Range("F" & Contador).Value = 0
Else
Temporal = InStr(1, WolTiles.Item(0).innerText, "|")
QueSpace = InStr(1, WolTiles.Item(0).innerText, ".")
If Temporal > QueSpace Then
If QueSpace > 0 Then
Range("F" & Contador).Value = Left(WolTiles.Item(0).innerText, QueSpace - 1)
End If
Else
If Temporal > 0 Then
Range("F" & Contador).Value = Left(WolTiles.Item(0).innerText, Temporal - 1)
End If
End If
End If
Else
MsgBox req.Status & " - " & req.statusText
Exit Sub
End If
Next
End Sub
The excel file (lista.xlsm) has the following entries for testing purposes:
title | rating | votes | year | Duration | genre |
---|---|---|---|---|---|
15 Minutes of War | 5,5 | 923 | 2.019 | 98 | Drama |
The Last Duel | 0 | 0 | |||
Collateral Beauty | 5,9 | 9.196 | 2.016 | 94 | |
Da 5 Bloods | 5,3 | 3.143 | 2.020 | 154 | War |
Daniel Isn't Real | 5,7 | 1.804 | 2.019 | 96 | Thriller |
As you can see "The Last Duel" entry does not show any data. But, if I compare the HTML output (saved to an external file after the line "HTMLDoc.body.innerHTML = req.responseText" I can find the same Element Id's and Classes.
Any idea why is this happening?
CodePudding user response:
You are getting a results list page, as more than one possible match, rather than a single film listing page.
You could check the content of the returned page to determine which type you have ended up on.
As the default ordering is by relevance, you might then assume the first film listing, on a results page, is the one to use, and thus make an additional request to get the film page for that listing.
Additionally, in future you may need to develop to handle no results.
Option Explicit
Public Sub WriteOutFilmInfo()
Dim req As New MSXML2.XMLHTTP60
Dim reqURL As String
Dim pelicula As String
Dim Contador As Long
Dim UltimaRow As Long
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim WolTiles As MSHTML.IHTMLElementCollection
Dim WolTile As MSHTML.IHTMLElement
Dim Temporal As Variant
Dim QueSpace As Variant
UltimaRow = Cells(Rows.Count, 1).End(xlUp).Row
For Contador = 2 To UltimaRow
pelicula = Trim$(Range("A" & Contador).Value)
reqURL = "https://www.filmaffinity.com/us/search.php?stext=" & Replace$(pelicula, Chr$(32), " ")
req.Open "GET", reqURL, False
req.send
HTMLDoc.body.innerHTML = req.responseText
'********* alternative 2
If req.Status = 200 Then
If InStr(HTMLDoc.querySelector(".fb-sh").href, ".html&t=") = 0 Then 'on search results list page not specific film page
reqURL = HTMLDoc.querySelector(".mc-title > a").href 'extract first listing as default sort is relevance
req.Open "GET", reqURL, False
req.send
HTMLDoc.body.innerHTML = req.responseText
End If
Set WolTile = HTMLDoc.getElementById("movie-rat-avg")
If Not WolTile Is Nothing Then Range("B" & Contador).Value = WolTile.innerText
Set WolTile = HTMLDoc.getElementById("movie-count-rat")
If Not WolTile Is Nothing Then Range("C" & Contador).Value = Left(WolTile.innerText, InStr(1, WolTile.innerText, " ") - 1)
Set WolTiles = HTMLDoc.getElementsByClassName("movie-info")
If WolTiles.Length = 0 Then
Range("D" & Contador).Value = 0
Else
Temporal = InStr(1, WolTiles.Item(0).innerText, "Year")
Range("D" & Contador).Value = Mid(WolTiles.Item(0).innerText, Temporal 4, 4)
Temporal = InStr(1, WolTiles.Item(0).innerText, "Running time")
QueSpace = InStr(1, Mid(WolTiles.Item(0).innerText, Temporal 13, 6), " ")
Range("E" & Contador).Value = Mid(WolTiles.Item(0).innerText, Temporal 12, QueSpace)
End If
Set WolTiles = HTMLDoc.getElementsByClassName("card-genres")
If WolTiles.Length = 0 Then
Range("F" & Contador).Value = 0
Else
Temporal = InStr(1, WolTiles.Item(0).innerText, "|")
QueSpace = InStr(1, WolTiles.Item(0).innerText, ".")
If Temporal > QueSpace Then
If QueSpace > 0 Then
Range("F" & Contador).Value = Left(WolTiles.Item(0).innerText, QueSpace - 1)
End If
Else
If Temporal > 0 Then
Range("F" & Contador).Value = Left(WolTiles.Item(0).innerText, Temporal - 1)
End If
End If
End If
Else
MsgBox req.Status & " - " & req.statusText
Exit Sub
End If
Next
End Sub
CodePudding user response:
Thanks everyone!!! You're absolutely right. I've checked again and the search returns 2 entries. enter image description here