Home > Back-end >  Convert PDF to text file using VBA and Adobe Acrobat XI standard
Convert PDF to text file using VBA and Adobe Acrobat XI standard

Time:08-24

Part 3 of a enter image description here

URLs:

https://hpvchemicals.oecd.org/ui/handler.axd?id=e19d2799-0c16-496d-a607-b09330dd28a7
https://hpvchemicals.oecd.org/ui/handler.axd?id=40da06b1-a855-4c0c-bc21-bbc856dca725
https://hpvchemicals.oecd.org/ui/handler.axd?id=c4967546-1f5e-472a-b629-a2998323735b
https://hpvchemicals.oecd.org/ui/handler.axd?id=bde5e625-83ee-423d-aa70-eb0e453088e4
https://hpvchemicals.oecd.org/ui/handler.axd?id=621c4f55-ef3c-4b99-bb98-e6aaf3f436dd
https://hpvchemicals.oecd.org/ui/handler.axd?id=26e1420d-f9b7-4768-b6fa-d345f54e7683
https://hpvchemicals.oecd.org/ui/handler.axd?id=263f3491-90c7-4c3a-b43e-4c4e9395bcea
https://hpvchemicals.oecd.org/ui/handler.axd?id=b78d39a9-26c2-48ff-aadc-cb056a89f08b
https://hpvchemicals.oecd.org/ui/handler.axd?id=97a7b56f-ebaf-4416-8b4b-88b19ca3bd16
https://hpvchemicals.oecd.org/ui/handler.axd?id=c6c3b7c1-9239-40d9-b51a-85a15e2411d6

Update:

enter image description here

CodePudding user response:

Tested:

#If VBA7 Then
    Private Declare PtrSafe Function URLDownloadToFile Lib "urlmon" _
      Alias "URLDownloadToFileA" (ByVal pCaller As LongPtr, ByVal szURL As String, _
        ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As LongPtr) As Long
#Else
    Private Declare Function URLDownloadToFile Lib "urlmon" _
      Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, _
        ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
#End If

Sub Tester()
    Dim filePath As String
    Dim fso As FileSystemObject, url
    Dim fileStream As TextStream, ws As Worksheet
    Dim c As Range, fileRoot As String, pdfPath As String
    
    Set ws = Worksheets("Data")     'use a specific worksheet reference
    fileRoot = ws.Range("D2").Value 'read this once
    If Right(fileRoot, 1) <> "\" Then fileRoot = fileRoot & "\" 'ensure terminating \
    
    For Each c In ws.Range("B2:B" & ws.Cells(Rows.Count, "B").End(xlUp).row).Cells
        url = Trim(c.Value)
        If LCase(url) Like "http?:*" Then  'has a URL
            
            pdfPath = fileRoot & "PDF_" & c.Offset(0, -1).Value & ".pdf"
            DownloadFile url, pdfPath
            ConvertPdf2 pdfPath, fileRoot & c.Offset(0, -1).Value & ".txt"
            
        End If 'have url
    Next c
End Sub

Function DownloadFile(sURL, sSaveAs) As Boolean
    DownloadFile = (URLDownloadToFile(0, sURL, sSaveAs, 0, 0) = 0)
End Function

Sub ConvertPdf2(pdfPath As String, textPath As String)
    Dim AcroXApp As Acrobat.AcroApp
    Dim AcroXAVDoc As Acrobat.AcroAVDoc
    Dim AcroXPDDoc As Acrobat.AcroPDDoc
    Dim jsObj As Object

    Set AcroXApp = CreateObject("AcroExch.App")
    Set AcroXAVDoc = CreateObject("AcroExch.AVDoc")
    AcroXAVDoc.Open pdfPath, "Acrobat"
    Set AcroXPDDoc = AcroXAVDoc.GetPDDoc
    Set jsObj = AcroXPDDoc.GetJSObject
    jsObj.SaveAs textPath, "com.adobe.acrobat.plain-text"
    AcroXAVDoc.Close False
    AcroXApp.Hide
    AcroXApp.Exit
End Sub

CodePudding user response:

I'm not in a position to help with automating acrobat, but here is a procedure I use to download PDFs. Supply a URL to a PDF file and a local path to save the file. Only works on the windows version of Excel.

Sub saveFile(url As String, Optional path As String)

    Dim http As Object
    Dim objfso As Object
    Dim objADOStream As Object
    
    If path = "" Then path = ThisWorkbook.path & "\file.pdf"
    
    Set http = CreateObject("MSXML2.serverXMLHTTP")
    http.Open "GET", url, False
    http.Send
 
    If http.Status = 200 Then
      Set objADOStream = CreateObject("ADODB.Stream")
      objADOStream.Open
      objADOStream.Type = 1 'adTypeBinary
 
      objADOStream.Write http.responsebody
      objADOStream.Position = 0    'Set the stream position to the start
 
      Set objfso = CreateObject("Scripting.FileSystemObject")
        If objfso.fileexists(path) Then objfso.DeleteFile path
      Set objfso = Nothing
 
      objADOStream.SaveToFile path
      objADOStream.Close
      Set objADOStream = Nothing
    End If
 
End Sub
  • Related