6/9/2017 - 5:36 PM

VBA_ Web Scraping

' Turn on Microsoft Internet controls in VBA Reference
' Turn on Microsoft HTML Library

Public Const ERROR_EMPTY_URL  As Long = 514
Public Const CELL_URL As String = "$C$2"
Public Const CELL_FIRSTCELL As String = "$C$3"
Public Const CELL_TABLE_NUMBER AS String = "$C$4"

' Class Module to web connect

   --- Option Explicit
   --- Private ie As InternetExplorer
   --- Public function GetWebPage(byval sUrl As String) As HTMLDocument
           if sUrl  = "" Then
             Err.Raise ERROR_EMPTY_URL, "clswebconnect.getwebpage" _
                     "The Url is empty, Please enter a valid url and try again"
           End If
     Set ie = New InternetExplorer
     ie.Visible = False
     ie.Navigage sUrl
     Do While ie.ReadyState <>  READYSTATE_COMPLETE
     Dim html As HTMLDocument
     Set html = ie.document 
     Set GetWebPage = HTML
   --  End Function

   -- Public Sub Cleanup()
       Set ie  = Nothing
       Application.StatusBar = ""
   -- End Sub

Sub ReadWebPage()
   On Error GoTo EH :
   TurnOff Functionality
   Dim sUrl As String, ITableNumber As Long
   Dim sFirstCellText As String
   ' cnControl is worksheet name
   sUrl  = cnControl.Range(CELL_URL)
   sFirstCellText = cnControl.Range(CELL_FIRSTCELL)
   ITableNumber = cnControl.Range(CELL_TABLE_NUMBER)   
   Dim o As New clsWebConnect
   Dim Html As HTMLDocument
   Set Html =  o.GetWebPage (sUrl)
  'MsgBox html.Title
  If ParseData(html , lTableNumber , sFirstCellText ) = False Then
      MsgBox "Finished Reading The webpage"
  End If
  Done :
       Exit Sub
 EH :
      MsgBox Err.Description & " ReadFromWebsite.ReadWebPage"
End Sub

Private Sub TurnOffFunctionality()
   Application.Calculation = xlcalculationManual
   Application.DisplayStatusBar  = False
   Application.EnableEvents = False
   Application.ScreenUpdating = False
End Sub

'Procedure :  TurnonFunctionality
'Source   :   ExcelMacroMastery.com
'Author   :
'Purpose    : Turn on automatic calculations , events and screen updating

Private Sub TurnOnFunctionality()
   Application.Calculation = xlcalculationAutomatic
   Application.DisplayStatusBar  = True
   Application.EnableEvents = True
   Application.ScreenUpdating = True
End Sub


' Click Events

Function ParseData(html As HTMLDocument , lTableNumber As Long, sFirstCellText As String ) As Boolean
    ' Clear Sheet
     'Write the Table Data to worksheet
     If WriteTableData  (cnTableData , html , lTableNumber , sFirstCellText) = False Then
        MsgBox "Couldn't find the Table."
     End If
     ' Format the Table
End Function

Public Sub ClearSheet()
   With cnTableData.Range("A1:BZ5000")
   End With
   'Remove exisitng tables
   Dim tb As ListObject
   For Each tb In cnTableData.ListObjects
   Next tb

End Sub

Function WriteTableData(shWrite As Worksheet ,html As HTMLDocument , lTableNumber As Long, sFirstCellText As String ) As Boolean
      On Error GoTo EH
      Dim bTableFound As Boolean
      bTableFound = False
      Dim tables as ihtmlElementCollection
      Set tables = html.getElementsByTagName("Table")
      Dim lTableReading As Long
      lTableReading = 0
      Dim row As HTMLTableRow
      Dim lColumns As Long 
      Dim table As HTMLTable , Cell As HTMLTableCell
      For Each table In Tables
         '  number of columns
         lColumns = table.rows(0).Cells.Length
         If instr(1,table.cells(0).innertext,sFirstCellText,vbTextCompare) > 0  Then
           lTableReading = lTableReading + 1
         End If
         If lTableNumber = lTableReading Then
           bTableFound = True
           'Reading From Table
           Dim lrow As Long , lCol As Long
           lRow = 0
           For Each row In Table.Rows
              lcol = 0
              For Each Cell In Row.Cells
                  shWrite.Range(START_RANGE).Offset(lrow, lcol) = Cell.InnerText
                  lCol = lCol + 1
              Next Cell
              lRow = lRow + 1
           Next row
           Exit For
        End If
      Next Table
      writeTableData = bTableFound
      EH :
         MsgBox Err.Description & "TableReader.WriteTableData"

End Function

Sub FormatTable(shData As Worksheet)

   On Error GoTo EH :
    Dim rgTable As Range
    Set rgTable = Shwri.Range(START_RANGE).currentregion
    Dim Table As ListObject
    Set table = shwrite.ListObjects.Add (xlsrcRange , rgTable , xlyes ) 'xlyes is for headers
    table.Name  = TABLE_NAME
    table.TableStyle  = "TableStyleMedium14"
    table.Range.VerticalAlignment  = xlTop
  Done :
         Exit Sub
         MsgBox Err.Description & "TableReader.WriteTableData"

End Sub