harsha547
6/9/2017 - 5:36 PM

VBA_ Web Scraping

' Turn on Microsoft Internet controls in VBA Reference
' Turn on Microsoft HTML Library

Public Const ERROR_EMPTY_URL  As Long = 514
Public Const CELL_URL As String = "$C$2"
Public Const CELL_FIRSTCELL As String = "$C$3"
Public Const CELL_TABLE_NUMBER AS String = "$C$4"

' Class Module to web connect

   --- Option Explicit
   --- Private ie As InternetExplorer
   --- Public function GetWebPage(byval sUrl As String) As HTMLDocument
          
           if sUrl  = "" Then
             Err.Raise ERROR_EMPTY_URL, "clswebconnect.getwebpage" _
                     "The Url is empty, Please enter a valid url and try again"
           End If
     Set ie = New InternetExplorer
     ie.Visible = False
     ie.Navigage sUrl
     
     Do While ie.ReadyState <>  READYSTATE_COMPLETE
         DoEvents
     Loop
     
     Dim html As HTMLDocument
     Set html = ie.document 
     
     Set GetWebPage = HTML
   
   --  End Function


   -- Public Sub Cleanup()
       ie.Quit
       Set ie  = Nothing
       Application.StatusBar = ""
   
   -- End Sub


--------------------------------------------------------------------------------------------------------------------------
Sub ReadWebPage()
   
   On Error GoTo EH :
   
   TurnOff Functionality
   
   Dim sUrl As String, ITableNumber As Long
   Dim sFirstCellText As String
   
   ' cnControl is worksheet name
   
   sUrl  = cnControl.Range(CELL_URL)
   sFirstCellText = cnControl.Range(CELL_FIRSTCELL)
   ITableNumber = cnControl.Range(CELL_TABLE_NUMBER)   
   
   
   Dim o As New clsWebConnect
   
   Dim Html As HTMLDocument
   
   Set Html =  o.GetWebPage (sUrl)
  
  'MsgBox html.Title
  
  If ParseData(html , lTableNumber , sFirstCellText ) = False Then
      cnTableData.Activate
      MsgBox "Finished Reading The webpage"
  End If
   
   
   o.Cleanup
  
   
  Done :
       TurnOnFunctionality
       Exit Sub
 
 EH :
      MsgBox Err.Description & " ReadFromWebsite.ReadWebPage"
  
End Sub



Private Sub TurnOffFunctionality()
   Application.Calculation = xlcalculationManual
   Application.DisplayStatusBar  = False
   Application.EnableEvents = False
   Application.ScreenUpdating = False
End Sub

'Procedure :  TurnonFunctionality
'Source   :   ExcelMacroMastery.com
'Author   :
'Purpose    : Turn on automatic calculations , events and screen updating

Private Sub TurnOnFunctionality()
   Application.Calculation = xlcalculationAutomatic
   Application.DisplayStatusBar  = True
   Application.EnableEvents = True
   Application.ScreenUpdating = True
End Sub



Modules

' Click Events
'




Function ParseData(html As HTMLDocument , lTableNumber As Long, sFirstCellText As String ) As Boolean
   
    ' Clear Sheet
    
      ClearSheet
      
     'Write the Table Data to worksheet
     
     If WriteTableData  (cnTableData , html , lTableNumber , sFirstCellText) = False Then
        
        MsgBox "Couldn't find the Table."
        
     End If
     
     ' Format the Table
     
End Function


Public Sub ClearSheet()
   
   With cnTableData.Range("A1:BZ5000")
       .Clear
   End With
   
   'Remove exisitng tables
   
   Dim tb As ListObject
   
   For Each tb In cnTableData.ListObjects
      tb.Delete
   Next tb
   

End Sub


Function WriteTableData(shWrite As Worksheet ,html As HTMLDocument , lTableNumber As Long, sFirstCellText As String ) As Boolean
    
      On Error GoTo EH
      
      Dim bTableFound As Boolean
      bTableFound = False
      
      Dim tables as ihtmlElementCollection
      Set tables = html.getElementsByTagName("Table")
      
      Dim lTableReading As Long
      lTableReading = 0
      Dim row As HTMLTableRow
      Dim lColumns As Long 
      Dim table As HTMLTable , Cell As HTMLTableCell
      
      For Each table In Tables
         
         '  number of columns
         lColumns = table.rows(0).Cells.Length
         
         If instr(1,table.cells(0).innertext,sFirstCellText,vbTextCompare) > 0  Then
         
           lTableReading = lTableReading + 1
           
         End If
         
         If lTableNumber = lTableReading Then
           bTableFound = True
           
           'Reading From Table
           Dim lrow As Long , lCol As Long
           
           lRow = 0
           
           For Each row In Table.Rows
              
              lcol = 0
              
              For Each Cell In Row.Cells
                
                  shWrite.Range(START_RANGE).Offset(lrow, lcol) = Cell.InnerText
                  
                  lCol = lCol + 1
                
              Next Cell
              
              lRow = lRow + 1
              
           Next row
           
           Exit For
           
           
        End If
         
      Next Table
      
      writeTableData = bTableFound
      
      EH :
         MsgBox Err.Description & "TableReader.WriteTableData"

End Function

Sub FormatTable(shData As Worksheet)

   On Error GoTo EH :
   
    Dim rgTable As Range
    Set rgTable = Shwri.Range(START_RANGE).currentregion
    rgTable.Columns.AutoFit
    
    Dim Table As ListObject
    Set table = shwrite.ListObjects.Add (xlsrcRange , rgTable , xlyes ) 'xlyes is for headers
    
    table.Name  = TABLE_NAME
    table.TableStyle  = "TableStyleMedium14"
    table.Range.VerticalAlignment  = xlTop
   
   
  Done :
         Exit Sub
  
  EH:
         MsgBox Err.Description & "TableReader.WriteTableData"
  

End Sub