[AccessD] Get data From Web

John W. Colby jwcolby at colbyconsulting.com
Fri Feb 18 10:14:58 CST 2005


Public Sub ParseHTML(sURL As String)
    '# ---------------------------------------------------------------- #'
    '#  Procedure:      ParseHTML                                       #'
    '#  Written By:     David Scott, Memphis Technology Group           #'
    '#  Date Written:   08/20/1998                                      #'
    '#  Accepts:        sURL - URL to examine                           #'
    '#  Returns:        Nothing                                         #'
    '#  References:     MSHTML - Microsoft HTML Object Library          #'
    '#                  SHDocVw - Microsoft Internet Controls           #'
    '#  Description:    Opens an IE window, retrieves current document  #'
    '#                  and cycles through each element of the document #'
    '#                  , printing to the debug window some of the ess- #'
    '#                  ential properties of the element. For a complete#'
    '#                  property listing check the object browser       #'
    '# ---------------------------------------------------------------- #'
    On Error Resume Next                    '# SOME PROPERTIES ARE NOT
PRESENT
                                            '# IN ALL ITEMS' PROPERTIES
Collection
                                            '# AN ERROR WILL BE GENERATED
    Dim SWs As New SHDocVw.ShellWindows     '# INSTANCES OF EXPLORER
    Dim IE As New SHDocVw.InternetExplorer  '# OBJECT REFERENCE TO NEW
INSTANCE OF IE
    Dim Doc As MSHTML.HTMLDocument          '# IE DOCUMENT REFERENCE
    Dim iItem As Integer                    '# CURRENT ITEM IN ITEM
Collection
                                            '# OF DOCUMENT
    Dim iItemCnt As Integer                 '# TOTAL NUMBER OF ITEMS IN ITEM
                                            '# COLLECTION OF DOCUMENT
    Dim itm                                 '# IE ITEM OBJECT REFERENCE
    Dim iFrm As Integer                     '# CURRENT FRAME IN FRAMES
Collection
                                            '# OF DOCUMENT
    Dim ifrmcnt As Integer                  '# TOTAL NUMBER OF FRAMES IN
Frame
                                            '# COLLECTION OF DOCUMENT
    Dim frm                                 '# IE FRAME OBJECT REFERENCE
   Dim strHTML As String
   
    IE.Visible = True                       '# SHOW INSTANCE OF IE
    IE.Offline = False                      '# MAKE SURE BROWSER ISN'T
WORKING OFFLINE
    IE.navigate sURL                        '# NAVIGATE TO THAT AWESOME
Access SITE

    If SWs.Count = 0 Then Exit Sub          '# IF NO INSTANCE OF IE IS OPEN,
THEN
                                            '# THERE'S NO NEED TO CONTINUE

    Do Until IE.Busy = False                '# WAIT UNTIL BROWSER IS
FINISHED DOWN-
    Loop                                    '# LOADING PAGE
   
   Do Until IE.Document.readyState = "complete"
   Loop
   Set Doc = IE.Document                   '# SET CURRENT DOCUMENT

    '# IF THE DOCUMENT LOADED IS A TRUE HTML DOCUMENT (TYPE IS DEFINED
    '# BY MSHTML LIBRARY) THEN CONTINUE, IF NOT, THEN DO NOTHING
    If TypeOf Doc Is HTMLDocument Then
        '# PRINT DOCUMENT TITLE
        ifrmcnt = Doc.Frames.Length         '# RETREIVE NUMBER OF FRAMES IN
Document
        If ifrmcnt = 0 Then                 '# IF NO FRAMES, THEN PRINT
ITEMS OF CURRENT DOCUMENT
            GoSub IterateItem
        Else
            For iFrm = 0 To ifrmcnt - 1     '# CYCLE THROUGH EACH FRAME
LISTED IN DOCUMENT
               GoSub IterateItem
            Next
        End If
    End If

    IE.Quit                                 '# QUIT IE
    '# CLEANUP
    If Not itm Is Nothing Then Set itm = Nothing
    If Not frm Is Nothing Then Set frm = Nothing
    If Not Doc Is Nothing Then Set Doc = Nothing
    If Not IE Is Nothing Then Set IE = Nothing
Exit Sub

IterateItem:
   If ifrmcnt = 0 Then                 '# IF NO FRAMES, THEN PRINT ITEMS OF
CURRENT DOCUMENT
      iItemCnt = Doc.all.Length
   Else
      iItemCnt = frm.Document.all.Length
      Set frm = Doc.Frames(iFrm)
   End If
   For iItem = 0 To iItemCnt - 1
      If ifrmcnt = 0 Then                 '# IF NO FRAMES, THEN PRINT ITEMS
OF CURRENT DOCUMENT
         Set itm = Doc.all.Item(iItem)
      Else
         Set itm = frm.Document.all.Item(iItem)
      End If
      Select Case itm.tagName
      Case "A", "HTML", "HEAD", "TITLE"
      Case "BODY"
         strHTML = strHTML & "InnerHTML " & iItem & ": " & itm.innerHTML
         If InStr(strHTML, "ad.exe?") > 0 Then
            Dim str As String
            str = Right$(strHTML, Len(strHTML) - InStr(strHTML,
"/cgi-cls/ad.exe?"))
            str = Left$(str, InStr(str, "<table"))
            If Len(str) > 0 Then GrabJobURLs str
         Else
            strHTML = ""
         End If
      Case Else
      End Select
'         strHTML = "Tagname " & iItem & ": " & itm.tagName
'         strHTML = strHTML & "Text " & iItem & ": " & itm.Text
'         strHTML = strHTML & "InnerHTML " & iItem & ": " & itm.innerHTML
'         strHTML = strHTML & "InnerText " & iItem & ": " & itm.innerText
'         strHTML = strHTML & "HRef " & iItem & ": " & itm.href
'         If InStr(strHTML, "ad.exe?") > 0 Then
'            Dim str As String
'            str = Right$(strHTML, Len(strHTML) - InStr(strHTML,
"/cgi-cls/ad.exe?"))
'            str = Left$(str, InStr(str, "<table"))
'            If Len(str) > 0 Then GrabJobURLs str
'         Else
'            strHTML = ""
'         End If
'         If InStr(strHTML, "Next Page") > 0 Then
'            MsgBox "section found"
'         End If
               
NextItem:
   Next
Return

End Sub


John W. Colby
www.ColbyConsulting.com 

Contribute your unused CPU cycles to a good cause:
http://folding.stanford.edu/





More information about the AccessD mailing list