John W. Colby
jwcolby at colbyconsulting.com
Fri Feb 18 10:14:58 CST 2005
Public Sub ParseHTML(sURL As String) '# ---------------------------------------------------------------- #' '# Procedure: ParseHTML #' '# Written By: David Scott, Memphis Technology Group #' '# Date Written: 08/20/1998 #' '# Accepts: sURL - URL to examine #' '# Returns: Nothing #' '# References: MSHTML - Microsoft HTML Object Library #' '# SHDocVw - Microsoft Internet Controls #' '# Description: Opens an IE window, retrieves current document #' '# and cycles through each element of the document #' '# , printing to the debug window some of the ess- #' '# ential properties of the element. For a complete#' '# property listing check the object browser #' '# ---------------------------------------------------------------- #' On Error Resume Next '# SOME PROPERTIES ARE NOT PRESENT '# IN ALL ITEMS' PROPERTIES Collection '# AN ERROR WILL BE GENERATED Dim SWs As New SHDocVw.ShellWindows '# INSTANCES OF EXPLORER Dim IE As New SHDocVw.InternetExplorer '# OBJECT REFERENCE TO NEW INSTANCE OF IE Dim Doc As MSHTML.HTMLDocument '# IE DOCUMENT REFERENCE Dim iItem As Integer '# CURRENT ITEM IN ITEM Collection '# OF DOCUMENT Dim iItemCnt As Integer '# TOTAL NUMBER OF ITEMS IN ITEM '# COLLECTION OF DOCUMENT Dim itm '# IE ITEM OBJECT REFERENCE Dim iFrm As Integer '# CURRENT FRAME IN FRAMES Collection '# OF DOCUMENT Dim ifrmcnt As Integer '# TOTAL NUMBER OF FRAMES IN Frame '# COLLECTION OF DOCUMENT Dim frm '# IE FRAME OBJECT REFERENCE Dim strHTML As String IE.Visible = True '# SHOW INSTANCE OF IE IE.Offline = False '# MAKE SURE BROWSER ISN'T WORKING OFFLINE IE.navigate sURL '# NAVIGATE TO THAT AWESOME Access SITE If SWs.Count = 0 Then Exit Sub '# IF NO INSTANCE OF IE IS OPEN, THEN '# THERE'S NO NEED TO CONTINUE Do Until IE.Busy = False '# WAIT UNTIL BROWSER IS FINISHED DOWN- Loop '# LOADING PAGE Do Until IE.Document.readyState = "complete" Loop Set Doc = IE.Document '# SET CURRENT DOCUMENT '# IF THE DOCUMENT LOADED IS A TRUE HTML DOCUMENT (TYPE IS DEFINED '# BY MSHTML LIBRARY) THEN CONTINUE, IF NOT, THEN DO NOTHING If TypeOf Doc Is HTMLDocument Then '# PRINT DOCUMENT TITLE ifrmcnt = Doc.Frames.Length '# RETREIVE NUMBER OF FRAMES IN Document If ifrmcnt = 0 Then '# IF NO FRAMES, THEN PRINT ITEMS OF CURRENT DOCUMENT GoSub IterateItem Else For iFrm = 0 To ifrmcnt - 1 '# CYCLE THROUGH EACH FRAME LISTED IN DOCUMENT GoSub IterateItem Next End If End If IE.Quit '# QUIT IE '# CLEANUP If Not itm Is Nothing Then Set itm = Nothing If Not frm Is Nothing Then Set frm = Nothing If Not Doc Is Nothing Then Set Doc = Nothing If Not IE Is Nothing Then Set IE = Nothing Exit Sub IterateItem: If ifrmcnt = 0 Then '# IF NO FRAMES, THEN PRINT ITEMS OF CURRENT DOCUMENT iItemCnt = Doc.all.Length Else iItemCnt = frm.Document.all.Length Set frm = Doc.Frames(iFrm) End If For iItem = 0 To iItemCnt - 1 If ifrmcnt = 0 Then '# IF NO FRAMES, THEN PRINT ITEMS OF CURRENT DOCUMENT Set itm = Doc.all.Item(iItem) Else Set itm = frm.Document.all.Item(iItem) End If Select Case itm.tagName Case "A", "HTML", "HEAD", "TITLE" Case "BODY" strHTML = strHTML & "InnerHTML " & iItem & ": " & itm.innerHTML If InStr(strHTML, "ad.exe?") > 0 Then Dim str As String str = Right$(strHTML, Len(strHTML) - InStr(strHTML, "/cgi-cls/ad.exe?")) str = Left$(str, InStr(str, "<table")) If Len(str) > 0 Then GrabJobURLs str Else strHTML = "" End If Case Else End Select ' strHTML = "Tagname " & iItem & ": " & itm.tagName ' strHTML = strHTML & "Text " & iItem & ": " & itm.Text ' strHTML = strHTML & "InnerHTML " & iItem & ": " & itm.innerHTML ' strHTML = strHTML & "InnerText " & iItem & ": " & itm.innerText ' strHTML = strHTML & "HRef " & iItem & ": " & itm.href ' If InStr(strHTML, "ad.exe?") > 0 Then ' Dim str As String ' str = Right$(strHTML, Len(strHTML) - InStr(strHTML, "/cgi-cls/ad.exe?")) ' str = Left$(str, InStr(str, "<table")) ' If Len(str) > 0 Then GrabJobURLs str ' Else ' strHTML = "" ' End If ' If InStr(strHTML, "Next Page") > 0 Then ' MsgBox "section found" ' End If NextItem: Next Return End Sub John W. Colby www.ColbyConsulting.com Contribute your unused CPU cycles to a good cause: http://folding.stanford.edu/