[dba-VB] XML Serialization

MartyConnelly martyconnelly at shaw.ca
Mon Jul 25 11:48:20 CDT 2005


I don't know if you are using ado streams to ouput or input an ado 
recordset as an xml file
but here is some really rough test code I was running from Access97.
I was using to debug Google's desktop search engines output of UTF-8 xml 
via Access
 There is code here to check the proper BOM marker on XML and also convert
the output xml from the ado stream  to different encoding types UTF-8, 
Unicode, ISO-8859-1, Big-5 etc.
I also use this method to convert xml files to and fro from UTF-8 to 
UTF-16 encoding.
UTF-8 being a mangled compression of UTF-16
with one caveat if the original xml characters aren't properly escaped 
such as Windows typographical chars
they will be dropped by the conversion. This is a problem with the XML 
google pumps out.
especially with international chars.
Maybe there are enough notes to puzzle it out or give you somethings to 
consider.

Sub ReadFileInUTF16()
'1 LoadFromFile snippet
Dim stm As ADODB.stream
Dim strPath As String

  strPath = GetPath(CurrentDb.Name)
  Set stm = New ADODB.stream
  stm.Open
  stm.LoadFromFile strPath & "encUTF16.xml"
  MsgBox stm.ReadText
End Sub
Sub SaveFileinUTF8()
'2 SaveToFile snippet
Dim stm As ADODB.stream
Dim strPath As String

  strPath = GetPath(CurrentDb.Name)
  Set stm = New ADODB.stream
  stm.Open
  stm.Charset = "UTF-8"
  stm.LoadFromFile strPath & "encUTF8.xml"
  stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub

Sub ReadSaveFileInUTF8()
'1/2 ReadToFile / SaveToFile snippet
Dim stm As ADODB.stream
Dim strPath As String

  strPath = GetPath(CurrentDb.Name)
  Set stm = New ADODB.stream
  stm.Open
  stm.Charset = "UTF-8"
  stm.LoadFromFile strPath & "encUTF8.xml"
 stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub
Sub TranlateToUTF8()
'3 charset snippet
'ERROR Don't Use this code, just to expose the google error
Dim stm As ADODB.stream
Dim strPath As String

  strPath = GetPath(CurrentDb.Name)
  Set stm = New ADODB.stream
  stm.Open
  stm.LoadFromFile strPath & "encUTF16.xml"
  stm.Charset = "UTF-8"
  stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub
Sub PersistStreamToXML()
Dim cnn As ADODB.Connection
Dim rst As ADODB.Recordset
Dim stm As ADODB.stream
Dim strPath As String

  strPath = GetPath(CurrentDb.Name)
  Set cnn = New ADODB.Connection
  cnn.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
           "Data Source=" & CurrentDb.Name & ";"
  Set rst = New ADODB.Recordset
  rst.Open "SELECT * FROM tblTest", cnn
  Set stm = New ADODB.stream
  rst.Save stm, adPersistXML
  rst.Close
  Set rst = Nothing
  MsgBox stm.Charset
  stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
  Set stm = Nothing
End Sub

Sub ReadUTF8SaveFileInUTF16test()
'1/2 ReadToFile / SaveToFile snippet
'http://www.codeproject.com/soap/XMJFileStreaming.asp?msg=841289&mode=all&userid=903408#xx767979xx
Dim stm As ADODB.stream
Dim strPath As String
Dim strData As String
  strPath = GetPath(CurrentDb.Name)
  Set stm = New ADODB.stream
  stm.Open
  stm.Charset = "UTF-8"
  stm.Position = 0
  stm.Type = adTypeText
  'stm.LoadFromFile strPath & "encUTF8.xml"
  stm.LoadFromFile "C:\XML\Gil Encodings\UTF-8 encoded 
XML\Xm8w\XM8\XM8_UTF_vb\XM8_UTF_vb.xml"
   ' if you just try and dump out stream
   ' without reading and writing you get double BOM
   stm.Position = 0
  
   strData = stm.ReadText()
   Debug.Print strData
   stm.Position = 0
   stm.Charset = "Unicode" ' "Unicode" '"ascii" '"Big5" '"hebrew"
   stm.WriteText (strData)
 stm.SaveToFile strPath & "test16.xml", adSaveCreateOverWrite
 stm.Close
 Set stm = Nothing
End Sub
' ReadUTF8SaveFileInUTF16 "C:\XML\Gil Encodings\UTF-8 encoded 
XML\Xm8w\XM8\XM8_UTF_vb\XM8_UTF_vb.xml",
' ReadUTF8SaveFileInUTF16 "C:\XML\Gil 
Encodings\XM8_UTF_vb.xml","C:\XML\Gil Encodings\test16.xml"
Sub ReadUTF8SaveFileInUTF16(strFileIn As String, strFileOut As String)
'1/2 ReadToFile / SaveToFile snippet
'http://www.codeproject.com/soap/XMJFileStreaming.asp?msg=841289&mode=all&userid=903408#xx767979xx
'used ado 2.7
Dim stm As ADODB.stream
Dim strPath As String
Dim strData As String

'the character set encoding names for the machine are in the registry
'For a list of the character set strings that is known by a system, see
'the subkeys of HKEY_CLASSES_ROOT\MIME\Database\Charset
'in the Windows Registry.

  Set stm = New ADODB.stream
  stm.Open
  stm.Charset = "UTF-8" 'input file character set
  stm.Position = 0
  stm.Type = adTypeText
  '
  stm.LoadFromFile strFileIn
   ' if you just try and dump out stream
   ' without reading and writing you get double BOM
  
   stm.Position = 0 'reset to beginning of stream
   Dim strDataout
   strData = stm.ReadText()
  ' line below used to change encoding instruction for xml files
  '  <?xml version="1.0" encoding="UTF-16" ?>
   'strData = Replace(strData, "utf-8", "UTF-16", 1, 1)
     strData = Replace(strData, "utf-8", "UTF-16", 1, 1)
   Debug.Print strData
   stm.Position = 0
   ' set output file character set
   stm.Charset = "UTF-16" ' "Unicode" '"iso-8859-1" "ascii" '"Big5" 
'"hebrew"
   stm.WriteText (strData)
 stm.SaveToFile strFileOut, adSaveCreateOverWrite
 stm.Close
 Set stm = Nothing
End Sub

Public Function Replace(strIn As Variant, strFind As String, _
    strReplace As String, Optional intStart As Integer = 1, _
    Optional intCount As Integer = -1, _
    Optional intCompare As Integer = 0) As String
'-----------------------------------------------------------
' Inputs: String to search and replace,
'         search string, replacement string,
'         optional starting position (default = 1),
'         optional replacement limit (default = -1 .. ALL)
'         optional string compare value (default = 0 .. vbBinaryCompare)
' Outputs: Replaced string
' Created By: JLV 09/05/01
' Last Revised: JLV 09/05/01
' ** Duplicates the functionality of the VB 6 REPLACE function.
'-----------------------------------------------------------
Dim strWork As String, intS As Integer, intCnt As Integer
Dim intI As Integer, intLenF As Integer, intLenR As Integer

    If (intCompare < 0) Or (intCompare > 2) Then
        Err.Raise 5
        Exit Function
    End If
    If VarType(strIn) <> vbString Then
        Err.Raise 5
        Exit Function
    End If
    strWork = strIn
    intS = intStart
    intCnt = intCount
    intLenF = Len(strFind)
    intLenR = Len(strReplace)
    ' If find string zero length or count is zero, then nothing to replace
    If (intLenF = 0) Or (intCnt = 0) Then
        Replace = strIn
        Exit Function
    End If
    ' If start beyond length of string, return empty string
    If intS > Len(strWork) Then
        Replace = ""
        Exit Function
    End If

    ' Got some work to do -- find strings to replace
    Do
        intI = InStr(intS, strWork, strFind, intCompare)
        If intI = 0 Then Exit Do
        ' Insert the replace string
        strWork = Left(strWork, intI - 1) & strReplace & Mid(strWork, 
intI + intLenF)
        intS = intS + intI + intLenR - 1  ' Bump start to end of the 
replace string
        intCnt = intCnt - 1               ' Decrement the max replace 
counter
    Loop Until intCnt = 0
    Replace = strWork

End Function

Public Function GetPath(ByVal strFilePath As String) As String
Dim s As String
Dim i As Integer

  For i = Len(strFilePath) To 1 Step -1
    s = Mid$(strFilePath, i, 1)
    If StrComp(s, "\", vbBinaryCompare) = 0 Then
      GetPath = Left$(strFilePath, i)
      Exit For
    End If
  Next
End Function
Sub CheckBOM(Optional strFileIn As Variant, Optional strIn As Variant)
'checkbom "C:\XML\Gil Encodings\encUTF8_NoDecl.xml"
On Error GoTo Err_handler
Dim strInputData As String * 4
Dim lpBuffer() As Byte
Dim intFreeFile As Integer

  If Not IsMissing(strFileIn) Then
    intFreeFile = FreeFile
    Open strFileIn For Binary Access Read Lock Read As #intFreeFile Len = 4
    ReDim lpBuffer(4)
    Get #intFreeFile, , lpBuffer
    Close #intFreeFile
  ElseIf Not IsMissing(strIn) Then
    'Can't makes this work since VBA is always converting the string to 
UTF-16LE
    lpBuffer = Left$(strIn, 4)
  Else
    MsgBox "Nothing To Do"
    Exit Sub
  End If
 
  If lpBuffer(0) = 255 And lpBuffer(1) = 254 Then
    Debug.Print "File is UTF-16 Little Endian"
  ElseIf lpBuffer(0) = 254 And lpBuffer(1) = 255 Then
    Debug.Print "File is UTF-16 Big Endian"
  ElseIf lpBuffer(0) = 239 And lpBuffer(1) = 187 And lpBuffer(2) = 191 Then
    Debug.Print "File is UTF-8"
  'Start trying to figure out by other means this will only work on xml 
files that start with "<?"
  ElseIf lpBuffer(0) = 60 And lpBuffer(1) = 0 And lpBuffer(2) = 63 And 
lpBuffer(3) = 0 Then
    Debug.Print "File is UTF-16 Little Endian"
  ElseIf lpBuffer(0) = 0 And lpBuffer(1) = 60 And lpBuffer(2) = 0 And 
lpBuffer(3) = 63 Then
    Debug.Print "File is UTF-16 Big Endian"
  ElseIf lpBuffer(0) = 69 And lpBuffer(1) = 63 Then
    Debug.Print "File can be in UTF-8, ASCII, ISO-8859-?, Shift-JIS, etc"
  Else
    Debug.Print "Can't seem to figure out the Character encoding"
  End If

Err_Exit:
  On Error Resume Next
  Close #intFreeFile
  Exit Sub
Err_handler:
  Select Case Err.Number
  Case Else
    MsgBox Err.Number & " - " & Err.Description
  End Select
  Resume Err_Exit:
End Sub
Public Function EncodeToUTF8(lngUCS_dec As Long) As String
'This algorith was avaible in http://www1.tip.nl/~t876506/utf8tbl.html
Dim bytOne As Byte
Dim bytTwo As Byte
Dim bytThree As Byte

If lngUCS_dec < 128 Then
  bytOne = lngUCS_dec
  EncodeToUTF8 = CStr("0x" & Hex(bytOne))
ElseIf lngUCS_dec >= 128 And lngUCS_dec < 2048 Then
  bytOne = 192 + (lngUCS_dec \ 64)
  bytTwo = 128 + (lngUCS_dec Mod 64)
  EncodeToUTF8 = CStr("0x" & Hex(bytOne) & Hex(bytTwo))
ElseIf lngUCS_dec >= 2048 And lngUCS_dec < 65536 Then
  bytOne = 224 + (lngUCS_dec \ 4096)
  bytTwo = 128 + ((lngUCS_dec \ 64) Mod 64)
  bytThree = 128 + (lngUCS_dec Mod 64)
  EncodeToUTF8 = CStr("0x" & Hex(bytOne) & Hex(bytTwo) & Hex(bytThree))
Else
  MsgBox "Character Out of Range"
End If
End Function



John W. Colby wrote:

>Well I made my base SysVar structure serializable and created a SerializeOut
>and SerializeIn in the SysVar collection class and voila, exports / imports
>of sysvars to XML files.  Cool stuff.  This means that I can now store
>SysVars in either an MDB or XML files.  
>
>It "only" took FOUR days to figure all this stuff out starting with working
>VBA ADO classes in Access.  One more class and I will have the full SysVar
>functionality that I had inside of VBA.  The upside though is that learning
>how to read/write (manipulate) ADO.Net OLEDB objects gives me the foundation
>(and working examples) for much of the data manipulation stuff that I "just
>knew how to do" in VBA.  So many properties/methods, so little time.  And
>serialization to/from XML is pretty darned cool as well.  I've only
>scratched the surface on that stuff.
>
>I must say I don't know what I would do without the internet though.  I own
>one pretty good VB.Net book and three different ADO.Net books ad it is still
>waaaay faster to just Google for example code.  Once I have working example
>code it is easy enough to pick up and understand what is going on.
>
>John W. Colby
>www.ColbyConsulting.com 
>
>Contribute your unused CPU cycles to a good cause:
>http://folding.stanford.edu/
>
>
>_______________________________________________
>dba-VB mailing list
>dba-VB at databaseadvisors.com
>http://databaseadvisors.com/mailman/listinfo/dba-vb
>http://www.databaseadvisors.com
>
>
>
>  
>

-- 
Marty Connelly
Victoria, B.C.
Canada






More information about the dba-VB mailing list