MartyConnelly
martyconnelly at shaw.ca
Mon Jul 25 11:48:20 CDT 2005
I don't know if you are using ado streams to ouput or input an ado
recordset as an xml file
but here is some really rough test code I was running from Access97.
I was using to debug Google's desktop search engines output of UTF-8 xml
via Access
There is code here to check the proper BOM marker on XML and also convert
the output xml from the ado stream to different encoding types UTF-8,
Unicode, ISO-8859-1, Big-5 etc.
I also use this method to convert xml files to and fro from UTF-8 to
UTF-16 encoding.
UTF-8 being a mangled compression of UTF-16
with one caveat if the original xml characters aren't properly escaped
such as Windows typographical chars
they will be dropped by the conversion. This is a problem with the XML
google pumps out.
especially with international chars.
Maybe there are enough notes to puzzle it out or give you somethings to
consider.
Sub ReadFileInUTF16()
'1 LoadFromFile snippet
Dim stm As ADODB.stream
Dim strPath As String
strPath = GetPath(CurrentDb.Name)
Set stm = New ADODB.stream
stm.Open
stm.LoadFromFile strPath & "encUTF16.xml"
MsgBox stm.ReadText
End Sub
Sub SaveFileinUTF8()
'2 SaveToFile snippet
Dim stm As ADODB.stream
Dim strPath As String
strPath = GetPath(CurrentDb.Name)
Set stm = New ADODB.stream
stm.Open
stm.Charset = "UTF-8"
stm.LoadFromFile strPath & "encUTF8.xml"
stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub
Sub ReadSaveFileInUTF8()
'1/2 ReadToFile / SaveToFile snippet
Dim stm As ADODB.stream
Dim strPath As String
strPath = GetPath(CurrentDb.Name)
Set stm = New ADODB.stream
stm.Open
stm.Charset = "UTF-8"
stm.LoadFromFile strPath & "encUTF8.xml"
stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub
Sub TranlateToUTF8()
'3 charset snippet
'ERROR Don't Use this code, just to expose the google error
Dim stm As ADODB.stream
Dim strPath As String
strPath = GetPath(CurrentDb.Name)
Set stm = New ADODB.stream
stm.Open
stm.LoadFromFile strPath & "encUTF16.xml"
stm.Charset = "UTF-8"
stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
End Sub
Sub PersistStreamToXML()
Dim cnn As ADODB.Connection
Dim rst As ADODB.Recordset
Dim stm As ADODB.stream
Dim strPath As String
strPath = GetPath(CurrentDb.Name)
Set cnn = New ADODB.Connection
cnn.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
"Data Source=" & CurrentDb.Name & ";"
Set rst = New ADODB.Recordset
rst.Open "SELECT * FROM tblTest", cnn
Set stm = New ADODB.stream
rst.Save stm, adPersistXML
rst.Close
Set rst = Nothing
MsgBox stm.Charset
stm.SaveToFile strPath & "test.xml", adSaveCreateOverWrite
Set stm = Nothing
End Sub
Sub ReadUTF8SaveFileInUTF16test()
'1/2 ReadToFile / SaveToFile snippet
'http://www.codeproject.com/soap/XMJFileStreaming.asp?msg=841289&mode=all&userid=903408#xx767979xx
Dim stm As ADODB.stream
Dim strPath As String
Dim strData As String
strPath = GetPath(CurrentDb.Name)
Set stm = New ADODB.stream
stm.Open
stm.Charset = "UTF-8"
stm.Position = 0
stm.Type = adTypeText
'stm.LoadFromFile strPath & "encUTF8.xml"
stm.LoadFromFile "C:\XML\Gil Encodings\UTF-8 encoded
XML\Xm8w\XM8\XM8_UTF_vb\XM8_UTF_vb.xml"
' if you just try and dump out stream
' without reading and writing you get double BOM
stm.Position = 0
strData = stm.ReadText()
Debug.Print strData
stm.Position = 0
stm.Charset = "Unicode" ' "Unicode" '"ascii" '"Big5" '"hebrew"
stm.WriteText (strData)
stm.SaveToFile strPath & "test16.xml", adSaveCreateOverWrite
stm.Close
Set stm = Nothing
End Sub
' ReadUTF8SaveFileInUTF16 "C:\XML\Gil Encodings\UTF-8 encoded
XML\Xm8w\XM8\XM8_UTF_vb\XM8_UTF_vb.xml",
' ReadUTF8SaveFileInUTF16 "C:\XML\Gil
Encodings\XM8_UTF_vb.xml","C:\XML\Gil Encodings\test16.xml"
Sub ReadUTF8SaveFileInUTF16(strFileIn As String, strFileOut As String)
'1/2 ReadToFile / SaveToFile snippet
'http://www.codeproject.com/soap/XMJFileStreaming.asp?msg=841289&mode=all&userid=903408#xx767979xx
'used ado 2.7
Dim stm As ADODB.stream
Dim strPath As String
Dim strData As String
'the character set encoding names for the machine are in the registry
'For a list of the character set strings that is known by a system, see
'the subkeys of HKEY_CLASSES_ROOT\MIME\Database\Charset
'in the Windows Registry.
Set stm = New ADODB.stream
stm.Open
stm.Charset = "UTF-8" 'input file character set
stm.Position = 0
stm.Type = adTypeText
'
stm.LoadFromFile strFileIn
' if you just try and dump out stream
' without reading and writing you get double BOM
stm.Position = 0 'reset to beginning of stream
Dim strDataout
strData = stm.ReadText()
' line below used to change encoding instruction for xml files
' <?xml version="1.0" encoding="UTF-16" ?>
'strData = Replace(strData, "utf-8", "UTF-16", 1, 1)
strData = Replace(strData, "utf-8", "UTF-16", 1, 1)
Debug.Print strData
stm.Position = 0
' set output file character set
stm.Charset = "UTF-16" ' "Unicode" '"iso-8859-1" "ascii" '"Big5"
'"hebrew"
stm.WriteText (strData)
stm.SaveToFile strFileOut, adSaveCreateOverWrite
stm.Close
Set stm = Nothing
End Sub
Public Function Replace(strIn As Variant, strFind As String, _
strReplace As String, Optional intStart As Integer = 1, _
Optional intCount As Integer = -1, _
Optional intCompare As Integer = 0) As String
'-----------------------------------------------------------
' Inputs: String to search and replace,
' search string, replacement string,
' optional starting position (default = 1),
' optional replacement limit (default = -1 .. ALL)
' optional string compare value (default = 0 .. vbBinaryCompare)
' Outputs: Replaced string
' Created By: JLV 09/05/01
' Last Revised: JLV 09/05/01
' ** Duplicates the functionality of the VB 6 REPLACE function.
'-----------------------------------------------------------
Dim strWork As String, intS As Integer, intCnt As Integer
Dim intI As Integer, intLenF As Integer, intLenR As Integer
If (intCompare < 0) Or (intCompare > 2) Then
Err.Raise 5
Exit Function
End If
If VarType(strIn) <> vbString Then
Err.Raise 5
Exit Function
End If
strWork = strIn
intS = intStart
intCnt = intCount
intLenF = Len(strFind)
intLenR = Len(strReplace)
' If find string zero length or count is zero, then nothing to replace
If (intLenF = 0) Or (intCnt = 0) Then
Replace = strIn
Exit Function
End If
' If start beyond length of string, return empty string
If intS > Len(strWork) Then
Replace = ""
Exit Function
End If
' Got some work to do -- find strings to replace
Do
intI = InStr(intS, strWork, strFind, intCompare)
If intI = 0 Then Exit Do
' Insert the replace string
strWork = Left(strWork, intI - 1) & strReplace & Mid(strWork,
intI + intLenF)
intS = intS + intI + intLenR - 1 ' Bump start to end of the
replace string
intCnt = intCnt - 1 ' Decrement the max replace
counter
Loop Until intCnt = 0
Replace = strWork
End Function
Public Function GetPath(ByVal strFilePath As String) As String
Dim s As String
Dim i As Integer
For i = Len(strFilePath) To 1 Step -1
s = Mid$(strFilePath, i, 1)
If StrComp(s, "\", vbBinaryCompare) = 0 Then
GetPath = Left$(strFilePath, i)
Exit For
End If
Next
End Function
Sub CheckBOM(Optional strFileIn As Variant, Optional strIn As Variant)
'checkbom "C:\XML\Gil Encodings\encUTF8_NoDecl.xml"
On Error GoTo Err_handler
Dim strInputData As String * 4
Dim lpBuffer() As Byte
Dim intFreeFile As Integer
If Not IsMissing(strFileIn) Then
intFreeFile = FreeFile
Open strFileIn For Binary Access Read Lock Read As #intFreeFile Len = 4
ReDim lpBuffer(4)
Get #intFreeFile, , lpBuffer
Close #intFreeFile
ElseIf Not IsMissing(strIn) Then
'Can't makes this work since VBA is always converting the string to
UTF-16LE
lpBuffer = Left$(strIn, 4)
Else
MsgBox "Nothing To Do"
Exit Sub
End If
If lpBuffer(0) = 255 And lpBuffer(1) = 254 Then
Debug.Print "File is UTF-16 Little Endian"
ElseIf lpBuffer(0) = 254 And lpBuffer(1) = 255 Then
Debug.Print "File is UTF-16 Big Endian"
ElseIf lpBuffer(0) = 239 And lpBuffer(1) = 187 And lpBuffer(2) = 191 Then
Debug.Print "File is UTF-8"
'Start trying to figure out by other means this will only work on xml
files that start with "<?"
ElseIf lpBuffer(0) = 60 And lpBuffer(1) = 0 And lpBuffer(2) = 63 And
lpBuffer(3) = 0 Then
Debug.Print "File is UTF-16 Little Endian"
ElseIf lpBuffer(0) = 0 And lpBuffer(1) = 60 And lpBuffer(2) = 0 And
lpBuffer(3) = 63 Then
Debug.Print "File is UTF-16 Big Endian"
ElseIf lpBuffer(0) = 69 And lpBuffer(1) = 63 Then
Debug.Print "File can be in UTF-8, ASCII, ISO-8859-?, Shift-JIS, etc"
Else
Debug.Print "Can't seem to figure out the Character encoding"
End If
Err_Exit:
On Error Resume Next
Close #intFreeFile
Exit Sub
Err_handler:
Select Case Err.Number
Case Else
MsgBox Err.Number & " - " & Err.Description
End Select
Resume Err_Exit:
End Sub
Public Function EncodeToUTF8(lngUCS_dec As Long) As String
'This algorith was avaible in http://www1.tip.nl/~t876506/utf8tbl.html
Dim bytOne As Byte
Dim bytTwo As Byte
Dim bytThree As Byte
If lngUCS_dec < 128 Then
bytOne = lngUCS_dec
EncodeToUTF8 = CStr("0x" & Hex(bytOne))
ElseIf lngUCS_dec >= 128 And lngUCS_dec < 2048 Then
bytOne = 192 + (lngUCS_dec \ 64)
bytTwo = 128 + (lngUCS_dec Mod 64)
EncodeToUTF8 = CStr("0x" & Hex(bytOne) & Hex(bytTwo))
ElseIf lngUCS_dec >= 2048 And lngUCS_dec < 65536 Then
bytOne = 224 + (lngUCS_dec \ 4096)
bytTwo = 128 + ((lngUCS_dec \ 64) Mod 64)
bytThree = 128 + (lngUCS_dec Mod 64)
EncodeToUTF8 = CStr("0x" & Hex(bytOne) & Hex(bytTwo) & Hex(bytThree))
Else
MsgBox "Character Out of Range"
End If
End Function
John W. Colby wrote:
>Well I made my base SysVar structure serializable and created a SerializeOut
>and SerializeIn in the SysVar collection class and voila, exports / imports
>of sysvars to XML files. Cool stuff. This means that I can now store
>SysVars in either an MDB or XML files.
>
>It "only" took FOUR days to figure all this stuff out starting with working
>VBA ADO classes in Access. One more class and I will have the full SysVar
>functionality that I had inside of VBA. The upside though is that learning
>how to read/write (manipulate) ADO.Net OLEDB objects gives me the foundation
>(and working examples) for much of the data manipulation stuff that I "just
>knew how to do" in VBA. So many properties/methods, so little time. And
>serialization to/from XML is pretty darned cool as well. I've only
>scratched the surface on that stuff.
>
>I must say I don't know what I would do without the internet though. I own
>one pretty good VB.Net book and three different ADO.Net books ad it is still
>waaaay faster to just Google for example code. Once I have working example
>code it is easy enough to pick up and understand what is going on.
>
>John W. Colby
>www.ColbyConsulting.com
>
>Contribute your unused CPU cycles to a good cause:
>http://folding.stanford.edu/
>
>
>_______________________________________________
>dba-VB mailing list
>dba-VB at databaseadvisors.com
>http://databaseadvisors.com/mailman/listinfo/dba-vb
>http://www.databaseadvisors.com
>
>
>
>
>
--
Marty Connelly
Victoria, B.C.
Canada