Shamil Salakhetdinov
shamil at smsconsulting.spb.ru
Sun Apr 27 05:53:36 CDT 2008
...now the VB.net version of manually batched updates (watch line wraps!) It seems to work slightly quicker than similar C# version: ' VB.NET - individual updates '27.04.2008 13:01:44 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:01:49 ' C# - individual updates '27.04.2008 13:00:56 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:01:04 ' C# - manually batched updates '27.04.2008 13:37:39 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:37:42 ' VB.NET - manually batched updates '27.04.2008 14:39:11 'Count(*) = 25046 - changed because I work with this db 'Counter = 25046 '27.04.2008 14:39:13 'http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx 'http://codeconverter.sharpdevelop.net/Convert.aspx Imports System Imports System.Collections.Generic Imports System.Data.SqlClient Namespace TestSamples Public Class DataItem Public ID As Integer Public Name As String Public Sub New(ByVal id As Integer, ByVal name As String) id = id name = name End Sub Public ReadOnly Property ParsedName() As String ' just for testing purposes ' return real parsed name here Get Return Name End Get End Property End Class Public Class DataProcessor Private Const COUNT_SQL As String = "select count(*) from zsysUserSessionTrace" Private Const PAGED_SELECT_SQL As String = "SELECT * from " + "(" + " select " + " Cast(ROW_NUMBER() over (ORDER BY [UserSessionTraceId]) as int) as RowNum, " + " UserSessionTraceId, " + " HostAddress, " + " UserBrowser " + " from zsysUserSessionTrace " + ") s " + "where s.RowNum between {0} and {1}" Private Const UPDATE_SQL As String = "update zsysUserSessionTrace set UserBrowser = '{0}' where UserSessionTraceId = {1}" Private Const DEFAULT_BATCH_SIZE As Integer = 1000 Private _totalCount As Integer = -1 Public ReadOnly Property TotalCount() As Integer Get Return _totalCount End Get End Property Private _currentBatchStartRowNum As Integer = 0 Private _batchSize As Integer = 0 Public ReadOnly Property BatchSize() As Integer Get Return _batchSize End Get End Property Private _connectionString As String Public Sub New(ByVal connectionString As String) initialize(connectionString, DEFAULT_BATCH_SIZE) End Sub Public Sub New(ByVal connectionString As String, ByVal batchSize As Integer) initialize(connectionString, batchSize) End Sub Private Sub initialize(ByVal connectionString As String, ByVal batchSize As Integer) _batchSize = batchSize _currentBatchStartRowNum = 1 _connectionString = connectionString Using cnn As New System.Data.SqlClient.SqlConnection(connectionString) cnn.Open() Dim cmd As System.Data.SqlClient.SqlCommand = cnn.CreateCommand() cmd.CommandText = COUNT_SQL cmd.CommandType = System.Data.CommandType.Text _totalCount = CInt(cmd.ExecuteScalar()) End Using End Sub Public ReadOnly Property HasData() As Boolean Get Return (TotalCount > 0 AndAlso _currentBatchStartRowNum < TotalCount) End Get End Property Public Function GetData() As List(Of DataItem) Using cnn As New System.Data.SqlClient.SqlConnection(_connectionString) cnn.Open() Dim cmd As System.Data.SqlClient.SqlCommand = cnn.CreateCommand() Dim sql As String = String.Format(PAGED_SELECT_SQL, _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize) cmd.CommandText = sql cmd.CommandType = System.Data.CommandType.Text _currentBatchStartRowNum += _batchSize + 1 Dim rdr As System.Data.SqlClient.SqlDataReader = cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess) Dim list As New List(Of DataItem)() While rdr.Read() 'error in automated conversion => list.Add(new DataItem((int)rdr(1), (string)rdr(3))); list.Add(New DataItem(CInt(rdr(1)), DirectCast(rdr(3), String))) End While Return list End Using End Function Public Function Update(ByVal item As DataItem) As Integer Using cnn As New System.Data.SqlClient.SqlConnection(_connectionString) cnn.Open() Dim cmd As System.Data.SqlClient.SqlCommand = cnn.CreateCommand() Dim sql As String = String.Format(UPDATE_SQL, item.ParsedName, item.ID) cmd.CommandText = sql cmd.CommandType = System.Data.CommandType.Text Return cmd.ExecuteNonQuery() End Using End Function Private _updateCommands As String() = Nothing Private _arrayIndex As Integer = -1 Public Sub AddUpdateCommand(ByVal item As DataItem) If _arrayIndex = -1 Then _updateCommands = DirectCast(Array.CreateInstance(GetType(String), _batchSize), String()) _arrayIndex = 0 End If Dim sql As String = String.Format(UPDATE_SQL, item.ParsedName, item.ID) 'badly translated => _updateCommands(System.Math.Max(System.Threading.Interlocked.Increment(_arra yIndex), _arrayIndex - 1)) = sql _updateCommands(_arrayIndex) = sql _arrayIndex += 1 End Sub Public Function BatchUpdate() As Integer If _arrayIndex = -1 Then Return -1 End If If _arrayIndex < _batchSize Then Array.Resize(Of String)(_updateCommands, _arrayIndex) End If Using cnn As New System.Data.SqlClient.SqlConnection(_connectionString) cnn.Open() Dim cmd As System.Data.SqlClient.SqlCommand = cnn.CreateCommand() 'Console.WriteLine("Batch update: {0} updates", _arrayIndex) Dim sql As String = [String].Join(";", _updateCommands) _arrayIndex = -1 cmd.CommandText = sql cmd.CommandType = System.Data.CommandType.Text Return cmd.ExecuteNonQuery() End Using End Function End Class Public Class A_MainEntry ' VB.NET - individual updates '27.04.2008 13:01:44 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:01:49 ' C# - individual updates '27.04.2008 13:00:56 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:01:04 ' C# - manually batched updates '27.04.2008 13:37:39 'Count(*) = 25045 'Counter = 25045 '27.04.2008 13:37:42 ' VB.NET - manually batched updates '27.04.2008 14:39:11 'Count(*) = 25046 - changed because I work with this db 'Counter = 25046 '27.04.2008 14:39:13 Public Shared Sub Main() Dim count As Integer = 0 Try '+ customize - set your connection string here Dim pcName As String = "SPB" Dim dbName As String = "dbName" Dim userName As String = "sa" Dim password As String = "sapwd" Dim connectionString As String = String.Format("Data Source={0}\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}", pcName, dbName, userName, password) '- Console.WriteLine(DateTime.Now) Dim p As New DataProcessor(connectionString) While p.HasData Dim list As List(Of DataItem) = p.GetData() For Each item As DataItem In list If count > 0 AndAlso count Mod p.BatchSize = 0 Then p.BatchUpdate() End If 'p.Update(item); p.AddUpdateCommand(item) count += 1 Next End While p.BatchUpdate() Console.WriteLine("Count(*) = {0}", p.TotalCount) Console.WriteLine("Counter = {0}", count) Console.WriteLine(DateTime.Now) Catch ex As Exception Console.WriteLine(ex.Message) End Try End Sub End Class End Namespace -----Original Message----- From: dba-vb-bounces at databaseadvisors.com [mailto:dba-vb-bounces at databaseadvisors.com] On Behalf Of Shamil Salakhetdinov Sent: Sunday, April 27, 2008 1:43 PM To: 'Discussion concerning Visual Basic and related programming issues.' Subject: Re: [dba-VB] ADO.Net Hi Gustav, I do not know what causes this difference, I will try to find it later - yes, it expected to be nearly the same - meanwhile here is another version of manually (by custom code batched updates - works quicker as expected): // VB.NET - individual updates //27.04.2008 13:01:44 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:01:49 // C# - individual updates //27.04.2008 13:00:56 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:01:04 // C# - manually batched updates //27.04.2008 13:37:39 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:37:42 // coverters: //http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx //http://codeconverter.sharpdevelop.net/Convert.aspx using System; using System.Collections.Generic; using System.Data.SqlClient; namespace TestSamples { public class DataItem { public int ID; public string Name; public DataItem(int id, string name) { ID = id; Name = name; } public string ParsedName { // just for testing purposes // return real parsed name here get { return Name; } } } public class DataProcessor { private const string COUNT_SQL = "select count(*) from zsysUserSessionTrace"; private const string PAGED_SELECT_SQL = "SELECT * from " + "(" + " select " + " Cast(ROW_NUMBER() over (ORDER BY [UserSessionTraceId]) as int) as RowNum, " + " UserSessionTraceId, " + " HostAddress, " + " UserBrowser " + " from zsysUserSessionTrace " + ") s " + "where s.RowNum between {0} and {1}"; private const string UPDATE_SQL = "update zsysUserSessionTrace set UserBrowser = '{0}' where UserSessionTraceId = {1}"; private const int DEFAULT_BATCH_SIZE = 1000; private int _totalCount = -1; public int TotalCount { get { return _totalCount; } } private int _currentBatchStartRowNum = 0; private int _batchSize = 0; public int BatchSize { get { return _batchSize; }} private string _connectionString; public DataProcessor(string connectionString) { initialize(connectionString, DEFAULT_BATCH_SIZE); } public DataProcessor(string connectionString, int batchSize) { initialize(connectionString, batchSize); } private void initialize(string connectionString, int batchSize) { _batchSize = batchSize; _currentBatchStartRowNum = 1; _connectionString = connectionString; using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); cmd.CommandText = COUNT_SQL; cmd.CommandType = System.Data.CommandType.Text; _totalCount = (int)cmd.ExecuteScalar(); } } public bool HasData { get { return (TotalCount > 0 && _currentBatchStartRowNum < TotalCount); } } public List<DataItem> GetData() { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); string sql = string.Format(PAGED_SELECT_SQL, _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize); cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; _currentBatchStartRowNum += _batchSize + 1; System.Data.SqlClient.SqlDataReader rdr = cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess); List<DataItem> list = new List<DataItem>(); while (rdr.Read()) { //error in automated conversion => list.Add(new DataItem((int)rdr(1), (string)rdr(3))); list.Add(new DataItem((int)rdr[1], (string)rdr[3])); } return list; } } public int Update(DataItem item) { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); string sql = string.Format(UPDATE_SQL, item.ParsedName, item.ID); cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; return cmd.ExecuteNonQuery(); } } private string[] _updateCommands = null; private int _arrayIndex = -1; public void AddUpdateCommand(DataItem item) { if (_arrayIndex == -1) { _updateCommands = (string[])Array.CreateInstance(typeof(string), _batchSize); _arrayIndex = 0; } string sql = string.Format(UPDATE_SQL, item.ParsedName, item.ID); _updateCommands[_arrayIndex++] = sql; } public int BatchUpdate() { if (_arrayIndex == -1) return -1; if (_arrayIndex < _batchSize) Array.Resize<string>(ref _updateCommands, _arrayIndex); using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); //Console.WriteLine("Batch update: {0} updates", _arrayIndex); string sql = String.Join(";",_updateCommands) ; _arrayIndex = -1; cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; return cmd.ExecuteNonQuery(); } } } public class A_MainEntry { // VB.NET - individual updates //27.04.2008 13:01:44 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:01:49 // C# - individual updates //27.04.2008 13:00:56 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:01:04 // C# - manually batched updates //27.04.2008 13:37:39 //Count(*) = 25045 //Counter = 25045 //27.04.2008 13:37:42 public static void Main() { int count = 0; try { //+ customize - set your connection string here string pcName = "SPB string dbName = "dbName string userName = "sa"; string password = "sapwd string connectionString = string.Format("Data Source={0}\\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}", pcName, dbName, userName, password); //- Console.WriteLine(DateTime.Now); DataProcessor p = new DataProcessor(connectionString); while (p.HasData) { List<DataItem> list = p.GetData(); foreach (DataItem item in list) { if (count > 0 && count % p.BatchSize == 0) p.BatchUpdate(); //p.Update(item); p.AddUpdateCommand(item); count += 1; } } p.BatchUpdate(); Console.WriteLine("Count(*) = {0}", p.TotalCount); Console.WriteLine("Counter = {0}", count); Console.WriteLine(DateTime.Now); } catch (Exception ex) { Console.WriteLine(ex.Message); } } } } -----Original Message----- From: dba-vb-bounces at databaseadvisors.com [mailto:dba-vb-bounces at databaseadvisors.com] On Behalf Of Gustav Brock Sent: Sunday, April 27, 2008 1:33 PM To: dba-vb at databaseadvisors.com Subject: Re: [dba-VB] ADO.Net Hi Shamil > .. VB.NET .. it seems to work even quicker than C# equivalent somehow .. Strange. Could it be caused by some caching somewhere? What happens if you run the two tests in reverse order? /gustav