jwcolby
jwcolby at colbyconsulting.com
Sun Apr 27 11:56:40 CDT 2008
What I come hear is that using the built-in batch is as fast as any other method, so if that is what I find easier to use there is no issue using that. I will try to understand your other methods (in VB, since the language makes very little apparent speed difference) and implement that in parallel so that I can switch between them on demand. If I can do that I will do some timings on my big database, which is what I am doing the updates on. Shamil Salakhetdinov wrote: > Hi John, > > Below is fully parameterized sample C# code, which uses three update > strategies including the one you mentioned > http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx > > The C# and VB.Net is nearly the same now - the difference of runtime of > previous version was because of one bug. (There could be still others - if > you find them please inform me - thank you). > > I'd think this thread's subject could be interested to many developers - > proposal (to moderators): is there anybody to take on this current sample > code and stats to test and to add some more tests and write some text to > publish this code and accompanying article on accessD web site? > > I wasn't aware of MS SQL TDS feature you mentioned but as you can see from > stats it gives just slightly better results than the manual batching. Of > course playing with batch size might give better gains of this method (my > tests didn't show that gains depending on batch size) but I must say for me > it looks more complicated and less flexible that using SQLDatareader and > custom classes - that could be just my perception.... > > The C# code is below in the end of this message - its VB.NET equivalent can > be obtained using automatic conversion - > http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx - and then one > code line have to be manually patched > > 'conversion issue replace the following code line => > ==================================================== > _updateCommands(System.Math.Max(System.Threading.Interlocked.Increment(_arra > yIndex), _arrayIndex - 1)) = sql > > with this one > ============= > _updateCommands(_arrayIndex) = sql : _arrayIndex += 1 > : > > Here are the stats: > > ==================== > C# > ==================== > > Strategy 1: > ----------- > Start time: 27.04.2008 19:02:58 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:03:15 > Elapsed time: 00:00:16.7968750 > > Strategy 2: > ----------- > Start time: 27.04.2008 19:03:15 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:03:23 > Elapsed time: 00:00:08.4218750 > > Strategy 3: > ----------- > Start time: 27.04.2008 19:03:23 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:03:31 > Elapsed time: 00:00:07.9062500 > > ==================== > VB.NET > ==================== > > Strategy 1: > ----------- > Start time: 27.04.2008 19:05:20 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:05:36 > Elapsed time: 00:00:16.7500000 > > Strategy 2: > ----------- > Start time: 27.04.2008 19:05:36 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:05:45 > Elapsed time: 00:00:08.2812500 > > Strategy 3: > ----------- > Start time: 27.04.2008 19:05:45 > Count(*) = 25046 > Counter = 25046 > End time: 27.04.2008 19:05:52 > Elapsed time: 00:00:07.5000000 > ==================== > > // Converters: > //http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx > //http://codeconverter.sharpdevelop.net/Convert.aspx > > // Sql TDS batched update > //http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx > > using System; > using System.Collections.Generic; > using System.Data; > using System.Data.SqlClient; > > namespace TestSamples > { > public sealed class Constants > { > //+ ///////////// C U S T O M I Z E ///////////////////// > public const string PcName = "NANT"; > public const string DbName = "ORS2005"; > public const string UserName = "sa"; > public const string Password = "shms"; > > public const string TABLE_NAME = "zsysUserSessionTrace"; > public const string ID_COL_NAME = "UserSessionTraceId"; > public const string NAME_COL_NAME = "UserBrowser"; > public const string PARSED_NAME_COL_NAME = "ParsedUserBrowserName"; > > public const int DEFAULT_BATCH_SIZE = 1000; > > public static string GetParsedName(string name) > { > // just for testing purposes - customzie > // to return your parsed name here > if (name == null) name = ""; > name = DateTime.Now.ToString() + ": " + name; > if (name.Length > 255) name = name.Substring(0, 255); > > return name; > } > //- ///////////// C U S T O M I Z E ///////////////////// > > public const int ID_COL_INDEX = 1; > public const int NAME_COL_INDEX = 2; > public const int PARSED_NAME_COL_INDEX = 3; > > public static string COUNT_SQL { get { > return String.Format("select count(*) from {0}", TABLE_NAME); } > } > > public static string PAGED_SELECT_SQL { get { > return string.Format( > "SELECT * from " + "(" + " select " + > " Cast(ROW_NUMBER() over (ORDER BY [{0}]) as int) as > RowNum, " + > " [{1}], [{2}],[{3}] " + " from [{4}] " + ") s ", > ID_COL_NAME, ID_COL_NAME, NAME_COL_NAME, > PARSED_NAME_COL_NAME, TABLE_NAME) + > "where s.RowNum between {0} and {1}"; }} > > public static string UPDATE_SQL { get { > return string.Format("update {0} ",TABLE_NAME) + > string.Format(" set {0} = '{1}' where {2} = {3}", > PARSED_NAME_COL_NAME,"{0}",ID_COL_NAME,"{1}"); > }} > > public static string UPDATE_SQL_PARAMETERIZED { get { > return string.Format("update {0} set {1} = @{2} where {3} = > @{4}", > TABLE_NAME, PARSED_NAME_COL_NAME, PARSED_NAME_COL_NAME, > ID_COL_NAME, ID_COL_NAME); > }} > } > > public class DataItem > { > private int _id; > public int ID > { > get { return _id; } > } > > private string _name; > public string Name > { > get { return _name; } > } > > public DataItem(int id, string name) > { > _id = id; > _name = name; > } > > public string ParsedName > { > get > { > return Constants.GetParsedName(_name); > } > } > > > } > > public class DataProcessor > { > > private int _totalCount = -1; > public int TotalCount > { > get { return _totalCount; } > } > > private int _currentBatchStartRowNum = 0; > private int _batchSize = 0; > public int BatchSize { get { return _batchSize; } } > > private string _connectionString; > public DataProcessor(string connectionString) > { > initialize(connectionString, Constants.DEFAULT_BATCH_SIZE); > } > public DataProcessor(string connectionString, int batchSize) > { > initialize(connectionString, batchSize); > } > private void initialize(string connectionString, int batchSize) > { > > _batchSize = batchSize; > _currentBatchStartRowNum = 1; > _connectionString = connectionString; > > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(connectionString)) > { > cnn.Open(); > > System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); > cmd.CommandText = Constants.COUNT_SQL; > cmd.CommandType = System.Data.CommandType.Text; > > _totalCount = (int)cmd.ExecuteScalar(); > } > } > > public bool HasData > { > get { return (TotalCount > 0 && _currentBatchStartRowNum < > TotalCount); } > } > > public List<DataItem> GetData() > { > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(_connectionString)) > { > cnn.Open(); > > System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); > string sql = string.Format(Constants.PAGED_SELECT_SQL, > _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize); > cmd.CommandText = sql; > cmd.CommandType = System.Data.CommandType.Text; > _currentBatchStartRowNum += _batchSize + 1; > System.Data.SqlClient.SqlDataReader rdr = > cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess); > > List<DataItem> list = new List<DataItem>(); > while (rdr.Read()) > { > //error in automated conversion => list.Add(new > DataItem((int)rdr(Constants.ID_COL_INDEX), > (string)rdr(Constants.NAME_COL_INDEX))); > list.Add(new DataItem((int)rdr[Constants.ID_COL_INDEX], > (string)rdr[Constants.NAME_COL_INDEX])); > } > return list; > } > } > > public int Update(DataItem item) > { > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(_connectionString)) > { > cnn.Open(); > > System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); > string sql = string.Format(Constants.UPDATE_SQL, > item.ParsedName, item.ID); > cmd.CommandText = sql; > cmd.CommandType = System.Data.CommandType.Text; > return cmd.ExecuteNonQuery(); > } > } > > private string[] _updateCommands = null; > private int _arrayIndex = -1; > public void AddUpdateCommand(DataItem item) > { > if (_arrayIndex == -1) > { > _updateCommands = > (string[])Array.CreateInstance(typeof(string), _batchSize); > _arrayIndex = 0; > } > string sql = string.Format(Constants.UPDATE_SQL, > item.ParsedName, item.ID); > _updateCommands[_arrayIndex++] = sql; > } > > public int BatchUpdate() > { > if (_arrayIndex == -1) return -1; > if (_arrayIndex < _batchSize) > Array.Resize<string>(ref _updateCommands, _arrayIndex); > > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(_connectionString)) > { > cnn.Open(); > > System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); > > //Console.WriteLine("Batch update: {0} updates", > _arrayIndex); > > string sql = String.Join(";", _updateCommands); > _arrayIndex = -1; > > cmd.CommandText = sql; > cmd.CommandType = System.Data.CommandType.Text; > return cmd.ExecuteNonQuery(); > } > } > > private SqlDataAdapter _da = null; > private DataSet _ds = null; > public DataTable SQLTDSGetDataTablePaged() > { > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(_connectionString)) > { > cnn.Open(); > > int batchSize = _batchSize; > if (_currentBatchStartRowNum + _batchSize >= _totalCount) > batchSize = _totalCount - _currentBatchStartRowNum; > string sql = string.Format(Constants.PAGED_SELECT_SQL, > _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize); > > _currentBatchStartRowNum += _batchSize + 1; > > _da = new SqlDataAdapter(sql, cnn); > _ds = new DataSet(); > _da.Fill(_ds); > > _da.UpdateCommand = new > SqlCommand(Constants.UPDATE_SQL_PARAMETERIZED, cnn); > _da.UpdateCommand.Parameters.Add("@" + > Constants.PARSED_NAME_COL_NAME, SqlDbType.NVarChar, 255, > Constants.PARSED_NAME_COL_NAME); > _da.UpdateCommand.Parameters.Add("@" + > Constants.ID_COL_NAME, SqlDbType.Int, 0, Constants.ID_COL_NAME); > _da.UpdateCommand.UpdatedRowSource = UpdateRowSource.None; > _da.UpdateBatchSize = batchSize; > > _da.SelectCommand.Connection = null; > _da.UpdateCommand.Connection = null; > > return _ds.Tables[0]; > } > } > > public void SQLTDSUpdate() > { > SQLTDSUpdate(false); > } > public void SQLTDSUpdate(bool forceUpdate) > { > using (System.Data.SqlClient.SqlConnection cnn = new > System.Data.SqlClient.SqlConnection(_connectionString)) > { > cnn.Open(); > _da.SelectCommand.Connection = cnn; > _da.UpdateCommand.Connection = cnn; > if (forceUpdate) > _da.UpdateBatchSize = _ds.Tables[0].Rows.Count; > _da.Update(_ds.Tables[0]); > > //Console.WriteLine("*** Forced test exit ***"); > //System.Environment.Exit(100); > } > } > } > > public class A_MainEntry > { > /// <summary> > /// Strategy 1 - use SqlDataReader to get data pages, and > /// generate and execute individual update SQL > statements > /// </summary> > public static void Strategy1(DataProcessor p, ref int count) > { > while (p.HasData) > { > List<DataItem> list = p.GetData(); > foreach (DataItem item in list) > { > p.Update(item); > count += 1; > } > //Console.WriteLine("{0} - rows processed...", p.BatchSize); > > } > } > > /// <summary> > /// Strategy 1 - use SqlDataReader to get data pages, and > /// generate batched update SQL statements, and > /// execute update batch when it has all update > /// statements added > /// </summary> > public static void Strategy2(DataProcessor p, ref int count) > { > while (p.HasData) > { > List<DataItem> list = p.GetData(); > foreach (DataItem item in list) > { > if (count > 0 && (count % p.BatchSize) == 0) > p.BatchUpdate(); > p.AddUpdateCommand(item); > count += 1; > } > } > p.BatchUpdate(); > } > > /// <summary> > /// Strategy 3 - use SqlDataAdapter and DataSet to get data pages, > and > /// use SQL TDS bacthed updates facility > /// </summary> > public static void Strategy3(DataProcessor p, ref int count) > { > while (p.HasData) > { > DataTable table = p.SQLTDSGetDataTablePaged(); > > foreach (DataRow row in table.Rows) > { > row.SetModified(); > row[Constants.PARSED_NAME_COL_INDEX] = > Constants.GetParsedName((string)row[Constants.NAME_COL_INDEX]); > count += 1; > } > p.SQLTDSUpdate(!p.HasData); > } > } > > public static void Main() > { > try > { > //+ customize - set your connection string here > string pcName = Constants.PcName; > string dbName = Constants.DbName; > string userName = Constants.UserName; > string password = Constants.Password; > string connectionString = string.Format("Data > Source={0}\\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}", > pcName, dbName, userName, password); > //- > > for (int i = 1; i <= 3; i++) > { > int count = 0; > DateTime startTime, endTime; > startTime = DateTime.Now; > Console.WriteLine(""); > Console.WriteLine("Strategy {0}:", i); > Console.WriteLine("-----------"); > Console.WriteLine("Start time: {0}", startTime); > DataProcessor p = new DataProcessor(connectionString); > > switch (i) > { > case 1: > Strategy1(p, ref count); > break; > case 2: > Strategy2(p, ref count); > break; > case 3: > Strategy3(p, ref count); > break; > default: > Console.WriteLine("{0} - undefined strategy > number", i); > break; > } > Console.WriteLine("Count(*) = {0}", p.TotalCount); > Console.WriteLine("Counter = {0}", count); > endTime = DateTime.Now; > Console.WriteLine("End time: {0}", endTime); > TimeSpan ts = > TimeSpan.FromTicks((endTime.Subtract(startTime)).Ticks); > Console.WriteLine("Elapsed time: {0}", ts); > } > } > catch (Exception ex) > { > Console.WriteLine(ex.Message); > } > } > > } > } > > Enjoy! > > -- > Shamil > > _______________________________________________ > dba-VB mailing list > dba-VB at databaseadvisors.com > http://databaseadvisors.com/mailman/listinfo/dba-vb > http://www.databaseadvisors.com > > -- John W. Colby www.ColbyConsulting.com