Shamil Salakhetdinov
shamil at smsconsulting.spb.ru
Sun Apr 27 10:14:00 CDT 2008
Hi John, Below is fully parameterized sample C# code, which uses three update strategies including the one you mentioned http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx The C# and VB.Net is nearly the same now - the difference of runtime of previous version was because of one bug. (There could be still others - if you find them please inform me - thank you). I'd think this thread's subject could be interested to many developers - proposal (to moderators): is there anybody to take on this current sample code and stats to test and to add some more tests and write some text to publish this code and accompanying article on accessD web site? I wasn't aware of MS SQL TDS feature you mentioned but as you can see from stats it gives just slightly better results than the manual batching. Of course playing with batch size might give better gains of this method (my tests didn't show that gains depending on batch size) but I must say for me it looks more complicated and less flexible that using SQLDatareader and custom classes - that could be just my perception.... The C# code is below in the end of this message - its VB.NET equivalent can be obtained using automatic conversion - http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx - and then one code line have to be manually patched 'conversion issue replace the following code line => ==================================================== _updateCommands(System.Math.Max(System.Threading.Interlocked.Increment(_arra yIndex), _arrayIndex - 1)) = sql with this one ============= _updateCommands(_arrayIndex) = sql : _arrayIndex += 1 : Here are the stats: ==================== C# ==================== Strategy 1: ----------- Start time: 27.04.2008 19:02:58 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:03:15 Elapsed time: 00:00:16.7968750 Strategy 2: ----------- Start time: 27.04.2008 19:03:15 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:03:23 Elapsed time: 00:00:08.4218750 Strategy 3: ----------- Start time: 27.04.2008 19:03:23 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:03:31 Elapsed time: 00:00:07.9062500 ==================== VB.NET ==================== Strategy 1: ----------- Start time: 27.04.2008 19:05:20 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:05:36 Elapsed time: 00:00:16.7500000 Strategy 2: ----------- Start time: 27.04.2008 19:05:36 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:05:45 Elapsed time: 00:00:08.2812500 Strategy 3: ----------- Start time: 27.04.2008 19:05:45 Count(*) = 25046 Counter = 25046 End time: 27.04.2008 19:05:52 Elapsed time: 00:00:07.5000000 ==================== // Converters: //http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx //http://codeconverter.sharpdevelop.net/Convert.aspx // Sql TDS batched update //http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx using System; using System.Collections.Generic; using System.Data; using System.Data.SqlClient; namespace TestSamples { public sealed class Constants { //+ ///////////// C U S T O M I Z E ///////////////////// public const string PcName = "NANT"; public const string DbName = "ORS2005"; public const string UserName = "sa"; public const string Password = "shms"; public const string TABLE_NAME = "zsysUserSessionTrace"; public const string ID_COL_NAME = "UserSessionTraceId"; public const string NAME_COL_NAME = "UserBrowser"; public const string PARSED_NAME_COL_NAME = "ParsedUserBrowserName"; public const int DEFAULT_BATCH_SIZE = 1000; public static string GetParsedName(string name) { // just for testing purposes - customzie // to return your parsed name here if (name == null) name = ""; name = DateTime.Now.ToString() + ": " + name; if (name.Length > 255) name = name.Substring(0, 255); return name; } //- ///////////// C U S T O M I Z E ///////////////////// public const int ID_COL_INDEX = 1; public const int NAME_COL_INDEX = 2; public const int PARSED_NAME_COL_INDEX = 3; public static string COUNT_SQL { get { return String.Format("select count(*) from {0}", TABLE_NAME); } } public static string PAGED_SELECT_SQL { get { return string.Format( "SELECT * from " + "(" + " select " + " Cast(ROW_NUMBER() over (ORDER BY [{0}]) as int) as RowNum, " + " [{1}], [{2}],[{3}] " + " from [{4}] " + ") s ", ID_COL_NAME, ID_COL_NAME, NAME_COL_NAME, PARSED_NAME_COL_NAME, TABLE_NAME) + "where s.RowNum between {0} and {1}"; }} public static string UPDATE_SQL { get { return string.Format("update {0} ",TABLE_NAME) + string.Format(" set {0} = '{1}' where {2} = {3}", PARSED_NAME_COL_NAME,"{0}",ID_COL_NAME,"{1}"); }} public static string UPDATE_SQL_PARAMETERIZED { get { return string.Format("update {0} set {1} = @{2} where {3} = @{4}", TABLE_NAME, PARSED_NAME_COL_NAME, PARSED_NAME_COL_NAME, ID_COL_NAME, ID_COL_NAME); }} } public class DataItem { private int _id; public int ID { get { return _id; } } private string _name; public string Name { get { return _name; } } public DataItem(int id, string name) { _id = id; _name = name; } public string ParsedName { get { return Constants.GetParsedName(_name); } } } public class DataProcessor { private int _totalCount = -1; public int TotalCount { get { return _totalCount; } } private int _currentBatchStartRowNum = 0; private int _batchSize = 0; public int BatchSize { get { return _batchSize; } } private string _connectionString; public DataProcessor(string connectionString) { initialize(connectionString, Constants.DEFAULT_BATCH_SIZE); } public DataProcessor(string connectionString, int batchSize) { initialize(connectionString, batchSize); } private void initialize(string connectionString, int batchSize) { _batchSize = batchSize; _currentBatchStartRowNum = 1; _connectionString = connectionString; using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); cmd.CommandText = Constants.COUNT_SQL; cmd.CommandType = System.Data.CommandType.Text; _totalCount = (int)cmd.ExecuteScalar(); } } public bool HasData { get { return (TotalCount > 0 && _currentBatchStartRowNum < TotalCount); } } public List<DataItem> GetData() { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); string sql = string.Format(Constants.PAGED_SELECT_SQL, _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize); cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; _currentBatchStartRowNum += _batchSize + 1; System.Data.SqlClient.SqlDataReader rdr = cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess); List<DataItem> list = new List<DataItem>(); while (rdr.Read()) { //error in automated conversion => list.Add(new DataItem((int)rdr(Constants.ID_COL_INDEX), (string)rdr(Constants.NAME_COL_INDEX))); list.Add(new DataItem((int)rdr[Constants.ID_COL_INDEX], (string)rdr[Constants.NAME_COL_INDEX])); } return list; } } public int Update(DataItem item) { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); string sql = string.Format(Constants.UPDATE_SQL, item.ParsedName, item.ID); cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; return cmd.ExecuteNonQuery(); } } private string[] _updateCommands = null; private int _arrayIndex = -1; public void AddUpdateCommand(DataItem item) { if (_arrayIndex == -1) { _updateCommands = (string[])Array.CreateInstance(typeof(string), _batchSize); _arrayIndex = 0; } string sql = string.Format(Constants.UPDATE_SQL, item.ParsedName, item.ID); _updateCommands[_arrayIndex++] = sql; } public int BatchUpdate() { if (_arrayIndex == -1) return -1; if (_arrayIndex < _batchSize) Array.Resize<string>(ref _updateCommands, _arrayIndex); using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand(); //Console.WriteLine("Batch update: {0} updates", _arrayIndex); string sql = String.Join(";", _updateCommands); _arrayIndex = -1; cmd.CommandText = sql; cmd.CommandType = System.Data.CommandType.Text; return cmd.ExecuteNonQuery(); } } private SqlDataAdapter _da = null; private DataSet _ds = null; public DataTable SQLTDSGetDataTablePaged() { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); int batchSize = _batchSize; if (_currentBatchStartRowNum + _batchSize >= _totalCount) batchSize = _totalCount - _currentBatchStartRowNum; string sql = string.Format(Constants.PAGED_SELECT_SQL, _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize); _currentBatchStartRowNum += _batchSize + 1; _da = new SqlDataAdapter(sql, cnn); _ds = new DataSet(); _da.Fill(_ds); _da.UpdateCommand = new SqlCommand(Constants.UPDATE_SQL_PARAMETERIZED, cnn); _da.UpdateCommand.Parameters.Add("@" + Constants.PARSED_NAME_COL_NAME, SqlDbType.NVarChar, 255, Constants.PARSED_NAME_COL_NAME); _da.UpdateCommand.Parameters.Add("@" + Constants.ID_COL_NAME, SqlDbType.Int, 0, Constants.ID_COL_NAME); _da.UpdateCommand.UpdatedRowSource = UpdateRowSource.None; _da.UpdateBatchSize = batchSize; _da.SelectCommand.Connection = null; _da.UpdateCommand.Connection = null; return _ds.Tables[0]; } } public void SQLTDSUpdate() { SQLTDSUpdate(false); } public void SQLTDSUpdate(bool forceUpdate) { using (System.Data.SqlClient.SqlConnection cnn = new System.Data.SqlClient.SqlConnection(_connectionString)) { cnn.Open(); _da.SelectCommand.Connection = cnn; _da.UpdateCommand.Connection = cnn; if (forceUpdate) _da.UpdateBatchSize = _ds.Tables[0].Rows.Count; _da.Update(_ds.Tables[0]); //Console.WriteLine("*** Forced test exit ***"); //System.Environment.Exit(100); } } } public class A_MainEntry { /// <summary> /// Strategy 1 - use SqlDataReader to get data pages, and /// generate and execute individual update SQL statements /// </summary> public static void Strategy1(DataProcessor p, ref int count) { while (p.HasData) { List<DataItem> list = p.GetData(); foreach (DataItem item in list) { p.Update(item); count += 1; } //Console.WriteLine("{0} - rows processed...", p.BatchSize); } } /// <summary> /// Strategy 1 - use SqlDataReader to get data pages, and /// generate batched update SQL statements, and /// execute update batch when it has all update /// statements added /// </summary> public static void Strategy2(DataProcessor p, ref int count) { while (p.HasData) { List<DataItem> list = p.GetData(); foreach (DataItem item in list) { if (count > 0 && (count % p.BatchSize) == 0) p.BatchUpdate(); p.AddUpdateCommand(item); count += 1; } } p.BatchUpdate(); } /// <summary> /// Strategy 3 - use SqlDataAdapter and DataSet to get data pages, and /// use SQL TDS bacthed updates facility /// </summary> public static void Strategy3(DataProcessor p, ref int count) { while (p.HasData) { DataTable table = p.SQLTDSGetDataTablePaged(); foreach (DataRow row in table.Rows) { row.SetModified(); row[Constants.PARSED_NAME_COL_INDEX] = Constants.GetParsedName((string)row[Constants.NAME_COL_INDEX]); count += 1; } p.SQLTDSUpdate(!p.HasData); } } public static void Main() { try { //+ customize - set your connection string here string pcName = Constants.PcName; string dbName = Constants.DbName; string userName = Constants.UserName; string password = Constants.Password; string connectionString = string.Format("Data Source={0}\\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}", pcName, dbName, userName, password); //- for (int i = 1; i <= 3; i++) { int count = 0; DateTime startTime, endTime; startTime = DateTime.Now; Console.WriteLine(""); Console.WriteLine("Strategy {0}:", i); Console.WriteLine("-----------"); Console.WriteLine("Start time: {0}", startTime); DataProcessor p = new DataProcessor(connectionString); switch (i) { case 1: Strategy1(p, ref count); break; case 2: Strategy2(p, ref count); break; case 3: Strategy3(p, ref count); break; default: Console.WriteLine("{0} - undefined strategy number", i); break; } Console.WriteLine("Count(*) = {0}", p.TotalCount); Console.WriteLine("Counter = {0}", count); endTime = DateTime.Now; Console.WriteLine("End time: {0}", endTime); TimeSpan ts = TimeSpan.FromTicks((endTime.Subtract(startTime)).Ticks); Console.WriteLine("Elapsed time: {0}", ts); } } catch (Exception ex) { Console.WriteLine(ex.Message); } } } } Enjoy! -- Shamil