jwcolby
jwcolby at colbyconsulting.com
Sun Apr 27 11:56:40 CDT 2008
What I come hear is that using the built-in batch is as fast as any
other method, so if that is what I find easier to use there is no issue
using that.
I will try to understand your other methods (in VB, since the language
makes very little apparent speed difference) and implement that in
parallel so that I can switch between them on demand. If I can do that
I will do some timings on my big database, which is what I am doing the
updates on.
Shamil Salakhetdinov wrote:
> Hi John,
>
> Below is fully parameterized sample C# code, which uses three update
> strategies including the one you mentioned
> http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx
>
> The C# and VB.Net is nearly the same now - the difference of runtime of
> previous version was because of one bug. (There could be still others - if
> you find them please inform me - thank you).
>
> I'd think this thread's subject could be interested to many developers -
> proposal (to moderators): is there anybody to take on this current sample
> code and stats to test and to add some more tests and write some text to
> publish this code and accompanying article on accessD web site?
>
> I wasn't aware of MS SQL TDS feature you mentioned but as you can see from
> stats it gives just slightly better results than the manual batching. Of
> course playing with batch size might give better gains of this method (my
> tests didn't show that gains depending on batch size) but I must say for me
> it looks more complicated and less flexible that using SQLDatareader and
> custom classes - that could be just my perception....
>
> The C# code is below in the end of this message - its VB.NET equivalent can
> be obtained using automatic conversion -
> http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx - and then one
> code line have to be manually patched
>
> 'conversion issue replace the following code line =>
> ====================================================
> _updateCommands(System.Math.Max(System.Threading.Interlocked.Increment(_arra
> yIndex), _arrayIndex - 1)) = sql
>
> with this one
> =============
> _updateCommands(_arrayIndex) = sql : _arrayIndex += 1
> :
>
> Here are the stats:
>
> ====================
> C#
> ====================
>
> Strategy 1:
> -----------
> Start time: 27.04.2008 19:02:58
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:03:15
> Elapsed time: 00:00:16.7968750
>
> Strategy 2:
> -----------
> Start time: 27.04.2008 19:03:15
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:03:23
> Elapsed time: 00:00:08.4218750
>
> Strategy 3:
> -----------
> Start time: 27.04.2008 19:03:23
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:03:31
> Elapsed time: 00:00:07.9062500
>
> ====================
> VB.NET
> ====================
>
> Strategy 1:
> -----------
> Start time: 27.04.2008 19:05:20
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:05:36
> Elapsed time: 00:00:16.7500000
>
> Strategy 2:
> -----------
> Start time: 27.04.2008 19:05:36
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:05:45
> Elapsed time: 00:00:08.2812500
>
> Strategy 3:
> -----------
> Start time: 27.04.2008 19:05:45
> Count(*) = 25046
> Counter = 25046
> End time: 27.04.2008 19:05:52
> Elapsed time: 00:00:07.5000000
> ====================
>
> // Converters:
> //http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx
> //http://codeconverter.sharpdevelop.net/Convert.aspx
>
> // Sql TDS batched update
> //http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx
>
> using System;
> using System.Collections.Generic;
> using System.Data;
> using System.Data.SqlClient;
>
> namespace TestSamples
> {
> public sealed class Constants
> {
> //+ ///////////// C U S T O M I Z E /////////////////////
> public const string PcName = "NANT";
> public const string DbName = "ORS2005";
> public const string UserName = "sa";
> public const string Password = "shms";
>
> public const string TABLE_NAME = "zsysUserSessionTrace";
> public const string ID_COL_NAME = "UserSessionTraceId";
> public const string NAME_COL_NAME = "UserBrowser";
> public const string PARSED_NAME_COL_NAME = "ParsedUserBrowserName";
>
> public const int DEFAULT_BATCH_SIZE = 1000;
>
> public static string GetParsedName(string name)
> {
> // just for testing purposes - customzie
> // to return your parsed name here
> if (name == null) name = "";
> name = DateTime.Now.ToString() + ": " + name;
> if (name.Length > 255) name = name.Substring(0, 255);
>
> return name;
> }
> //- ///////////// C U S T O M I Z E /////////////////////
>
> public const int ID_COL_INDEX = 1;
> public const int NAME_COL_INDEX = 2;
> public const int PARSED_NAME_COL_INDEX = 3;
>
> public static string COUNT_SQL { get {
> return String.Format("select count(*) from {0}", TABLE_NAME); }
> }
>
> public static string PAGED_SELECT_SQL { get {
> return string.Format(
> "SELECT * from " + "(" + " select " +
> " Cast(ROW_NUMBER() over (ORDER BY [{0}]) as int) as
> RowNum, " +
> " [{1}], [{2}],[{3}] " + " from [{4}] " + ") s ",
> ID_COL_NAME, ID_COL_NAME, NAME_COL_NAME,
> PARSED_NAME_COL_NAME, TABLE_NAME) +
> "where s.RowNum between {0} and {1}"; }}
>
> public static string UPDATE_SQL { get {
> return string.Format("update {0} ",TABLE_NAME) +
> string.Format(" set {0} = '{1}' where {2} = {3}",
> PARSED_NAME_COL_NAME,"{0}",ID_COL_NAME,"{1}");
> }}
>
> public static string UPDATE_SQL_PARAMETERIZED { get {
> return string.Format("update {0} set {1} = @{2} where {3} =
> @{4}",
> TABLE_NAME, PARSED_NAME_COL_NAME, PARSED_NAME_COL_NAME,
> ID_COL_NAME, ID_COL_NAME);
> }}
> }
>
> public class DataItem
> {
> private int _id;
> public int ID
> {
> get { return _id; }
> }
>
> private string _name;
> public string Name
> {
> get { return _name; }
> }
>
> public DataItem(int id, string name)
> {
> _id = id;
> _name = name;
> }
>
> public string ParsedName
> {
> get
> {
> return Constants.GetParsedName(_name);
> }
> }
>
>
> }
>
> public class DataProcessor
> {
>
> private int _totalCount = -1;
> public int TotalCount
> {
> get { return _totalCount; }
> }
>
> private int _currentBatchStartRowNum = 0;
> private int _batchSize = 0;
> public int BatchSize { get { return _batchSize; } }
>
> private string _connectionString;
> public DataProcessor(string connectionString)
> {
> initialize(connectionString, Constants.DEFAULT_BATCH_SIZE);
> }
> public DataProcessor(string connectionString, int batchSize)
> {
> initialize(connectionString, batchSize);
> }
> private void initialize(string connectionString, int batchSize)
> {
>
> _batchSize = batchSize;
> _currentBatchStartRowNum = 1;
> _connectionString = connectionString;
>
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(connectionString))
> {
> cnn.Open();
>
> System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
> cmd.CommandText = Constants.COUNT_SQL;
> cmd.CommandType = System.Data.CommandType.Text;
>
> _totalCount = (int)cmd.ExecuteScalar();
> }
> }
>
> public bool HasData
> {
> get { return (TotalCount > 0 && _currentBatchStartRowNum <
> TotalCount); }
> }
>
> public List<DataItem> GetData()
> {
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(_connectionString))
> {
> cnn.Open();
>
> System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
> string sql = string.Format(Constants.PAGED_SELECT_SQL,
> _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize);
> cmd.CommandText = sql;
> cmd.CommandType = System.Data.CommandType.Text;
> _currentBatchStartRowNum += _batchSize + 1;
> System.Data.SqlClient.SqlDataReader rdr =
> cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess);
>
> List<DataItem> list = new List<DataItem>();
> while (rdr.Read())
> {
> //error in automated conversion => list.Add(new
> DataItem((int)rdr(Constants.ID_COL_INDEX),
> (string)rdr(Constants.NAME_COL_INDEX)));
> list.Add(new DataItem((int)rdr[Constants.ID_COL_INDEX],
> (string)rdr[Constants.NAME_COL_INDEX]));
> }
> return list;
> }
> }
>
> public int Update(DataItem item)
> {
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(_connectionString))
> {
> cnn.Open();
>
> System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
> string sql = string.Format(Constants.UPDATE_SQL,
> item.ParsedName, item.ID);
> cmd.CommandText = sql;
> cmd.CommandType = System.Data.CommandType.Text;
> return cmd.ExecuteNonQuery();
> }
> }
>
> private string[] _updateCommands = null;
> private int _arrayIndex = -1;
> public void AddUpdateCommand(DataItem item)
> {
> if (_arrayIndex == -1)
> {
> _updateCommands =
> (string[])Array.CreateInstance(typeof(string), _batchSize);
> _arrayIndex = 0;
> }
> string sql = string.Format(Constants.UPDATE_SQL,
> item.ParsedName, item.ID);
> _updateCommands[_arrayIndex++] = sql;
> }
>
> public int BatchUpdate()
> {
> if (_arrayIndex == -1) return -1;
> if (_arrayIndex < _batchSize)
> Array.Resize<string>(ref _updateCommands, _arrayIndex);
>
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(_connectionString))
> {
> cnn.Open();
>
> System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
>
> //Console.WriteLine("Batch update: {0} updates",
> _arrayIndex);
>
> string sql = String.Join(";", _updateCommands);
> _arrayIndex = -1;
>
> cmd.CommandText = sql;
> cmd.CommandType = System.Data.CommandType.Text;
> return cmd.ExecuteNonQuery();
> }
> }
>
> private SqlDataAdapter _da = null;
> private DataSet _ds = null;
> public DataTable SQLTDSGetDataTablePaged()
> {
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(_connectionString))
> {
> cnn.Open();
>
> int batchSize = _batchSize;
> if (_currentBatchStartRowNum + _batchSize >= _totalCount)
> batchSize = _totalCount - _currentBatchStartRowNum;
> string sql = string.Format(Constants.PAGED_SELECT_SQL,
> _currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize);
>
> _currentBatchStartRowNum += _batchSize + 1;
>
> _da = new SqlDataAdapter(sql, cnn);
> _ds = new DataSet();
> _da.Fill(_ds);
>
> _da.UpdateCommand = new
> SqlCommand(Constants.UPDATE_SQL_PARAMETERIZED, cnn);
> _da.UpdateCommand.Parameters.Add("@" +
> Constants.PARSED_NAME_COL_NAME, SqlDbType.NVarChar, 255,
> Constants.PARSED_NAME_COL_NAME);
> _da.UpdateCommand.Parameters.Add("@" +
> Constants.ID_COL_NAME, SqlDbType.Int, 0, Constants.ID_COL_NAME);
> _da.UpdateCommand.UpdatedRowSource = UpdateRowSource.None;
> _da.UpdateBatchSize = batchSize;
>
> _da.SelectCommand.Connection = null;
> _da.UpdateCommand.Connection = null;
>
> return _ds.Tables[0];
> }
> }
>
> public void SQLTDSUpdate()
> {
> SQLTDSUpdate(false);
> }
> public void SQLTDSUpdate(bool forceUpdate)
> {
> using (System.Data.SqlClient.SqlConnection cnn = new
> System.Data.SqlClient.SqlConnection(_connectionString))
> {
> cnn.Open();
> _da.SelectCommand.Connection = cnn;
> _da.UpdateCommand.Connection = cnn;
> if (forceUpdate)
> _da.UpdateBatchSize = _ds.Tables[0].Rows.Count;
> _da.Update(_ds.Tables[0]);
>
> //Console.WriteLine("*** Forced test exit ***");
> //System.Environment.Exit(100);
> }
> }
> }
>
> public class A_MainEntry
> {
> /// <summary>
> /// Strategy 1 - use SqlDataReader to get data pages, and
> /// generate and execute individual update SQL
> statements
> /// </summary>
> public static void Strategy1(DataProcessor p, ref int count)
> {
> while (p.HasData)
> {
> List<DataItem> list = p.GetData();
> foreach (DataItem item in list)
> {
> p.Update(item);
> count += 1;
> }
> //Console.WriteLine("{0} - rows processed...", p.BatchSize);
>
> }
> }
>
> /// <summary>
> /// Strategy 1 - use SqlDataReader to get data pages, and
> /// generate batched update SQL statements, and
> /// execute update batch when it has all update
> /// statements added
> /// </summary>
> public static void Strategy2(DataProcessor p, ref int count)
> {
> while (p.HasData)
> {
> List<DataItem> list = p.GetData();
> foreach (DataItem item in list)
> {
> if (count > 0 && (count % p.BatchSize) == 0)
> p.BatchUpdate();
> p.AddUpdateCommand(item);
> count += 1;
> }
> }
> p.BatchUpdate();
> }
>
> /// <summary>
> /// Strategy 3 - use SqlDataAdapter and DataSet to get data pages,
> and
> /// use SQL TDS bacthed updates facility
> /// </summary>
> public static void Strategy3(DataProcessor p, ref int count)
> {
> while (p.HasData)
> {
> DataTable table = p.SQLTDSGetDataTablePaged();
>
> foreach (DataRow row in table.Rows)
> {
> row.SetModified();
> row[Constants.PARSED_NAME_COL_INDEX] =
> Constants.GetParsedName((string)row[Constants.NAME_COL_INDEX]);
> count += 1;
> }
> p.SQLTDSUpdate(!p.HasData);
> }
> }
>
> public static void Main()
> {
> try
> {
> //+ customize - set your connection string here
> string pcName = Constants.PcName;
> string dbName = Constants.DbName;
> string userName = Constants.UserName;
> string password = Constants.Password;
> string connectionString = string.Format("Data
> Source={0}\\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}",
> pcName, dbName, userName, password);
> //-
>
> for (int i = 1; i <= 3; i++)
> {
> int count = 0;
> DateTime startTime, endTime;
> startTime = DateTime.Now;
> Console.WriteLine("");
> Console.WriteLine("Strategy {0}:", i);
> Console.WriteLine("-----------");
> Console.WriteLine("Start time: {0}", startTime);
> DataProcessor p = new DataProcessor(connectionString);
>
> switch (i)
> {
> case 1:
> Strategy1(p, ref count);
> break;
> case 2:
> Strategy2(p, ref count);
> break;
> case 3:
> Strategy3(p, ref count);
> break;
> default:
> Console.WriteLine("{0} - undefined strategy
> number", i);
> break;
> }
> Console.WriteLine("Count(*) = {0}", p.TotalCount);
> Console.WriteLine("Counter = {0}", count);
> endTime = DateTime.Now;
> Console.WriteLine("End time: {0}", endTime);
> TimeSpan ts =
> TimeSpan.FromTicks((endTime.Subtract(startTime)).Ticks);
> Console.WriteLine("Elapsed time: {0}", ts);
> }
> }
> catch (Exception ex)
> {
> Console.WriteLine(ex.Message);
> }
> }
>
> }
> }
>
> Enjoy!
>
> --
> Shamil
>
> _______________________________________________
> dba-VB mailing list
> dba-VB at databaseadvisors.com
> http://databaseadvisors.com/mailman/listinfo/dba-vb
> http://www.databaseadvisors.com
>
>
--
John W. Colby
www.ColbyConsulting.com