[dba-VB] ADO.Net

Shamil Salakhetdinov shamil at smsconsulting.spb.ru
Sun Apr 27 10:14:00 CDT 2008


Hi John,

Below is fully parameterized sample C# code, which uses three update
strategies including the one you mentioned
http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx 

The C# and VB.Net is nearly the same now - the difference of runtime of
previous version was because of one bug. (There could be still others - if
you find them please inform me - thank you).

I'd think this thread's subject could be interested to many developers -
proposal (to moderators): is there anybody to take on this current sample
code and stats to test and to add some more tests and write some text to
publish this code and accompanying article on accessD web site?

I wasn't aware of MS SQL TDS feature you mentioned but as you can see from
stats it gives just slightly better results than the manual batching. Of
course playing with batch size might give better gains of this method (my
tests didn't show that gains depending on batch size) but I must say for me
it looks more complicated and less flexible that using SQLDatareader and
custom classes - that could be just my perception....

The C# code is below in the end of this message - its VB.NET equivalent can
be obtained using automatic conversion -
http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx - and then one
code line have to be manually patched

'conversion issue replace the following code line => 
====================================================
_updateCommands(System.Math.Max(System.Threading.Interlocked.Increment(_arra
yIndex), _arrayIndex - 1)) = sql

with this one
=============
_updateCommands(_arrayIndex) = sql : _arrayIndex += 1
:

Here are the stats:

====================
C#
====================

Strategy 1:
-----------
Start time: 27.04.2008 19:02:58
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:03:15
Elapsed time: 00:00:16.7968750

Strategy 2:
-----------
Start time: 27.04.2008 19:03:15
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:03:23
Elapsed time: 00:00:08.4218750

Strategy 3:
-----------
Start time: 27.04.2008 19:03:23
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:03:31
Elapsed time: 00:00:07.9062500

====================
VB.NET
====================

Strategy 1:
-----------
Start time: 27.04.2008 19:05:20
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:05:36
Elapsed time: 00:00:16.7500000

Strategy 2:
-----------
Start time: 27.04.2008 19:05:36
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:05:45
Elapsed time: 00:00:08.2812500

Strategy 3:
-----------
Start time: 27.04.2008 19:05:45
Count(*) = 25046
Counter = 25046
End time: 27.04.2008 19:05:52
Elapsed time: 00:00:07.5000000
====================

// Converters:
//http://labs.developerfusion.co.uk/convert/csharp-to-vb.aspx 
//http://codeconverter.sharpdevelop.net/Convert.aspx 

// Sql TDS batched update
//http://blogs.msdn.com/dataaccess/archive/2005/05/19/420065.aspx

using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;

namespace TestSamples
{
    public sealed class Constants
    {
        //+ /////////////  C U S T O M I Z E  /////////////////////
        public const string PcName = "NANT";
        public const string DbName = "ORS2005";
        public const string UserName = "sa";
        public const string Password = "shms";

        public const string TABLE_NAME = "zsysUserSessionTrace";
        public const string ID_COL_NAME = "UserSessionTraceId";
        public const string NAME_COL_NAME = "UserBrowser";
        public const string PARSED_NAME_COL_NAME = "ParsedUserBrowserName";

        public const int DEFAULT_BATCH_SIZE = 1000;

        public static string GetParsedName(string name)
        {
            // just for testing purposes - customzie
            // to return your parsed name here 
            if (name == null) name = "";
            name = DateTime.Now.ToString() + ": " + name;
            if (name.Length > 255) name = name.Substring(0, 255);

            return name;
        }
        //- /////////////  C U S T O M I Z E  /////////////////////

        public const int ID_COL_INDEX = 1;
        public const int NAME_COL_INDEX = 2;
        public const int PARSED_NAME_COL_INDEX = 3;

        public static string COUNT_SQL { get {
            return String.Format("select count(*) from {0}", TABLE_NAME); }
}
        
        public static string PAGED_SELECT_SQL { get {
                return string.Format(
                  "SELECT * from " + "(" + " select " +
                  " Cast(ROW_NUMBER() over (ORDER BY [{0}]) as int) as
RowNum, " +
                  " [{1}], [{2}],[{3}] " + " from [{4}] " + ") s ",
                  ID_COL_NAME, ID_COL_NAME, NAME_COL_NAME,
PARSED_NAME_COL_NAME, TABLE_NAME) +
                  "where s.RowNum between {0} and {1}"; }}

        public static string UPDATE_SQL { get {
            return string.Format("update {0} ",TABLE_NAME) +
                   string.Format(" set {0} = '{1}' where {2} = {3}",
                   PARSED_NAME_COL_NAME,"{0}",ID_COL_NAME,"{1}"); 
        }}

        public static string UPDATE_SQL_PARAMETERIZED { get {
            return string.Format("update {0} set {1} = @{2} where  {3} =
@{4}",
                 TABLE_NAME, PARSED_NAME_COL_NAME, PARSED_NAME_COL_NAME,
ID_COL_NAME, ID_COL_NAME);
        }}
    }

    public class DataItem
    {
        private int _id;
        public int ID
        {
            get { return _id; }
        }

        private string _name;
        public string Name
        {
            get { return _name; }
        }

        public DataItem(int id, string name)
        {
            _id = id;
            _name = name;
        }

        public string ParsedName
        {
            get 
            {
                return Constants.GetParsedName(_name); 
            }
        }


    }

    public class DataProcessor
    {

        private int _totalCount = -1;
        public int TotalCount
        {
            get { return _totalCount; }
        }

        private int _currentBatchStartRowNum = 0;
        private int _batchSize = 0;
        public int BatchSize { get { return _batchSize; } }

        private string _connectionString;
        public DataProcessor(string connectionString)
        {
            initialize(connectionString, Constants.DEFAULT_BATCH_SIZE);
        }
        public DataProcessor(string connectionString, int batchSize)
        {
            initialize(connectionString, batchSize);
        }
        private void initialize(string connectionString, int batchSize)
        {

            _batchSize = batchSize;
            _currentBatchStartRowNum = 1;
            _connectionString = connectionString;

            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(connectionString))
            {
                cnn.Open();

                System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
                cmd.CommandText = Constants.COUNT_SQL;
                cmd.CommandType = System.Data.CommandType.Text;

                _totalCount = (int)cmd.ExecuteScalar();
            }
        }

        public bool HasData
        {
            get { return (TotalCount > 0 && _currentBatchStartRowNum <
TotalCount); }
        }

        public List<DataItem> GetData()
        {
            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(_connectionString))
            {
                cnn.Open();

                System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
                string sql = string.Format(Constants.PAGED_SELECT_SQL,
_currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize);
                cmd.CommandText = sql;
                cmd.CommandType = System.Data.CommandType.Text;
                _currentBatchStartRowNum += _batchSize + 1;
                System.Data.SqlClient.SqlDataReader rdr =
cmd.ExecuteReader(System.Data.CommandBehavior.SequentialAccess);

                List<DataItem> list = new List<DataItem>();
                while (rdr.Read())
                {
                    //error in automated conversion => list.Add(new
DataItem((int)rdr(Constants.ID_COL_INDEX),
(string)rdr(Constants.NAME_COL_INDEX)));
                    list.Add(new DataItem((int)rdr[Constants.ID_COL_INDEX],
(string)rdr[Constants.NAME_COL_INDEX]));
                }
                return list;
            }
        }

        public int Update(DataItem item)
        {
            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(_connectionString))
            {
                cnn.Open();

                System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();
                string sql = string.Format(Constants.UPDATE_SQL,
item.ParsedName, item.ID);
                cmd.CommandText = sql;
                cmd.CommandType = System.Data.CommandType.Text;
                return cmd.ExecuteNonQuery();
            }
        }

        private string[] _updateCommands = null;
        private int _arrayIndex = -1;
        public void AddUpdateCommand(DataItem item)
        {
            if (_arrayIndex == -1)
            {
                _updateCommands =
(string[])Array.CreateInstance(typeof(string), _batchSize);
                _arrayIndex = 0;
            }
            string sql = string.Format(Constants.UPDATE_SQL,
item.ParsedName, item.ID);
            _updateCommands[_arrayIndex++] = sql;
        }

        public int BatchUpdate()
        {
            if (_arrayIndex == -1) return -1;
            if (_arrayIndex < _batchSize)
                Array.Resize<string>(ref _updateCommands, _arrayIndex);

            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(_connectionString))
            {
                cnn.Open();

                System.Data.SqlClient.SqlCommand cmd = cnn.CreateCommand();

                //Console.WriteLine("Batch update: {0} updates",
_arrayIndex);

                string sql = String.Join(";", _updateCommands);
                _arrayIndex = -1;

                cmd.CommandText = sql;
                cmd.CommandType = System.Data.CommandType.Text;
                return cmd.ExecuteNonQuery();
            }
        }

        private SqlDataAdapter _da = null;
        private DataSet _ds = null;
        public DataTable SQLTDSGetDataTablePaged()
        {
            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(_connectionString))
            {
                cnn.Open();

                int batchSize = _batchSize;
                if (_currentBatchStartRowNum + _batchSize >= _totalCount)
                    batchSize = _totalCount - _currentBatchStartRowNum;
                string sql = string.Format(Constants.PAGED_SELECT_SQL,
_currentBatchStartRowNum, _currentBatchStartRowNum + _batchSize);
                                
                _currentBatchStartRowNum += _batchSize + 1;

                _da = new SqlDataAdapter(sql, cnn);
                _ds = new DataSet(); 
                _da.Fill(_ds);

                _da.UpdateCommand = new
SqlCommand(Constants.UPDATE_SQL_PARAMETERIZED, cnn);
                _da.UpdateCommand.Parameters.Add("@" +
Constants.PARSED_NAME_COL_NAME, SqlDbType.NVarChar, 255,
Constants.PARSED_NAME_COL_NAME);
                _da.UpdateCommand.Parameters.Add("@" +
Constants.ID_COL_NAME, SqlDbType.Int, 0, Constants.ID_COL_NAME);
                _da.UpdateCommand.UpdatedRowSource = UpdateRowSource.None;
                _da.UpdateBatchSize = batchSize;

                _da.SelectCommand.Connection = null;
                _da.UpdateCommand.Connection = null;
 
                return _ds.Tables[0]; 
            }
        }

        public void SQLTDSUpdate()
        {
            SQLTDSUpdate(false); 
        }
        public void SQLTDSUpdate(bool forceUpdate)
        {
            using (System.Data.SqlClient.SqlConnection cnn = new
System.Data.SqlClient.SqlConnection(_connectionString))
            {
                cnn.Open();
                _da.SelectCommand.Connection = cnn;
                _da.UpdateCommand.Connection = cnn;
                if (forceUpdate)
                   _da.UpdateBatchSize = _ds.Tables[0].Rows.Count;
                _da.Update(_ds.Tables[0]);

                //Console.WriteLine("*** Forced test exit ***");  
                //System.Environment.Exit(100);   
            }
        }
    }

    public class A_MainEntry
    {
        /// <summary>
        /// Strategy 1 - use SqlDataReader to get data pages, and
        ///              generate and execute individual update SQL
statements
        /// </summary>
        public static void Strategy1(DataProcessor p, ref int count)
        {
            while (p.HasData)
            {
                List<DataItem> list = p.GetData();
                foreach (DataItem item in list)
                {
                    p.Update(item);
                    count += 1;
                }
                //Console.WriteLine("{0} - rows processed...", p.BatchSize);

            }
        }

        /// <summary>
        /// Strategy 1 - use SqlDataReader to get data pages, and
        ///              generate batched update SQL statements, and
        ///              execute update batch when it has all update
        ///              statements added 
        /// </summary>
        public static void Strategy2(DataProcessor p, ref int count)
        {
            while (p.HasData)
            {
                List<DataItem> list = p.GetData();
                foreach (DataItem item in list)
                {
                    if (count > 0 && (count % p.BatchSize) == 0)
p.BatchUpdate();
                    p.AddUpdateCommand(item);
                    count += 1;
                }
            }
            p.BatchUpdate();
        }

        /// <summary>
        /// Strategy 3 - use SqlDataAdapter and DataSet to get data pages,
and
        ///              use SQL TDS bacthed updates facility
        /// </summary>
        public static void Strategy3(DataProcessor p, ref int count)
        {
            while (p.HasData)
            {
                DataTable table = p.SQLTDSGetDataTablePaged();

                foreach (DataRow row in table.Rows)
                {
                    row.SetModified();
                    row[Constants.PARSED_NAME_COL_INDEX] =
Constants.GetParsedName((string)row[Constants.NAME_COL_INDEX]);
                    count += 1;
                }
                p.SQLTDSUpdate(!p.HasData);
            }
        }

        public static void Main()
        {
            try
            {
                //+ customize - set your connection string here 
                string pcName = Constants.PcName;
                string dbName = Constants.DbName;
                string userName = Constants.UserName;
                string password = Constants.Password;
                string connectionString = string.Format("Data
Source={0}\\SQLEXPRESS;Initial Catalog={1};User Id={2};Password={3}",
pcName, dbName, userName, password);
                //- 
                
                for (int i = 1; i <= 3; i++)
                {
                    int count = 0;
                    DateTime startTime, endTime;
                    startTime = DateTime.Now;  
                    Console.WriteLine("");
                    Console.WriteLine("Strategy {0}:", i);
                    Console.WriteLine("-----------");
                    Console.WriteLine("Start time: {0}", startTime);
                    DataProcessor p = new DataProcessor(connectionString);

                    switch (i)
                    {
                        case 1:
                            Strategy1(p, ref count);
                            break;
                        case 2:
                            Strategy2(p, ref count);
                            break;
                        case 3:
                            Strategy3(p, ref count);
                            break;
                        default:
                            Console.WriteLine("{0} - undefined strategy
number", i);
                            break; 
                    }
                    Console.WriteLine("Count(*) = {0}", p.TotalCount);
                    Console.WriteLine("Counter = {0}", count);
                    endTime = DateTime.Now;  
                    Console.WriteLine("End time: {0}", endTime);
                    TimeSpan ts =
TimeSpan.FromTicks((endTime.Subtract(startTime)).Ticks);
                    Console.WriteLine("Elapsed time: {0}", ts);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

    }
}

Enjoy!

--
Shamil




More information about the dba-VB mailing list