Shamil Salakhetdinov
shamil at users.mns.ru
Sun Sep 30 08:43:01 CDT 2007
Hi Max, I have programmed some more string jamming strategies; the code is below. It's interesting to note that the most obvious implementation (StringJammer4), which assumes that only alphabetic chars should be kept and all the others should be stripped out, runs as quickly as the most generic one (StringJammer3), which can use whatever set is defined as the set of chars to be kept in the result string. It's also interesting to note that when millions of iterations are at stake, very subtle code differences can result in clearly visible/countable execution time gains, e.g. checking whether a char is already in uppercase and NOT calling the ToUpper function works about 1 second faster over one million iterations than code which just always calls the ToUpper function. I expect that the most generic solution (StringJammer3) can be made even faster without going to C++ or Assembler if it is somehow implemented using inline coding instead of delegate function calls; of course this generic solution has to be considerably refactored to achieve this goal: the gain I'd expect could be about 2 sec for one million iterations (it's about 4 sec now). From a practical point of view this gain looks too small(?) to take into account, but this kind of manual code optimization exercise is rather valuable for polishing programming skills. Your VBA code runs here in 3 min 24 seconds under the MS Access VBA IDE for the same set of test strings that is used in the C# code below: 4 sec vs. 204 sec (3 min 24 sec) => 51 times quicker. Shamil P.S. 
// the code:
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace StringJammerTestConsoleApplication
{
    /// <summary>
    /// StringJammer abstract class.
    /// Base for the "string jamming" strategies: every character that is not
    /// in the keep-set is stripped, and the first kept character after a
    /// stripped run (or at the start) is upper-cased while the remaining kept
    /// characters are lower-cased, producing a CamelCase identifier.
    /// </summary>
    public unsafe abstract class StringJammer
    {
        /// <summary>
        /// Keep-set lookup indexed by character code: non-zero means "keep".
        /// Only codes 0..255 are representable; use <see cref="IsKept"/> for a
        /// bounds-safe query.
        /// </summary>
        protected static byte[] sieve = new byte[256];

        /// <summary>
        /// Per-character handler table used by the delegate-dispatch strategy.
        /// Use <see cref="GetCopyProc"/> for a bounds-safe lookup.
        /// </summary>
        protected static copyCharDelegate[] copyProcs = new copyCharDelegate[256];

        /// <summary>
        /// Handler signature: consumes the character at c[i] (advancing i) and
        /// optionally emits one character at c[j] (advancing j).
        /// </summary>
        protected delegate void copyCharDelegate(char* c, ref int i, ref int j, ref bool upperCase);

        // Cached so that characters outside the table do not allocate a new
        // delegate instance on every lookup.
        private static readonly copyCharDelegate skipProc = dummyCopyChar;

        /// <summary>
        /// Populates the lookup tables so that the ASCII letters A-Z and a-z
        /// are kept and every other table entry of copyProcs strips.
        /// Called by each strategy's constructor; repeating it rewrites the
        /// same values.
        /// </summary>
        protected void Init()
        {
            for (int i = (int)'A'; i <= (int)'Z'; i++)
            {
                sieve[i] = 1;
            }
            for (int i = (int)'a'; i <= (int)'z'; i++)
            {
                sieve[i] = 1;
            }

            for (int i = 0; i < copyProcs.Length; i++)
            {
                bool isAsciiLetter = (i >= (int)'A' && i <= (int)'Z')
                                  || (i >= (int)'a' && i <= (int)'z');
                if (isAsciiLetter)
                {
                    copyProcs[i] = copyChar;
                }
                else
                {
                    copyProcs[i] = dummyCopyChar;
                }
            }
        }

        /// <summary>
        /// Replaces <paramref name="stringToJam"/> with its jammed form.
        /// </summary>
        /// <param name="stringToJam">Input text; receives the result.</param>
        public abstract void Jam(ref string stringToJam);

        /// <summary>
        /// Bounds-safe keep-set test. Characters whose code lies outside the
        /// sieve table are treated as "strip" instead of raising
        /// IndexOutOfRangeException.
        /// </summary>
        protected static bool IsKept(char ch)
        {
            return ch < sieve.Length && sieve[ch] != 0;
        }

        /// <summary>
        /// Bounds-safe handler lookup. Characters whose code lies outside the
        /// handler table get the stripping handler.
        /// </summary>
        protected static copyCharDelegate GetCopyProc(char ch)
        {
            return ch < copyProcs.Length ? copyProcs[ch] : skipProc;
        }

        /// <summary>
        /// Handler for kept characters: copies c[i] to c[j], upper-casing it
        /// when <paramref name="upperCase"/> is set (and clearing the flag),
        /// lower-casing it otherwise.
        /// </summary>
        protected static void copyChar(char* c, ref int i, ref int j, ref bool upperCase)
        {
            char cc = c[i++];
            if (upperCase)
            {
                // Checking the case first avoids the conversion call when the
                // character already has the wanted case. The invariant
                // conversion keeps results independent of the thread culture
                // (e.g. Turkish dotted/dotless i).
                c[j++] = Char.IsUpper(cc) ? cc : Char.ToUpperInvariant(cc);
                upperCase = false;
            }
            else
            {
                c[j++] = Char.IsLower(cc) ? cc : Char.ToLowerInvariant(cc);
            }
        }

        /// <summary>
        /// Handler for stripped characters: blanks c[i], emits nothing, and
        /// requests that the next kept character be upper-cased.
        /// </summary>
        protected static void dummyCopyChar(char* c, ref int i, ref int j, ref bool upperCase)
        {
            c[i++] = Char.MinValue;
            upperCase = true;
        }
    }

    /// <summary>
    /// StringJammer1 class - first string jamming strategy:
    /// fully managed, builds the result in a StringBuilder.
    /// </summary>
    public class StringJammer1 : StringJammer
    {
        public StringJammer1()
        {
            Init();
        }

        /// <summary>
        /// Jams the string using the sieve table and a StringBuilder.
        /// </summary>
        /// <param name="stringToJam">Input text; receives the result.</param>
        public override void Jam(ref string stringToJam)
        {
            StringBuilder result = new StringBuilder(stringToJam.Length);
            bool upperCase = true;

            foreach (char c in stringToJam)
            {
                if (!IsKept(c))
                {
                    upperCase = true;
                }
                else if (upperCase)
                {
                    result.Append(Char.ToUpperInvariant(c));
                    upperCase = false;
                }
                else
                {
                    result.Append(Char.ToLowerInvariant(c));
                }
            }

            // Only kept characters were appended, so no trimming is needed.
            stringToJam = result.ToString();
        }
    }

    /// <summary>
    /// StringJammer2 class - second string jamming strategy:
    /// pointer-based compaction driven by the sieve table.
    /// </summary>
    public class StringJammer2 : StringJammer
    {
        public StringJammer2()
        {
            Init();
        }

        /// <summary>
        /// Jams the string by compacting kept characters to the front of a
        /// scratch buffer.
        /// </summary>
        /// <param name="stringToJam">Input text; receives the result.</param>
        public override void Jam(ref string stringToJam)
        {
            // Work on a private copy: System.String is immutable, and writing
            // through a pointer into the caller's string would corrupt every
            // other reference to it (including interned literals).
            char[] buffer = stringToJam.ToCharArray();
            int j = 0;

            unsafe
            {
                fixed (char* c = buffer)
                {
                    bool upperCase = true;
                    int i = 0;
                    while (i < buffer.Length)
                    {
                        char cc = c[i++];
                        if (!IsKept(cc))
                        {
                            upperCase = true;
                        }
                        else if (upperCase)
                        {
                            c[j++] = Char.ToUpperInvariant(cc);
                            upperCase = false;
                        }
                        else
                        {
                            c[j++] = Char.ToLowerInvariant(cc);
                        }
                    }
                }
            }

            // The first j slots hold the result; no padding or Trim required.
            stringToJam = new string(buffer, 0, j);
        }
    }

    /// <summary>
    /// StringJammer3 class - third string jamming strategy:
    /// the most generic one, dispatching every character through the
    /// copyProcs delegate table.
    /// </summary>
    public class StringJammer3 : StringJammer
    {
        public StringJammer3()
        {
            Init();
        }

        /// <summary>
        /// Jams the string by invoking the table-selected handler for each
        /// character of a scratch buffer.
        /// </summary>
        /// <param name="stringToJam">Input text; receives the result.</param>
        public override void Jam(ref string stringToJam)
        {
            // Private copy; see StringJammer2.Jam for why the caller's string
            // must not be written through a pointer.
            char[] buffer = stringToJam.ToCharArray();
            int j = 0;

            unsafe
            {
                fixed (char* c = buffer)
                {
                    bool upperCase = true;
                    int i = 0;
                    while (i < buffer.Length)
                    {
                        // Each handler advances i (and j when it emits).
                        GetCopyProc(c[i])(c, ref i, ref j, ref upperCase);
                    }
                }
            }

            stringToJam = new string(buffer, 0, j);
        }
    }

    /// <summary>
    /// StringJammer4 class - fourth string jamming strategy:
    /// hard-coded rule that keeps whatever Char.IsLetter accepts (this
    /// includes non-ASCII letters, unlike the table-driven strategies).
    /// </summary>
    public class StringJammer4 : StringJammer
    {
        public StringJammer4()
        {
            Init();
        }

        /// <summary>
        /// Returns true for characters that survive jamming (letters).
        /// </summary>
        private bool isNotJammable(char c)
        {
            return Char.IsLetter(c);
        }

        /// <summary>
        /// Jams the string by compacting letters to the front of a scratch
        /// buffer.
        /// </summary>
        /// <param name="stringToJam">Input text; receives the result.</param>
        public override void Jam(ref string stringToJam)
        {
            // Private copy; see StringJammer2.Jam for why the caller's string
            // must not be written through a pointer.
            char[] buffer = stringToJam.ToCharArray();
            int j = 0;

            unsafe
            {
                fixed (char* c = buffer)
                {
                    bool upperCase = true;
                    int i = 0;
                    while (i < buffer.Length)
                    {
                        char cc = c[i++];
                        if (!isNotJammable(cc))
                        {
                            upperCase = true;
                            continue;
                        }

                        if (upperCase)
                        {
                            // Skip the conversion call when the case is
                            // already right (measurably faster over millions
                            // of iterations).
                            c[j++] = Char.IsUpper(cc) ? cc : Char.ToUpperInvariant(cc);
                            upperCase = false;
                        }
                        else
                        {
                            c[j++] = Char.IsLower(cc) ? cc : Char.ToLowerInvariant(cc);
                        }
                    }
                }
            }

            stringToJam = new string(buffer, 0, j);
        }
    }

    /// <summary>
    /// Test: runs every strategy MAX_CYCLES times over the sample strings and
    /// prints wall-clock start/finish times plus the first-cycle results.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            const long MAX_CYCLES = 1000000;
            string[] test = {
                " # hey#hey#Hey,hello_world$%#=======",
                "@#$this#is_a_test_of_the-emer=======",
                "gency-broadcast-system $()# "
            };
            StringJammer[] jammers = {
                new StringJammer1(),
                new StringJammer2(),
                new StringJammer3(),
                new StringJammer4()
            };

            for (int k = 0; k < jammers.Length; k++)
            {
                long cyclesQty = MAX_CYCLES;
                Console.WriteLine("+ {0}\n {1:D} cycles started at {2}",
                    jammers[k].GetType().ToString(),
                    MAX_CYCLES,
                    DateTime.Now.ToLongTimeString());

                while (cyclesQty > 0)
                {
                    for (int i = 0; i < test.Length; i++)
                    {
                        // A fresh copy per iteration keeps the per-call cost
                        // identical for every strategy.
                        string result = new StringBuilder(test[i]).ToString();
                        jammers[k].Jam(ref result);

                        // Show the results once, on the first cycle only.
                        if (cyclesQty == MAX_CYCLES)
                        {
                            Console.WriteLine(test[i] + " => {" + result + "}");
                        }
                    }
                    --cyclesQty;
                }

                Console.WriteLine("- {0}\n {1:D} cycles finished at {2}\n",
                    jammers[k].GetType().ToString(),
                    MAX_CYCLES,
                    DateTime.Now.ToLongTimeString());
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }
    }
}

// Sample output from the original post (timings were measured with the
// originally posted implementation):
//
// + StringJammerTestConsoleApplication.StringJammer1
//  1000000 cycles started at 13:09:56
//  # hey#hey#Hey,hello_world$%#======= => {HeyHeyHeyHelloWorld}
// @#$this#is_a_test_of_the-emer======= => {ThisIsATestOfTheEmer}
// gency-broadcast-system $()# => {GencyBroadcastSystem}
// - StringJammerTestConsoleApplication.StringJammer1
//  1000000 cycles finished at 13:10:15
//
// + StringJammerTestConsoleApplication.StringJammer2
//  1000000 cycles started at 13:10:15
//  # hey#hey#Hey,hello_world$%#======= => {HeyHeyHeyHelloWorld}
// @#$this#is_a_test_of_the-emer======= => {ThisIsATestOfTheEmer}
// gency-broadcast-system $()# => {GencyBroadcastSystem}
// - StringJammerTestConsoleApplication.StringJammer2
//  1000000 cycles finished at 13:10:21
//
// + StringJammerTestConsoleApplication.StringJammer3
//  1000000 cycles started at 13:10:21
//  # hey#hey#Hey,hello_world$%#======= => {HeyHeyHeyHelloWorld}
// @#$this#is_a_test_of_the-emer======= => {ThisIsATestOfTheEmer}
// gency-broadcast-system $()# => {GencyBroadcastSystem}
// - StringJammerTestConsoleApplication.StringJammer3
//  1000000 cycles finished at 13:10:25
//
// + StringJammerTestConsoleApplication.StringJammer4
//  1000000 cycles started at 13:10:25
//  # hey#hey#Hey,hello_world$%#======= =>
{HeyHeyHeyHelloWorld} @#$this#is_a_test_of_the-emer======= => {ThisIsATestOfTheEmer} gency-broadcast-system $()# => {GencyBroadcastSystem} - StringJammerTestConsoleApplication.StringJammer4 1000000 cycles finished at 13:10:29 -- Shamil -- Shamil -----Original Message----- From: accessd-bounces at databaseadvisors.com [mailto:accessd-bounces at databaseadvisors.com] On Behalf Of max.wanadoo at gmail.com Sent: Sunday, September 30, 2007 12:53 PM To: 'Access Developers discussion and problem solving' Subject: Re: [AccessD] Use Regex - Create Camel Case Hi Shamil, Clearly your compiled solution is by far the quickest solution. I have tried all sorts of VBA solutions, including looking at XOR, IMP, EQV and bitwise solutions, but their overheads were considerable. The best I can come up with in VBA is below. One million iterations on my Dell Inspiron comes in at 3 min 52 secs. If John didn't want to Hump it, then RegExpr appears to be the answer within pure VBA. Max <<<tail trimmed to make this message smaller>>>