some speed up and notes

author: drbaggy <js5@sanger.ac.uk> 2021-05-12 17:38:21 +0100
committer: drbaggy <js5@sanger.ac.uk> 2021-05-12 17:38:21 +0100
commit: 04dcd9b878bf160b94752ea11b92c4dd7451de1d (patch)
tree: 2fd7b1d03e032890c4e40f71a5dc57b182c1e9ef /challenge-112/james-smith
parent: 7be53b6d9d55dfd13808a4e8d1483f3a92e90806 (diff)
download: perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.gz
perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.bz2
perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.zip
2 files changed, 112 insertions, 70 deletions
diff --git a/challenge-112/james-smith/perl/ch-1.pl b/challenge-112/james-smith/perl/ch-1.pl
index 863eb9c651..826c5edf30 100644
--- a/challenge-112/james-smith/perl/ch-1.pl
+++ b/challenge-112/james-smith/perl/ch-1.pl
@@ -3,10 +3,11 @@
 use strict;
 
 use warnings;
-use feature qw(say);
+use feature qw(say state);
 use Test::More;
 use Benchmark qw(cmpthese);
 
+##
 ## Please note there is an ambiguity in the question - when then path contains no
 ## files - as it cannot start with a '/' and not end with a '/' - so we have
 ## to make a choice do we return '/' or do we return ''.
@@ -19,7 +20,12 @@ use Benchmark qw(cmpthese);
 ## $parent_dir.canonical_path('/');
 ##
 ## then it will always end without a "/";
-
+##
+##
+##
+## Performance of different methods
+## ================================
+##
 ## We will look at some different versions of the code
 ## Whether we use an array or string to accumulate the resultant path
 ## Whether we use "readable" code or Perl hacks and tricks
@@ -27,9 +33,10 @@ use Benchmark qw(cmpthese);
 ##
 ## Methods:
 ##   * "Long form" Perl...
-##     * canonical_path_double - Using a double loop
-##     * canonical_path_array  - Using backtracking instead of inner loop
-##     * canonical_path_string - Use a string as the accumulator and mapping
+##     * canonical_path_double      - Using a double loop
+##     * canonical_path_array       - Using backtracking instead of inner loop
+##     * canonical_path_string      - Use a string as the accumulator and mapping
+##     * canonical_path_string_fast - As above - but using substr/rindex
 ##
 ##   * "One-liner" perl {arrays}
 ##     * canonical_path_compact     - short version of array code
@@ -40,40 +47,46 @@ use Benchmark qw(cmpthese);
 ##     * canonical_path_short    - compact method
 ##     * canonical_path_fast     - replace one of the regex with equality checks
 ##     * canonical_path_fastest  - replace other regex with substr/rindex
+##     * canonical_path_global   - as fastest but with global variable...
 ##
 ## Timings for these:
-## 
-##               Rate @-sh $-s'st $-sh @-2l @-cd $-cd @-fa $-fa $-f'st
-## @-short    17483/s   --   -15% -16% -28% -29% -33% -37% -38%   -58%
-## $-shortest 20534/s  17%     --  -1% -15% -17% -21% -26% -27%   -51%
-## $-short    20833/s  19%     1%   -- -14% -15% -20% -25% -26%   -50%
-## @-2loops   24213/s  38%    18%  16%   --  -2%  -7% -13% -14%   -42%
-## @-code     24631/s  41%    20%  18%   2%   --  -5% -11% -12%   -41%
-## $-code     25907/s  48%    26%  24%   7%   5%   --  -7%  -8%   -38%
-## @-fast     27778/s  59%    35%  33%  15%  13%   7%   --  -1%   -33%
-## $-fast     28090/s  61%    37%  35%  16%  14%   8%   1%   --   -33%
-## $-fastest  41667/s 138%   103% 100%  72%  69%  61%  50%  48%     --
+##
+##                    Rate @-sh $-st $-sh @-2l @-cf @-ft $-cd $-fa $-cf $-ft $-gl
+## @-shortest     17,483/s   -- -21% -22% -29% -30% -37% -38% -43% -52% -59% -59%
+## $-shortest     22,026/s  26%   --  -2% -11% -11% -21% -21% -28% -40% -48% -49%
+## $-short        22,472/s  29%   2%   --  -9% -10% -20% -20% -26% -39% -47% -48%
+## @-code-2loop   24,631/s  41%  12%  10%   --  -1% -12% -12% -19% -33% -42% -43%
+## @-code-fastest 24,876/s  42%  13%  11%   1%   -- -11% -11% -18% -32% -42% -42%
+## @-fastest      27,933/s  60%  27%  24%  13%  12%   --  -0%  -8% -24% -35% -35%
+## $-code-fast    28,011/s  60%  27%  25%  14%  13%   0%   --  -8% -24% -34% -35%
+## $-fast         30,488/s  74%  38%  36%  24%  23%   9%   9%   -- -17% -29% -29%
+## $-code-fastest 36,765/s 110%  67%  64%  49%  48%  32%  31%  21%   -- -14% -15%
+## $-fastest      42,735/s 144%  94%  90%  74%  72%  53%  53%  40%  16%   --  -1%
+## $-global-fast  43,103/s 147%  96%  92%  75%  73%  54%  54%  41%  17%   1%   --
 ##
 ## What we see is:
-##  * that the string code is fractionally faster than
-##    the array code, but by only 1-5%
-##  * using compact "1-liner" code can be approximately 7-8%
+##  * that the string code is faster than the array code,
+##    by around 20-40%
+##  * using compact "1-liner" code can be approximately 10%
 ##    faster.
 ##  * but using less regex's and replacing them with
 ##    eq/ne for comparisons and `substr`/`rindex` for
 ##    replacement/trimming improves the speed the most.
-##     * approx 35% for removing the comparison regex for checking
+##     * approx 25-30% for removing the comparison regex for checking
 ##       `' '` or `'.'` and replacing with two `eq`/`ne`
-##     * approx 50% for removing the substitute of the string
+##     * approx 30-40% for removing the substitute of the string
 ##       from the last `'/'` to the end of the string, with `rindex`
 ##       and the the four parameter version of `subst`.
-##     * combining the two seems to double the performance
+##     * combining the two seems to double the performance!
+##  * switching from local to global variables gets a minor
+##    gain (about 1%) again due to memory management.
 ##
 ## Conclusion
 ##
 ## So short code is interesting - but is not by a long shot the
 ## most efficient especially in respect of converting regexes into
-## `substr`/`index`/`rindex`, even if we keep it to a 1-liner.
+## `substr`/`index`/`rindex`, allocation of variables, even if we
+## keep it to a 1-liner.
 ##
 
 my @examples = (
@@ -96,6 +109,7 @@ my @examples = (
 is( canonical_path_double(      $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_array(       $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_string(      $_->[0]), $_->[1], $_->[2] ) foreach @examples;
+is( canonical_path_string_fast( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 ## One liners (array)...
 is( canonical_path_compact(     $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_compact_opt( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
@@ -104,22 +118,25 @@ is( canonical_path_shortest(    $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_short(       $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_fast(        $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 is( canonical_path_fastest(     $_->[0]), $_->[1], $_->[2] ) foreach @examples;
+is( canonical_path_global(      $_->[0]), $_->[1], $_->[2] ) foreach @examples;
 
 done_testing();
 
 cmpthese( 100_000, {
 ## Code
-  '@-2loops'   => sub { canonical_path_double(      $_->[0] ) foreach @examples },
-  '$-code'     => sub { canonical_path_string(      $_->[0] ) foreach @examples },
-  '@-code'     => sub { canonical_path_array(       $_->[0] ) foreach @examples },
+  '@-2l' => sub { canonical_path_double(      $_->[0] ) foreach @examples },
+  '$-cd' => sub { canonical_path_string(      $_->[0] ) foreach @examples },
+  '@-cf' => sub { canonical_path_array(       $_->[0] ) foreach @examples },
+  '$-cf' => sub { canonical_path_string_fast( $_->[0] ) foreach @examples },
 ## Array 1-liner
-  '@-fast'     => sub { canonical_path_compact_opt( $_->[0] ) foreach @examples },
-  '@-short'    => sub { canonical_path_compact(     $_->[0] ) foreach @examples },
+  '@-ft' => sub { canonical_path_compact_opt( $_->[0] ) foreach @examples },
+  '@-sh' => sub { canonical_path_compact(     $_->[0] ) foreach @examples },
 ## String 1-liner
-  '$-short'    => sub { canonical_path_short(       $_->[0] ) foreach @examples },
-  '$-shortest' => sub { canonical_path_shortest(    $_->[0] ) foreach @examples },
-  '$-fast'     => sub { canonical_path_fast(        $_->[0] ) foreach @examples },
-  '$-fastest'  => sub { canonical_path_fastest(     $_->[0] ) foreach @examples },
+  '$-sh' => sub { canonical_path_short(       $_->[0] ) foreach @examples },
+  '$-st' => sub { canonical_path_shortest(    $_->[0] ) foreach @examples },
+  '$-fa' => sub { canonical_path_fast(        $_->[0] ) foreach @examples },
+  '$-ft' => sub { canonical_path_fastest(     $_->[0] ) foreach @examples },
+  '$-gl' => sub { canonical_path_global(      $_->[0] ) foreach @examples },
 });
 
 sub canonical_path_double {
@@ -236,6 +253,30 @@ sub canonical_path_string {
   return $canonical_path;
 }
 
+sub canonical_path_string_fast {
+  my $path = shift;
+  my @directories    = split m{/},               ## Split path into directory names
+                       $path;
+
+  my $canonical_path = '';                       ## Initialize canonical path
+
+  foreach my $directory_name ( @directories ) {  ## For each directory we
+    next if $directory_name eq '';               ## Remove "empty" directory names
+    next if $directory_name eq '.';              ## Remove directories named "."
+                                                 ## (current directory)
+    if( $directory_name eq q(..) ) {             ## look to see if it is
+                                                 ## ..;
+      substr $canonical_path,
+             rindex( $canonical_path, '/' ),
+             ~0, '';                             ## If so remove parent directory
+                                                 ## if one is set....
+    } else {
+      $canonical_path .= q(/) . $directory_name; ## add directory name to end
+    }
+  }
+  return $canonical_path;
+}
+
 
 sub canonical_path_short {
 ## Converting if statements into ternary operators
@@ -265,7 +306,6 @@ $a='';
 $a
 }
 
-
 sub canonical_path_fastest {
 ## we avoid regular expressions here by using `rindex` - to look
 ## for the last slash in the string and removing it along with
@@ -284,11 +324,22 @@ sub canonical_path_fastest {
 ## 1s-complement of 0 - which gives the largest +ve perl integer
 ## In this case - 18,446,744,073,709,551,615. This is approx
 ## 16 EB (Exabytes) - I think that should be enough!
-##
-## This appears
+
 $a='';
 '.'ne$_&&''ne$_&&('..'ne$_?$a.='/'.$_:substr$a,rindex($a,'/'),~0,'')for split/\//,shift;
 $a
 }
 
+my $s;
+
+sub canonical_path_global {
+## Minor speed up by using a global variable - this
+## is I think due to memory allocation for variables
+## this can be something useful to know in very tight
+## loops...
+$s='';
+'.'ne$_&&''ne$_&&('..'ne$_?$s.='/'.$_:substr$s,rindex($s,'/'),~0,'')for split/\//,shift;
+$s
+}
+
 
diff --git a/challenge-112/james-smith/perl/ch-2.pl b/challenge-112/james-smith/perl/ch-2.pl
index 6365c6690d..a5d78154ba 100644
--- a/challenge-112/james-smith/perl/ch-2.pl
+++ b/challenge-112/james-smith/perl/ch-2.pl
@@ -7,41 +7,25 @@ use feature qw(say);
 use Test::More;
 use Benchmark qw(cmpthese);
 
+
 open my $fh, q(<), 'ans.txt';
 chomp( my @ans = <$fh> );
 close $fh;
-my $N = @ARGV ? $ARGV[0] : 50;
-my $I = @ARGV > 1 ? $ARGV[1] : 10_000;
 
-is(climb_fib(1),1);
-is(climb_fib(2),2);
-is(climb_fib(3),3);
-is(climb_fib(4),5);
-is(climb_fib(5),8);
-is(climb_fib(6),13);
-is(climb_fib(7),21);
-is(climb_fib(8),34);
-is(climb_fib(9),55);
-is(climb_fib(10),89);
+my $N = @ARGV     ? $ARGV[0] : 30;
+my $I = @ARGV > 1 ? $ARGV[1] : 100_000;
 
-is(climb(1),1);
-is(climb(2),2);
-is(climb(3),3);
-is(climb(4),5);
-is(climb(5),8);
-is(climb(6),13);
-is(climb(7),21);
-is(climb(8),34);
-is(climb(9),55);
-is(climb(10),89);
+is(climb(            $_), $ans[$_] ) foreach 1..$N;
+is(climb_fib(        $_), $ans[$_] ) foreach 1..$N;
+is(climb_fib_1liner( $_), $ans[$_] ) foreach 1..$N;
 
-is( climb_fib_1liner($_),$ans[$_] ) foreach 1..50;
 done_testing();
 
 cmpthese($I,{
-  'climb' => sub { climb($_) foreach 0..$N; },
-  'fib-1' => sub { climb_fib_1liner($_) foreach 0..$N; },
-  'fib'   => sub { climb_fib($_) foreach 0..$N; },
+  'climb' => sub { climb(            $_ ) foreach 0..$N; },
+  'fib-g' => sub { climb_fib_global( $_ ) foreach 0..$N; },
+  'fib-1' => sub { climb_fib_1liner( $_ ) foreach 0..$N; },
+  'fib'   => sub { climb_fib(        $_ ) foreach 0..$N; },
 });
 
 ## Once we look at the formula for climb - we
@@ -54,20 +38,27 @@ cmpthese($I,{
 ## fibonachi number...
 ## infact rewriting this as a 1-liner even
 ## speeds it up futher - this is down to
-## storing the caclulation of phi only temporarily
+## storing the caclulation of phi^(n+!) only
+## temporarily
 
-sub climb_fib {
-  my $p = ((1 + sqrt 5)/2)**($_[0]+1);
+sub climb {
+  my @climb = (1,1);
+  @climb = ($climb[1],$climb[0]+$climb[1]) foreach 2..$_[0];
+  return $climb[1];
+}
+
+my $p;
+sub climb_fib_global {
+  $p = ((1 + sqrt 5)/2)**($_[0]+1);
   return int(0.001+ ($p - ($_[0]&1?1:-1)/$p)*sqrt 0.2);
 }
 
-sub climb_fib_1liner {
-  return int(0.001 + (($a = ((1+sqrt 5)/2)**($_[0]+1)) - ($_[0]&1?1:-1)/$a)*sqrt 0.2);
+sub climb_fib {
+  my $q = ((1 + sqrt 5)/2)**($_[0]+1);
+  return int(0.001+ ($q - ($_[0]&1?1:-1)/$q)*sqrt 0.2);
 }
 
-sub climb {
-  my @climb = (1,1);
-  @climb = ($climb[1],$climb[0]+$climb[1]) foreach 2..$_[0];
-  return $climb[1];
+sub climb_fib_1liner {
+  return int(0.001 + (($p = ((1+sqrt 5)/2)**($_[0]+1)) - ($_[0]&1?1:-1)/$p)*sqrt 0.2);
 }
author	drbaggy <js5@sanger.ac.uk>	2021-05-12 17:38:21 +0100
committer	drbaggy <js5@sanger.ac.uk>	2021-05-12 17:38:21 +0100
commit	04dcd9b878bf160b94752ea11b92c4dd7451de1d (patch)
tree	2fd7b1d03e032890c4e40f71a5dc57b182c1e9ef /challenge-112/james-smith
parent	7be53b6d9d55dfd13808a4e8d1483f3a92e90806 (diff)
download	perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.gz perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.bz2 perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.zip