aboutsummaryrefslogtreecommitdiff
path: root/challenge-112/james-smith
diff options
context:
space:
mode:
authordrbaggy <js5@sanger.ac.uk>2021-05-12 17:38:21 +0100
committerdrbaggy <js5@sanger.ac.uk>2021-05-12 17:38:21 +0100
commit04dcd9b878bf160b94752ea11b92c4dd7451de1d (patch)
tree2fd7b1d03e032890c4e40f71a5dc57b182c1e9ef /challenge-112/james-smith
parent7be53b6d9d55dfd13808a4e8d1483f3a92e90806 (diff)
downloadperlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.gz
perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.tar.bz2
perlweeklychallenge-club-04dcd9b878bf160b94752ea11b92c4dd7451de1d.zip
some speed up and notes
Diffstat (limited to 'challenge-112/james-smith')
-rw-r--r--challenge-112/james-smith/perl/ch-1.pl121
-rw-r--r--challenge-112/james-smith/perl/ch-2.pl61
2 files changed, 112 insertions, 70 deletions
diff --git a/challenge-112/james-smith/perl/ch-1.pl b/challenge-112/james-smith/perl/ch-1.pl
index 863eb9c651..826c5edf30 100644
--- a/challenge-112/james-smith/perl/ch-1.pl
+++ b/challenge-112/james-smith/perl/ch-1.pl
@@ -3,10 +3,11 @@
use strict;
use warnings;
-use feature qw(say);
+use feature qw(say state);
use Test::More;
use Benchmark qw(cmpthese);
+##
## Please note there is an ambiguity in the question - when then path contains no
## files - as it cannot start with a '/' and not end with a '/' - so we have
## to make a choice do we return '/' or do we return ''.
@@ -19,7 +20,12 @@ use Benchmark qw(cmpthese);
## $parent_dir.canonical_path('/');
##
## then it will always end without a "/";
-
+##
+##
+##
+## Performance of different methods
+## ================================
+##
## We will look at some different versions of the code
## Whether we use an array or string to accumulate the resultant path
## Whether we use "readable" code or Perl hacks and tricks
@@ -27,9 +33,10 @@ use Benchmark qw(cmpthese);
##
## Methods:
## * "Long form" Perl...
-## * canonical_path_double - Using a double loop
-## * canonical_path_array - Using backtracking instead of inner loop
-## * canonical_path_string - Use a string as the accumulator and mapping
+## * canonical_path_double - Using a double loop
+## * canonical_path_array - Using backtracking instead of inner loop
+## * canonical_path_string - Use a string as the accumulator and mapping
+## * canonical_path_string_fast - As above - but using substr/rindex
##
## * "One-liner" perl {arrays}
## * canonical_path_compact - short version of array code
@@ -40,40 +47,46 @@ use Benchmark qw(cmpthese);
## * canonical_path_short - compact method
## * canonical_path_fast - replace one of the regex with equality checks
## * canonical_path_fastest - replace other regex with substr/rindex
+## * canonical_path_global - as fastest but with global variable...
##
## Timings for these:
-##
-## Rate @-sh $-s'st $-sh @-2l @-cd $-cd @-fa $-fa $-f'st
-## @-short 17483/s -- -15% -16% -28% -29% -33% -37% -38% -58%
-## $-shortest 20534/s 17% -- -1% -15% -17% -21% -26% -27% -51%
-## $-short 20833/s 19% 1% -- -14% -15% -20% -25% -26% -50%
-## @-2loops 24213/s 38% 18% 16% -- -2% -7% -13% -14% -42%
-## @-code 24631/s 41% 20% 18% 2% -- -5% -11% -12% -41%
-## $-code 25907/s 48% 26% 24% 7% 5% -- -7% -8% -38%
-## @-fast 27778/s 59% 35% 33% 15% 13% 7% -- -1% -33%
-## $-fast 28090/s 61% 37% 35% 16% 14% 8% 1% -- -33%
-## $-fastest 41667/s 138% 103% 100% 72% 69% 61% 50% 48% --
+##
+## Rate @-sh $-st $-sh @-2l @-cf @-ft $-cd $-fa $-cf $-ft $-gl
+## @-shortest 17,483/s -- -21% -22% -29% -30% -37% -38% -43% -52% -59% -59%
+## $-shortest 22,026/s 26% -- -2% -11% -11% -21% -21% -28% -40% -48% -49%
+## $-short 22,472/s 29% 2% -- -9% -10% -20% -20% -26% -39% -47% -48%
+## @-code-2loop 24,631/s 41% 12% 10% -- -1% -12% -12% -19% -33% -42% -43%
+## @-code-fastest 24,876/s 42% 13% 11% 1% -- -11% -11% -18% -32% -42% -42%
+## @-fastest 27,933/s 60% 27% 24% 13% 12% -- -0% -8% -24% -35% -35%
+## $-code-fast 28,011/s 60% 27% 25% 14% 13% 0% -- -8% -24% -34% -35%
+## $-fast 30,488/s 74% 38% 36% 24% 23% 9% 9% -- -17% -29% -29%
+## $-code-fastest 36,765/s 110% 67% 64% 49% 48% 32% 31% 21% -- -14% -15%
+## $-fastest 42,735/s 144% 94% 90% 74% 72% 53% 53% 40% 16% -- -1%
+## $-global-fast 43,103/s 147% 96% 92% 75% 73% 54% 54% 41% 17% 1% --
##
## What we see is:
-## * that the string code is fractionally faster than
-## the array code, but by only 1-5%
-## * using compact "1-liner" code can be approximately 7-8%
+## * that the string code is faster than the array code,
+## by around 20-40%
+## * using compact "1-liner" code can be approximately 10%
## faster.
## * but using less regex's and replacing them with
## eq/ne for comparisons and `substr`/`rindex` for
## replacement/trimming improves the speed the most.
-## * approx 35% for removing the comparison regex for checking
+## * approx 25-30% for removing the comparison regex for checking
## `' '` or `'.'` and replacing with two `eq`/`ne`
-## * approx 50% for removing the substitute of the string
+## * approx 30-40% for removing the substitute of the string
## from the last `'/'` to the end of the string, with `rindex`
## and the the four parameter version of `subst`.
-## * combining the two seems to double the performance
+## * combining the two seems to double the performance!
+## * switching from local to global variables gets a minor
+## gain (about 1%) again due to memory management.
##
## Conclusion
##
## So short code is interesting - but is not by a long shot the
## most efficient especially in respect of converting regexes into
-## `substr`/`index`/`rindex`, even if we keep it to a 1-liner.
+## `substr`/`index`/`rindex`, allocation of variables, even if we
+## keep it to a 1-liner.
##
my @examples = (
@@ -96,6 +109,7 @@ my @examples = (
is( canonical_path_double( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_array( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_string( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
+is( canonical_path_string_fast( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
## One liners (array)...
is( canonical_path_compact( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_compact_opt( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
@@ -104,22 +118,25 @@ is( canonical_path_shortest( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_short( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_fast( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_fastest( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
+is( canonical_path_global( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
done_testing();
cmpthese( 100_000, {
## Code
- '@-2loops' => sub { canonical_path_double( $_->[0] ) foreach @examples },
- '$-code' => sub { canonical_path_string( $_->[0] ) foreach @examples },
- '@-code' => sub { canonical_path_array( $_->[0] ) foreach @examples },
+ '@-2l' => sub { canonical_path_double( $_->[0] ) foreach @examples },
+ '$-cd' => sub { canonical_path_string( $_->[0] ) foreach @examples },
+ '@-cf' => sub { canonical_path_array( $_->[0] ) foreach @examples },
+ '$-cf' => sub { canonical_path_string_fast( $_->[0] ) foreach @examples },
## Array 1-liner
- '@-fast' => sub { canonical_path_compact_opt( $_->[0] ) foreach @examples },
- '@-short' => sub { canonical_path_compact( $_->[0] ) foreach @examples },
+ '@-ft' => sub { canonical_path_compact_opt( $_->[0] ) foreach @examples },
+ '@-sh' => sub { canonical_path_compact( $_->[0] ) foreach @examples },
## String 1-liner
- '$-short' => sub { canonical_path_short( $_->[0] ) foreach @examples },
- '$-shortest' => sub { canonical_path_shortest( $_->[0] ) foreach @examples },
- '$-fast' => sub { canonical_path_fast( $_->[0] ) foreach @examples },
- '$-fastest' => sub { canonical_path_fastest( $_->[0] ) foreach @examples },
+ '$-sh' => sub { canonical_path_short( $_->[0] ) foreach @examples },
+ '$-st' => sub { canonical_path_shortest( $_->[0] ) foreach @examples },
+ '$-fa' => sub { canonical_path_fast( $_->[0] ) foreach @examples },
+ '$-ft' => sub { canonical_path_fastest( $_->[0] ) foreach @examples },
+ '$-gl' => sub { canonical_path_global( $_->[0] ) foreach @examples },
});
sub canonical_path_double {
@@ -236,6 +253,30 @@ sub canonical_path_string {
return $canonical_path;
}
+sub canonical_path_string_fast {
+ my $path = shift;
+ my @directories = split m{/}, ## Split path into directory names
+ $path;
+
+ my $canonical_path = ''; ## Initialize canonical path
+
+ foreach my $directory_name ( @directories ) { ## For each directory we
+ next if $directory_name eq ''; ## Remove "empty" directory names
+ next if $directory_name eq '.'; ## Remove directories named "."
+ ## (current directory)
+ if( $directory_name eq q(..) ) { ## look to see if it is
+ ## ..;
+ substr $canonical_path,
+ rindex( $canonical_path, '/' ),
+ ~0, ''; ## If so remove parent directory
+ ## if one is set....
+ } else {
+ $canonical_path .= q(/) . $directory_name; ## add directory name to end
+ }
+ }
+ return $canonical_path;
+}
+
sub canonical_path_short {
## Converting if statements into ternary operators
@@ -265,7 +306,6 @@ $a='';
$a
}
-
sub canonical_path_fastest {
## we avoid regular expressions here by using `rindex` - to look
## for the last slash in the string and removing it along with
@@ -284,11 +324,22 @@ sub canonical_path_fastest {
## 1s-complement of 0 - which gives the largest +ve perl integer
## In this case - 18,446,744,073,709,551,615. This is approx
## 16 EB (Exabytes) - I think that should be enough!
-##
-## This appears
+
$a='';
'.'ne$_&&''ne$_&&('..'ne$_?$a.='/'.$_:substr$a,rindex($a,'/'),~0,'')for split/\//,shift;
$a
}
+my $s;
+
+sub canonical_path_global {
+## Minor speed up by using a global variable - this
+## is I think due to memory allocation for variables
+## this can be something useful to know in very tight
+## loops...
+$s='';
+'.'ne$_&&''ne$_&&('..'ne$_?$s.='/'.$_:substr$s,rindex($s,'/'),~0,'')for split/\//,shift;
+$s
+}
+
diff --git a/challenge-112/james-smith/perl/ch-2.pl b/challenge-112/james-smith/perl/ch-2.pl
index 6365c6690d..a5d78154ba 100644
--- a/challenge-112/james-smith/perl/ch-2.pl
+++ b/challenge-112/james-smith/perl/ch-2.pl
@@ -7,41 +7,25 @@ use feature qw(say);
use Test::More;
use Benchmark qw(cmpthese);
+
open my $fh, q(<), 'ans.txt';
chomp( my @ans = <$fh> );
close $fh;
-my $N = @ARGV ? $ARGV[0] : 50;
-my $I = @ARGV > 1 ? $ARGV[1] : 10_000;
-is(climb_fib(1),1);
-is(climb_fib(2),2);
-is(climb_fib(3),3);
-is(climb_fib(4),5);
-is(climb_fib(5),8);
-is(climb_fib(6),13);
-is(climb_fib(7),21);
-is(climb_fib(8),34);
-is(climb_fib(9),55);
-is(climb_fib(10),89);
+my $N = @ARGV ? $ARGV[0] : 30;
+my $I = @ARGV > 1 ? $ARGV[1] : 100_000;
-is(climb(1),1);
-is(climb(2),2);
-is(climb(3),3);
-is(climb(4),5);
-is(climb(5),8);
-is(climb(6),13);
-is(climb(7),21);
-is(climb(8),34);
-is(climb(9),55);
-is(climb(10),89);
+is(climb( $_), $ans[$_] ) foreach 1..$N;
+is(climb_fib( $_), $ans[$_] ) foreach 1..$N;
+is(climb_fib_1liner( $_), $ans[$_] ) foreach 1..$N;
-is( climb_fib_1liner($_),$ans[$_] ) foreach 1..50;
done_testing();
cmpthese($I,{
- 'climb' => sub { climb($_) foreach 0..$N; },
- 'fib-1' => sub { climb_fib_1liner($_) foreach 0..$N; },
- 'fib' => sub { climb_fib($_) foreach 0..$N; },
+ 'climb' => sub { climb( $_ ) foreach 0..$N; },
+ 'fib-g' => sub { climb_fib_global( $_ ) foreach 0..$N; },
+ 'fib-1' => sub { climb_fib_1liner( $_ ) foreach 0..$N; },
+ 'fib' => sub { climb_fib( $_ ) foreach 0..$N; },
});
## Once we look at the formula for climb - we
@@ -54,20 +38,27 @@ cmpthese($I,{
## fibonachi number...
## infact rewriting this as a 1-liner even
## speeds it up futher - this is down to
-## storing the caclulation of phi only temporarily
+## storing the caclulation of phi^(n+!) only
+## temporarily
-sub climb_fib {
- my $p = ((1 + sqrt 5)/2)**($_[0]+1);
+sub climb {
+ my @climb = (1,1);
+ @climb = ($climb[1],$climb[0]+$climb[1]) foreach 2..$_[0];
+ return $climb[1];
+}
+
+my $p;
+sub climb_fib_global {
+ $p = ((1 + sqrt 5)/2)**($_[0]+1);
return int(0.001+ ($p - ($_[0]&1?1:-1)/$p)*sqrt 0.2);
}
-sub climb_fib_1liner {
- return int(0.001 + (($a = ((1+sqrt 5)/2)**($_[0]+1)) - ($_[0]&1?1:-1)/$a)*sqrt 0.2);
+sub climb_fib {
+ my $q = ((1 + sqrt 5)/2)**($_[0]+1);
+ return int(0.001+ ($q - ($_[0]&1?1:-1)/$q)*sqrt 0.2);
}
-sub climb {
- my @climb = (1,1);
- @climb = ($climb[1],$climb[0]+$climb[1]) foreach 2..$_[0];
- return $climb[1];
+sub climb_fib_1liner {
+ return int(0.001 + (($p = ((1+sqrt 5)/2)**($_[0]+1)) - ($_[0]&1?1:-1)/$p)*sqrt 0.2);
}