added optimal string method

author: drbaggy <js5@sanger.ac.uk> 2021-05-11 15:27:35 +0100
committer: drbaggy <js5@sanger.ac.uk> 2021-05-11 15:27:35 +0100
commit: 25c04b60a66fe175d840f37f19a016b38bc536e0 (patch)
tree: 4b5dce86714005b1fb3ba628965512a0183cabb9
parent: 854ad23ac674bf18f134079573356a4fb58104a0 (diff)
download: perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.tar.gz
perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.tar.bz2
perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.zip
1 files changed, 128 insertions, 32 deletions
diff --git a/challenge-112/james-smith/perl/ch-1.pl b/challenge-112/james-smith/perl/ch-1.pl
index 3246323b99..c758f9a475 100644
--- a/challenge-112/james-smith/perl/ch-1.pl
+++ b/challenge-112/james-smith/perl/ch-1.pl
@@ -6,6 +6,47 @@ use warnings;
 use feature qw(say);
 use Test::More;
 
+## Please note there is an ambiguity in the question - when the path contains no
+## directory names - as it cannot both start with a '/' and not end with a '/' -
+## so we have to make a choice: do we return '/' or do we return ''?
+##
+## In our case we decide to return the empty string.
+## This has the advantage that there is a level of consistency if you do...
+##
+## $parent_dir.canonical_path('/a');
+## or
+## $parent_dir.canonical_path('/');
+##
+## then the result will always end without a "/".
+# ---- Cases for canonical_path_array: is( got, expected, description ) ----
+is( canonical_path_array('/a/'),                   '/a',     'Remove trailing slash (empty trailing dir)' );
+is( canonical_path_array('//a'),                   '/a',     'Remove empty dir as start' );
+is( canonical_path_array('/a/b//c/'),              '/a/b/c', 'Remove empty dir "//"' );
+is( canonical_path_array('/a/./b/./c/'),           '/a/b/c', 'Remove "." dir ...' );
+is( canonical_path_array('/a/b/c/../..'),          '/a',     'Two ".." together at end' );
+is( canonical_path_array('/a/b/../c/..'),          '/a',     'Two ".." separated (one in middle)' );
+is( canonical_path_array('/a/../b/../c/./.'),      '/c',     'Two ".." separated (both in middle)' );
+is( canonical_path_array('/a/b/../../c'),          '/c',     'Two ".." together in middle' );
+is( canonical_path_array('/a/../b/../c/..'),       '',       'Same no of ".." as dir' );
+is( canonical_path_array('/a/b/c/../../..'),       '',       'Same no of ".." as dir - all at end' );
+is( canonical_path_array('/a/../b/../c/../../..'), '',       'More ".." than dirs' );
+is( canonical_path_array('/../../../a/'),          '/a',     '".." at start, no other ".."' );
+is( canonical_path_array('/../../a/../c/.'),       '/c',     '".." at start, other ".."' );
+# ---- The same inputs and expected values, for canonical_path_string ----
+is( canonical_path_string('/a/'),                   '/a',     'Remove trailing slash (empty trailing dir)' );
+is( canonical_path_string('//a'),                   '/a',     'Remove empty dir as start' );
+is( canonical_path_string('/a/b//c/'),              '/a/b/c', 'Remove empty dir "//"' );
+is( canonical_path_string('/a/./b/./c/'),           '/a/b/c', 'Remove "." dir ...' );
+is( canonical_path_string('/a/b/c/../..'),          '/a',     'Two ".." together at end' );
+is( canonical_path_string('/a/b/../c/..'),          '/a',     'Two ".." separated (one in middle)' );
+is( canonical_path_string('/a/../b/../c/./.'),      '/c',     'Two ".." separated (both in middle)' );
+is( canonical_path_string('/a/b/../../c'),          '/c',     'Two ".." together in middle' );
+is( canonical_path_string('/a/../b/../c/..'),       '',       'Same no of ".." as dir' );
+is( canonical_path_string('/a/b/c/../../..'),       '',       'Same no of ".." as dir - all at end' );
+is( canonical_path_string('/a/../b/../c/../../..'), '',       'More ".." than dirs' );
+is( canonical_path_string('/../../../a/'),          '/a',     '".." at start, no other ".."' );
+is( canonical_path_string('/../../a/../c/.'),       '/c',     '".." at start, other ".."' );
+
 is( canonical_path('/a/'),                   '/a',     'Remove trailing slash (empty trailing dir)' );
 is( canonical_path('//a'),                   '/a',     'Remove empty dir as start' );
 is( canonical_path('/a/b//c/'),              '/a/b/c', 'Remove empty dir "//"' );
@@ -14,9 +55,9 @@ is( canonical_path('/a/b/c/../..'),          '/a',     'Two ".." together at end
 is( canonical_path('/a/b/../c/..'),          '/a',     'Two ".." separated (one in middle)' );
 is( canonical_path('/a/../b/../c/./.'),      '/c',     'Two ".." separated (both in middle)' );
 is( canonical_path('/a/b/../../c'),          '/c',     'Two ".." together in middle' );
-is( canonical_path('/a/../b/../c/..'),       '/',      'Same no of ".." as dir' );
-is( canonical_path('/a/b/c/../../..'),       '/',      'Same no of ".." as dir - all at end' );
-is( canonical_path('/a/../b/../c/../../..'), '/',      'More ".." than dirs' );
+is( canonical_path('/a/../b/../c/..'),       '',       'Same no of ".." as dir' );
+is( canonical_path('/a/b/c/../../..'),       '',       'Same no of ".." as dir - all at end' );
+is( canonical_path('/a/../b/../c/../../..'), '',       'More ".." than dirs' );
 is( canonical_path('/../../../a/'),          '/a',     '".." at start, no other ".."' );
 is( canonical_path('/../../a/../c/.'),       '/c',     '".." at start, other ".."' );
 
@@ -28,44 +69,99 @@ is( canonical_path_compact('/a/b/c/../..'),          '/a',     'Two ".." togethe
 is( canonical_path_compact('/a/b/../c/..'),          '/a',     'Two ".." separated (one in middle)' );
 is( canonical_path_compact('/a/../b/../c/./.'),      '/c',     'Two ".." separated (both in middle)' );
 is( canonical_path_compact('/a/b/../../c'),          '/c',     'Two ".." together in middle' );
-is( canonical_path_compact('/a/../b/../c/..'),       '/',      'Same no of ".." as dir' );
-is( canonical_path_compact('/a/b/c/../../..'),       '/',      'Same no of ".." as dir - all at end' );
-is( canonical_path_compact('/a/../b/../c/../../..'), '/',      'More ".." than dirs' );
+is( canonical_path_compact('/a/../b/../c/..'),       '',       'Same no of ".." as dir' );
+is( canonical_path_compact('/a/b/c/../../..'),       '',       'Same no of ".." as dir - all at end' );
+is( canonical_path_compact('/a/../b/../c/../../..'), '',       'More ".." than dirs' );
 is( canonical_path_compact('/../../../a/'),          '/a',     '".." at start, no other ".."' );
 is( canonical_path_compact('/../../a/../c/.'),       '/c',     '".." at start, other ".."' );
 
 done_testing();
 
-sub canonical_path {
-  ## Initialize:
-  ##   Set pointer to 1;
-  ##   Split path into directories
-  ##     - remove empty directories and also "." directories {current dir}
-
-  my( $p, @parts ) = ( 1, grep { $_ ne '' && $_ ne '.' } split m{/}, $_[0] );
-
-  ## Process paths...
-  ## If the directory isn't ".." we just move on to the next one...
-  ## If it is the first entry of the list we just shift it off.
-  ## Otherwise we splice it out of the array, along withe previous directory
-  ##     `splice @parts, $p-1, 2`
-  ##   and then backtrack one space..
-  ##   this means the splice can be written as:
-  ##     `splice @parts, --$p, 2`
-  ## Repeat this until we get to the end of the array - there will be no more '..'s in the
-  ## list...
-
-  $parts[$p] ne '..' ? $p++ : $p ? splice @parts, --$p, 2 : shift @parts while $p < @parts;
-
-  ## Finally return our string which consists of a "/" followed by all the
-  ## directories left joing with "/"s...
-
-  return '/'.join '/',@parts;
+## Canonical path - readable, array based method.
+##
+## The path is split into directory names; empty names and "." are
+## thrown away, and each ".." is removed along with the name before it
+## (or on its own if nothing is before it).
+##
+##   Parameter: $directory_path - the "/" separated path to tidy up
+##   Returns:   the names that are left, each preceded by a "/" - or
+##              the empty string if no names are left
+
+sub canonical_path_array {
+  my $directory_path  = shift;
+  my @directory_names = grep { $_ ne '' && $_ ne '.' } ## Drop "" and "."
+                        split m{/}, $directory_path;  ## names
+
+  ## Start at the first name (index 0). Starting at 1 never looks at a
+  ## leading ".." when the name after it is a real directory, so "/../a"
+  ## would come back unchanged rather than as "/a".
+
+  my $pointer = 0;
+
+  while( $pointer < @directory_names ) {       ## Until we run off the end
+    if( $directory_names[$pointer] ne '..' ) { ## An ordinary name - so
+      $pointer ++;                             ## just go onto the next
+      next;
+    }
+    if( $pointer > 0 ) {
+      ## Remove the ".." along with its parent. Whatever followed them
+      ## is now at $pointer - 1, so we step back one space to see it.
+      splice @directory_names, -- $pointer, 2;
+    } else {
+      ## A ".." at the start has no parent - we just remove it. No need
+      ## to back-track as the next name is now in this location.
+      shift @directory_names;
+    }
+  }
+
+  ## The leading '' supplies the initial "/" - and means that an empty
+  ## list of names gives the empty string.
+  return join '/', '', @directory_names;
 }
 
 sub canonical_path_compact {
-$a=1,@_=grep{!/^\.?$/}split/\//,shift;
+$a=0,@_=grep{!/^\.?$/}split/\//,shift; ## names minus ""/"."; index from 0 so a leading ".." is not skipped
 $_[$a]ne'..'?$a++:$a?splice@_,--$a,2:shift while$a<@_;
-'/'.join'/',@_
+join'/','',@_ ## leading '' gives the initial "/" and makes an empty list return ''
 }
 
+## This is the "nice version" of the string based method for
+## finding the canonical path. Rather than storing the canonical
+## path in an array and joining it to return the value - we use a
+## string, with "concatenate" and "regex-replace" to add or remove
+## directory names as required.
+
+## The highly compressed and optimized version follows as
+## `canonical_path`.
+
+sub canonical_path_string {
+  ## Readable string based method: the canonical path is built up in a
+  ## string, one "/name" at a time, instead of in an array of names.
+  ##   Parameter: the "/" separated path to tidy up
+  ##   Returns:   canonical path - empty string if no names are left
+
+  my( $path ) = @_;
+  my $built   = '';                       ## Canonical path so far
+
+  for my $name ( split m{/}, $path ) {
+    if( $name eq '' || $name eq '.' ) {   ## Empty names and "." change
+      next;                               ## nothing, so skip them
+    }
+    if( $name ne q(..) ) {                ## An ordinary name is added
+      $built .= q(/) . $name;             ## to the end after a "/"
+      next;
+    }
+    $built =~ s{/[^/]+\Z}{};              ## ".." - remove the last "/name"
+  }                                       ## (nothing to remove if empty)
+  return $built;
+}
+
+
+sub canonical_path {                            ## Compressed string based method; takes the path as its one argument
+$a='';                                          ## Canonical path built so far (uses the package variable $a)
+/^\.?$/?0:'..'ne$_?$a.="/$_":$a=~s{/[^/]+$}{}   ## "" or "." - do nothing; other name - append "/name"; ".." - strip last "/name"
+for split/\//,shift;                            ## ...applied to each "/" separated part of the argument in turn
+$a                                              ## Last expression evaluated is the return value: the canonical path
+}
+
+
author	drbaggy <js5@sanger.ac.uk>	2021-05-11 15:27:35 +0100
committer	drbaggy <js5@sanger.ac.uk>	2021-05-11 15:27:35 +0100
commit	25c04b60a66fe175d840f37f19a016b38bc536e0 (patch)
tree	4b5dce86714005b1fb3ba628965512a0183cabb9
parent	854ad23ac674bf18f134079573356a4fb58104a0 (diff)
download	perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.tar.gz perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.tar.bz2 perlweeklychallenge-club-25c04b60a66fe175d840f37f19a016b38bc536e0.zip