#!/usr/local/bin/perl

use strict;

use warnings;
use feature qw(say state);
use Test::More;
use Benchmark qw(cmpthese);

##
## Please note there is an ambiguity in the question - when then path contains no
## files - as it cannot start with a '/' and not end with a '/' - so we have
## to make a choice do we return '/' or do we return ''.
##
## In our case we decide to return it as the empty string.
## This has the advantage that there is a level of consistency if you do...
##
## $parent_dir.canonical_path('/a');
## or
## $parent_dir.canonical_path('/');
##
## then it will always end without a "/";
##
##
##
## Performance of different methods
## ================================
##
## We will look at some different versions of the code
## Whether we use an array or string to accumulate the resultant path
## Whether we use "readable" code or Perl hacks and tricks
## To see what aspects of our code makes it faster or slower
##
## Methods:
##   * "Long form" Perl...
##     * canonical_path_double      - Using a double loop
##     * canonical_path_array       - Using backtracking instead of inner loop
##     * canonical_path_string      - Use a string as the accumulator and mapping
##     * canonical_path_string_fast - As above - but using substr/rindex
##
##   * "One-liner" perl {arrays}
##     * canonical_path_compact      - short version of array code
##     * canonical_path_compact_opt  - optimized version of above - 1-less regex
##     * canonical_path_compact_glob - as opt but with global var
##
##   * "One-liner" perl {strings}
##     * canonical_path_shortest - most compact method
##     * canonical_path_short    - compact method
##     * canonical_path_fast     - replace one of the regex with equality checks
##     * canonical_path_fastest  - replace other regex with substr/rindex
##     * canonical_path_global   - as fastest but with global variable...
##
## Timings for these:
##
##                 Rate @-sh $-st $-sh @&2l $&fa $-fa @&ft $&ft @-ft @-gl $-ft $-gl
## @-short     20,877/s   --  -6%  -8% -15% -25% -32% -38% -43% -45% -45% -50% -51%
## $-shortest  22,124/s   6%   --  -2% -10% -21% -28% -34% -40% -41% -42% -47% -48%
## $-short     22,573/s   8%   2%   --  -8% -19% -27% -33% -39% -40% -41% -46% -47%
## @-&-2loop   24,631/s  18%  11%   9%   -- -12% -20% -26% -33% -35% -35% -41% -42%
## $-&-fast    27,933/s  34%  26%  24%  13%   --  -9% -16% -24% -26% -27% -34% -35%
## $-fast      30,769/s  47%  39%  36%  25%  10%   --  -8% -17% -18% -19% -27% -28%
## @-&-fastest 33,445/s  60%  51%  48%  36%  20%   9%   --  -9% -11% -12% -20% -22%
## $-&-fastest 36,900/s  77%  67%  63%  50%  32%  20%  10%   --  -2%  -3% -12% -14%
## @-fastest   37,736/s  81%  71%  67%  53%  35%  23%  13%   2%   --  -1% -10% -12%
## @-global    38,168/s  83%  73%  69%  55%  37%  24%  14%   3%   1%   --  -9% -11%
## $-fastest   42,017/s 101%  90%  86%  71%  50%  37%  26%  14%  11%  10%   --  -2%
## $-global    42,735/s 105%  93%  89%  74%  53%  39%  28%  16%  13%  12%   2%   --

## What we see is:
##  * that the optimized string code is faster than the array code,
##    by around 12-15%
##  * using compact "1-liner" code can be approximately 10%
##    faster.
##  * but using less regex's and replacing them with
##    eq/ne for comparisons and `substr`/`rindex` for
##    replacement/trimming improves the speed the most.
##     * approx 25-30% for removing the comparison regex for checking
##       `' '` or `'.'` and replacing with two `eq`/`ne`
##     * approx 30-40% for removing the substitute of the string
##       from the last `'/'` to the end of the string, with `rindex`
##       and the the four parameter version of `subst`.
##     * combining the two seems to double the performance!
##  * switching from local to global variables gets a minor
##    gain (about 1-2%) again due to memory management.
##
## Conclusion
##
## So short code is interesting - but is not by a long shot the
## most efficient especially in respect of converting regexes into
## `substr`/`index`/`rindex`, allocation of variables, even if we
## keep it to a 1-liner.
##

my @examples = (
  [ '/a/',                   '/a',     'Remove trailing slash (empty trailing dir)' ],
  [ '//a',                   '/a',     'Remove empty dir as start' ],
  [ '/a/b//c/',              '/a/b/c', 'Remove empty dir "//"' ],
  [ '/a/./b/./c/',           '/a/b/c', 'Remove "." dir ...' ],
  [ '/a/b/c/../..',          '/a',     'Two ".." together at end' ],
  [ '/a/b/../c/..',          '/a',     'Two ".." separated (one in middle)' ],
  [ '/a/../b/../c/./.',      '/c',     'Two ".." separated (both in middle)' ],
  [ '/a/b/../../c',          '/c',     'Two ".." together in middle' ],
  [ '/a/../b/../c/..',       '',       'Same no of ".." as dir' ],
  [ '/a/b/c/../../..',       '',       'Same no of ".." as dir - all at end' ],
  [ '/a/../b/../c/../../..', '',       'More ".." than dirs' ],
  [ '/../../../a/',          '/a',     '".." at start, no other ".."' ],
  [ '/../../a/../c/.',       '/c',     '".." at start, other ".."' ],
);

## Code examples..
is( canonical_path_double(       $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_array(        $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_string(       $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_string_fast(  $_->[0]), $_->[1], $_->[2] ) foreach @examples;
### One liners (array)...
is( canonical_path_compact(      $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_compact_opt(  $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_compact_glob( $_->[0]), $_->[1], $_->[2] ) foreach @examples;
### One liners (string)...
is( canonical_path_shortest(     $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_short(        $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_fast(         $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_fastest(      $_->[0]), $_->[1], $_->[2] ) foreach @examples;
is( canonical_path_global(       $_->[0]), $_->[1], $_->[2] ) foreach @examples;
done_testing();

cmpthese( 100_000, {
## Code
  '@&2l' => sub { canonical_path_double(       $_->[0] ) foreach @examples },
  '$&fa' => sub { canonical_path_string(       $_->[0] ) foreach @examples },
  '@&ft' => sub { canonical_path_array(        $_->[0] ) foreach @examples },
  '$&ft' => sub { canonical_path_string_fast(  $_->[0] ) foreach @examples },
## Array 1-liner
  '@-sh' => sub { canonical_path_compact(      $_->[0] ) foreach @examples },
  '@-ft' => sub { canonical_path_compact_opt(  $_->[0] ) foreach @examples },
  '@-gl' => sub { canonical_path_compact_glob( $_->[0] ) foreach @examples },
## String 1-liner
  '$-st' => sub { canonical_path_shortest(     $_->[0] ) foreach @examples },
  '$-sh' => sub { canonical_path_short(        $_->[0] ) foreach @examples },
  '$-fa' => sub { canonical_path_fast(         $_->[0] ) foreach @examples },
  '$-ft' => sub { canonical_path_fastest(      $_->[0] ) foreach @examples },
  '$-gl' => sub { canonical_path_global(       $_->[0] ) foreach @examples },
});

sub canonical_path_double {
  ## This was my original function, basically loop until you find a ".."
  ## process it;
  ## Then restart the loop if you have removed the ".."
  ## This is one of the few times that labels are useful allowing
  ## to jump out of the inner loop and go to the next iteration of
  ## the outer loop...
  ## We then use "last" to jump out of the outer loop once no '..'s are
  ## found except in the first position...
  my $directory_path = shift;
  my @directory_names   = grep { $_ ne '' &&  ## Remove "empty" directory names
                                 $_ ne '.' }   ## Remove directories with name "."
                          split m{/},          ## Split path into directories
                          $directory_path;

  OUTER: while(1) {
    foreach (1..$#directory_names) {
      next unless $directory_names[$_] eq '..';
      splice @directory_names,$_-1,2;
      next OUTER;
    }
    last;
  }
  shift @directory_names if @directory_names && $directory_names[0] eq '..';
  return join '/','',@directory_names;
}

sub canonical_path_array {
  my $directory_path = shift;
  my @parts          = split m{/}, $directory_path;
  my @directory_names;
  foreach my $part ( @parts ) {
    next if $part eq '';
    next if $part eq '.';
    if($part eq '..' ) {
      pop @directory_names;
    } else {
      push @directory_names,$part;
    }
  }
  return join '/','',@directory_names;
}

sub canonical_path_compact {
  my @d=();
  /^\.?$/||('..'eq$_?pop@d:push@d,$_)for split/\//,shift;
  return join'/','',@d;
}

my @g;
sub canonical_path_compact_glob {
  @g=();
  ''ne$_&&'.'ne$_&&('..'eq$_?pop@g:push@g,$_)for split/\//,shift;
  return join'/','',@g;
}

sub canonical_path_compact_opt {
  my @d=();
  ''ne$_&&'.'ne$_&&('..'eq$_?pop@d:push@d,$_)for split/\//,shift;
  return join'/','',@d;
}

## This is the "nice version" of the string based method for
## finding the canonical path. Rather than storing the canonical
## path in an array and join to return the value - we use a string
## and use "concatenate" and "regex-replace" to add or remove the
## path as required..

## The highly compressed and optimized version follows with
## `canonical_path`

sub canonical_path_string {
  my $path = shift;
  my @directories    = split m{/},               ## Split path into directory names
                       $path;

  my $canonical_path = '';                       ## Initialize canonical path

  foreach my $directory_name ( @directories ) {  ## For each directory we
    next if $directory_name eq '';               ## Remove "empty" directory names
    next if $directory_name eq '.';              ## Remove directories named "."
                                                 ## (current directory)
    if( $directory_name eq q(..) ) {             ## look to see if it is
                                                 ## ..;
      $canonical_path =~ s{/[^/]+\Z}{};          ## If so remove parent directory
                                                 ## if one is set....
    } else {
      $canonical_path .= q(/) . $directory_name; ## add directory name to end
    }
  }
  return $canonical_path;
}

sub canonical_path_string_fast {
  my $path = shift;
  my @directories    = split m{/},               ## Split path into directory names
                       $path;

  my $canonical_path = '';                       ## Initialize canonical path

  foreach my $directory_name ( @directories ) {  ## For each directory we
    next if $directory_name eq '';               ## Remove "empty" directory names
    next if $directory_name eq '.';              ## Remove directories named "."
                                                 ## (current directory)
    if( $directory_name eq q(..) ) {             ## look to see if it is
                                                 ## ..;
      substr $canonical_path,
             rindex( $canonical_path, '/' ),
             ~0, '';                             ## If so remove parent directory
                                                 ## if one is set....
    } else {
      $canonical_path .= q(/) . $directory_name; ## add directory name to end
    }
  }
  return $canonical_path;
}


sub canonical_path_short {
## Converting if statements into ternary operators
## Shorter by using regex rather than, interpolation
## and `$` rather than `/Z`
$a='';
/^\.?$/?0:'..'ne$_?$a.="/$_":$a=~s#/[^/]+$## for split'/',shift;
$a
}


sub canonical_path_shortest {
## only ternary - use `x` to add/not add the path
## depending on whether the directory name is "."
## or //. This is the shortest script in terms of
## bytes - but also the slowest string version.
$a='';
'..'ne$_?$a.="/$_"x!/^\.?$/:$a=~s#/[^/]+$## for split'/',shift;
$a
}


sub canonical_path_fast {
## Skip the regex for ''/'.' and replace with compares
$a='';
'.'ne$_&&''ne$_&&('..'ne$_?$a.="/$_":$a=~s#/[^/]+$##)for split'/',shift;
$a
}

sub canonical_path_fastest {
## we avoid regular expressions here by using `rindex` - to look
## for the last slash in the string and removing it along with
## the characters after it.
## We use the 4 parameter version of `substr` here. This replaces
## the substring that is found with this fourth parameter.
##
## `substr $string, $offset, $length, $replace`
##
## There is a catch here - that the 3rd parameter has to
## set the length of string to be removed (or `-n`) if you want
## it to be to the end - but this only works if `n > 0` so there
## is no simple way to remove everything to the end of the
## string - to do this we have to specify a length greate than
## all possible lenghts. We can use "~0" which is the
## 1s-complement of 0 - which gives the largest +ve perl integer
## In this case - 18,446,744,073,709,551,615. This is approx
## 16 EB (Exabytes) - I think that should be enough!

$a='';
'.'ne$_&&''ne$_&&('..'ne$_?$a.='/'.$_:substr$a,rindex($a,'/'),~0,'')for split'/',shift;
$a
}

my $s;

sub canonical_path_global {
## Minor speed up by using a global variable - this
## is I think due to memory allocation for variables
## this can be something useful to know in very tight
## loops...
$s='';
'.'ne$_&&''ne$_&&('..'ne$_?$s.='/'.$_:substr$s,rindex($s,'/'),~0,'')for split'/',shift;
$s
}