diff options
46 files changed, 5756 insertions, 293 deletions
diff --git a/challenge-099/duncan-c-white/README b/challenge-099/duncan-c-white/README index 1b476ca4de..a184ac1e54 100644 --- a/challenge-099/duncan-c-white/README +++ b/challenge-099/duncan-c-white/README @@ -47,15 +47,15 @@ in $s into a variable called l". Another API might be "T l>2", meaning pos 1 to char pos l-1". Then I wrote an API-interpreter for them. Of course, this has to handle -the general match API (see API.pm and ch-1a.pl for details) which can match -in multiple places - requiring us to try each possible place, and backtrack. +the general match API (see Interpreter.pm, API.pm and ch-1a.pl for details) +which can match in multiple places - requiring us to try each possible place +recursively. The API-interpreter successfully does pattern matching, as shown by "./ch-1a.pl -t" passing all tests. So this is a successful proof of concept. -But as yet I have not attempted to translate our patterns such as 'a*c?e' -into lists of API calls. - +Up to the submission deadline, this was all I managed to achieve. See the +bottom of this README for an update.. Task 2: "Unique Subsequence @@ -91,3 +91,52 @@ into a Task-1-style pattern, so lit becomes *l*i*t* and figuring out how many different ways that pattern can apply. But as $T has no meta-chars, an easy approach may be possible (locate all positions in $S where head($T) appears, for each of them recursively match subtr($S,pos+1) against tail($T)) + + +Return to Task 1 after the deadline + +After the deadline had passed, I spent quite a long time working out +how to translate our patterns such as 'a*c?e' into lists of API calls. +See Translate.pm and ch-1b.pl for the details of what I did. This half +of the problem was much harder than the Interpreter. + +I then invested some more time optimising the Interpreter+Translator: + +opt1-noFL: + Here I realised that "First" (match @ 0) and "Last" (match @ end of + the string) were special cases, and removed them, thus removing over + 100 lines of Perl without any harm. + +opt2-separateRunX: + I started thinking about merging the Translator and Interpreter, + here I split out the Interpreter operations for At, Match, Test and + Capture into separate functions thinking this might make it easier + to move bits of the Interpreter into the Translator. It might have + helped with At, Test and Capture, but Match has to try matching at + several different places, backtracking, and the structure of this + would be entirely different in a merged T+I. So this was a dead end. + +opt3-rewrite: + Here I bit the bullet, and started working out from scratch (reusing + snippets of code from both Translator and Interpreter) how to do an + integrated Pattern Match. I got very bogged down in captures and + backtracking, before eventually realising that working out which + substrings to capture should be completely postponed until after the + match was complete. All that it needed was to know how many fixed width + islands (sequences of string literals and '?'s) there were, and + for each island, the starting position in the string we were matching + against where that island had matched. All the captures could be + worked out from there. It took two solid evenings work, but eventually + it worked fine. See PatternMatch.pm, Tuple.pm (reused ADT) and so on. + This approach completely abandons the API instructions, the PosExpr + module and the concept of an interpreter for them. + + It's considerably shorter, and most likely faster (although I haven't + benchmarked them). + +Is it more elegant? I'm not sure. There was a great elegance about the +concept of an Abstract Pattern Instruction, with a front end compiling a +pattern into a list of APIs, and a back end interpreting them to perform +the pattern match. On the other hand, there was a certain elegance about +the concept of fixed width islands, either matched at a fixed location, +or matched floating about, and the startpos array. diff --git a/challenge-099/duncan-c-white/opt1-noFL/API.pm b/challenge-099/duncan-c-white/opt1-noFL/API.pm new file mode 100644 index 0000000000..2d8d23f0bd --- /dev/null +++ b/challenge-099/duncan-c-white/opt1-noFL/API.pm @@ -0,0 +1,255 @@ +package API; + +# automatically generated implementation of recursive data type +# API = A(string lit,posexpr at,string name) +# or M(string lit,posexpr atorafter,string name) +# or T(posexpr pe1,string op,posexpr pe2) +# or C(posexpr pe1,posexpr pe2) + + +use strict; +use warnings; +use feature 'say'; + +use overload '""' => \&as_string; +use overload 'eq' => \&streq; + + + +# API->A: Constructor or get method, use: +# my $obj = API->A($lit, $at, $name) OR +# my ($lit, $at, $name) = $obj->A +sub A ($;$$$) +{ + my( $thing, $lit, $at, $name ) = @_; + if( @_ == 4 ) # constructor + { + die "API->A: classname $thing is not 'API'\n" + unless $thing eq "API"; + return bless ["A", $lit, $at, $name], "API"; + } else # get method + { + die "API->A: $thing is not a API\n" + unless ref($thing) eq "API"; + my @x = @$thing; + my $t = shift @x; + die "API->A: malformed object $thing\n" + unless @x == 3 && $t eq "A"; + return @x; + } +} + + +# API->M: Constructor or get method, use: +# my $obj = API->M($lit, $atorafter, $name) OR +# my ($lit, $atorafter, $name) = $obj->M +sub M ($;$$$) +{ + my( $thing, $lit, $atorafter, $name ) = @_; + if( @_ == 4 ) # constructor + { + die "API->M: classname $thing is not 'API'\n" + unless $thing eq "API"; + return bless ["M", $lit, $atorafter, $name], "API"; + } else # get method + { + die "API->M: $thing is not a API\n" + unless ref($thing) eq "API"; + my @x = @$thing; + my $t = shift @x; + die "API->M: malformed object $thing\n" + unless @x == 3 && $t eq "M"; + return @x; + } +} + + +# API->T: Constructor or get method, use: +# my $obj = API->T($pe1, $op, $pe2) OR +# my ($pe1, $op, $pe2) = $obj->T +sub T ($;$$$) +{ + my( $thing, $pe1, $op, $pe2 ) = @_; + if( @_ == 4 ) # constructor + { + die "API->T: classname $thing is not 'API'\n" + unless $thing eq "API"; + return bless ["T", $pe1, $op, $pe2], "API"; + } else # get method + { + die "API->T: $thing is not a API\n" + unless ref($thing) eq "API"; + my @x = @$thing; + my $t = shift @x; + die "API->T: malformed object $thing\n" + unless @x == 3 && $t eq "T"; + return @x; + } +} + + +# API->C: Constructor or get method, use: +# my $obj = API->C($pe1, $pe2) OR +# my ($pe1, $pe2) = $obj->C +sub C ($;$$) +{ + my( $thing, $pe1, $pe2 ) = @_; + if( @_ == 3 ) # constructor + { + die "API->C: classname $thing is not 'API'\n" + unless $thing eq "API"; + return bless ["C", $pe1, $pe2], "API"; + } else # get method + { + die "API->C: $thing is not a API\n" + unless ref($thing) eq "API"; + my @x = @$thing; + my $t = shift @x; + die "API->C: malformed object $thing\n" + unless @x == 2 && $t eq "C"; + return @x; + } +} + + +sub kind ($) +{ + my( $self ) = @_; + die "API->kind: $self is not a API\n" + unless ref($self) eq "API"; + return $self->[0]; +} + + +sub as_string ($) +{ + my( $self ) = @_; + die "API->as_string: $self is not a API\n" + unless ref($self) eq "API"; + my @x = @$self; + my $t = shift @x; + + # stringify all params + @x = map { "$_" } @x; + + # specific printing rules + return "A'$x[0]' $x[1]->$x[2]" if $t eq 'A'; + return "C $x[0] $x[1]" if $t eq 'C'; + return "M'$x[0]' $x[1]->$x[2]" if $t eq 'M'; + return "T$x[0]$x[1]$x[2]" if $t eq 'T'; + + # general case + my $args = join( ',', @x ); + return $args ne "" ? "$t($args)" : "$t"; +} + + + +sub streq ($$) +{ + my( $a, $b ) = @_; + #print "API->streq called with a=$a, b=$b\n"; + return "$a" eq "$b" ? 1 : 0; # the sneaky way +} + + + + +my $debug = 0; +sub setdebug { my($d) = @_; $debug = $d; } + +# +# my @api = parse( $input ); +# Parse $input, a string containing a comma-separated sequence +# of Abstract Pattern Instructions, return the list of apis. +# Die screaming if parsing fails. +# For example, the pattern a*e is represented by the +# following api string: +# A'a' 0->a,A'e' slen-1->e,Te>a-1,C a+1 e-1 +# (where slen is a runtime variable representing the length of +# the target string these APIs would eventually operate on). +# +# The individual api forms are: +# M'str' posexpr->name +# A'str' posexpr->name +# Tposexpr ('>'|'='|'>=') posexpr +# C posexpr [posexpr] +# where posexpr = \d+ or name or name '+'|'-' \d+ (see module PosExpr) +# +sub parse ($) +{ + my( $input ) = @_; + my @result; + while( $input ) + { + # M'str' posexpr->name + if( $input =~ s/^M\s*// ) + { + $input =~ s/^'([^']+)'\s+// || + die "bad input $input in M..\n"; + my $str = $1; + (my $pe,$input) = PosExpr::parse($input); + die "bad input $input in M$str $pe...\n" + unless $input =~ s/^\s*->\s*(\w+)//; + my $pname = $1; + my $api = API->M($str, $pe, $pname); + say "debug: parsed api $api, rest input $input" + if $debug; + push @result, $api; + } + # A'str' posexpr->name + elsif( $input =~ s/^A\s*// ) + { + $input =~ s/^'([^']+)'\s+// || + die "bad input $input in A..\n"; + my $str = $1; + (my $pe,$input) = PosExpr::parse($input); + die "bad input $input in A$str $pe...\n" + unless $input =~ s/^\s*->\s*(\w+)//; + my $pname = $1; + my $api = API->A($str, $pe, $pname); + say "debug: parsed api $api, rest input $input" + if $debug; + push @result, $api; + } + # Tposexpr ('>'|'='|'>=') posexpr + elsif( $input =~ s/^T\s*// ) + { + (my $pe,$input) = PosExpr::parse($input); + $input =~ s/^(>=|>|=)// || + die "bad input $input in T$pe, ". + "'>','=' or '>=' expected\n"; + my $op = $1; + (my $pe2,$input) = PosExpr::parse($input); + my $api = API->T($pe, $op, $pe2); + say "debug: parsed api $api, rest input $input" + if $debug; + push @result, $api; + } + # C posexpr [posexpr] + elsif( $input =~ s/^C\s*// ) + { + (my $pe,$input) = PosExpr::parse($input); + $input =~ s/^\s*//; + + # second posexpr is optional: + # present if next ch is not ',' + my $pe2 = $pe; + if( $input ne '' && $input !~ /^,/ ) + { + ($pe2,$input) = PosExpr::parse($input); + } + my $api = API->C($pe, $pe2); + say "debug: parsed api $api, rest input $input" + if $debug; + push @result, $api; + } + $input =~ s/^\s*,\s*//; + } + die "bad input $input, non empty but not M|T|C..\n" if $input; + return @result; +} + + +1; + diff --git a/challenge-099/duncan-c-white/opt1-noFL/Interpreter.pm b/challenge-099/duncan-c-white/opt1-noFL/Interpreter.pm new file mode 100644 index 0000000000..4e2e29fb1a --- /dev/null +++ b/challenge-099/duncan-c-white/opt1-noFL/Interpreter.pm @@ -0,0 +1,123 @@ +package Interpreter; + +# Here's the interpreter for a list of APIs. This is the guts of +# pattern matching. + +use strict; +use warnings; +use feature 'say'; +use Function::Parameters; +use Data::Dumper; +use Exporter; +our @ISA = qw(Exporter); + +our @EXPORT_OK = qw(interprete); # Export on demand + +use lib qw(.); +use API; +use PosExpr; + +my $debug = 0; + +sub setdebug { my($d) = @_; $debug = $d; } + + +# +# my @litpos = findliteral( $lit, $p, $s ); +# Find all instances of $lit atorafter position $p in $s, +# build and return a list of possible starting positions. +# +fun findliteral( $lit, $p, $s ) +{ + my $slen = length($s); + my $llen = length($lit); + my @result = grep { substr($s,$_,$llen) eq $lit; } $p..$slen-1; + #die "findliteral: lit=$lit, s=$s, results=".Dumper(\@result); + return @result; +} + + +# +# my $match = interprete( $s, $apilist, $poshash, $mtlist ); +# Interprete @$apilist against the string $s, using (and modifying) +# %$poshash which maps names to their positions. +# Return 1 for a match (filling in @$mtlist) or 0 for no match. +# +fun interprete( $s, $apilist, $poshash, $mtlist ) +{ + my $slen = length($s); + my $n = 0; + foreach my $api (@$apilist) + { + my $kind = $api->kind; + if( $kind eq 'A' ) # Match at pos + { + my( $lit, $at, $name ) = $api->A; + my $pos = $at->eval($poshash); + return 0 unless substr($s,$pos,length($lit)) eq $lit; + $poshash->{$name} = $pos; + say "debug: A: set pos[$name] = $pos" if $debug; + } + elsif( $kind eq 'M' ) # Match at or after - tricky + { + my( $lit, $atorafterpos, $name ) = $api->M; + my $atorafter = $atorafterpos->eval($poshash); + my @litpos = findliteral( $lit, $atorafter, $s ); + return 0 if @litpos==0; + + # must try each possible position in @litpos in turn + if( @litpos == 1 ) + { + $poshash->{$name} = $litpos[0]; + say "debug: M: set pos[$name] = $litpos[0]" if $debug; + } else + { + say "debug: M: try each pos ".join(',',@litpos) if $debug; + # Try the rest of the api list.. + my @restapi = @$apilist[$n+1..$#$apilist]; + + # for each possible pos of the match + foreach my $pos (@litpos) + { + $poshash->{$name} = $pos; + say "debug: try pos[$name] = $pos" if $debug; + my $match = interprete( + $s, \@restapi, $poshash, $mtlist ); + say "debug: matchrest = $match" if $debug; + return 1 if $match; + undef $poshash->{$name}; + } + return 0; + } + } + elsif( $kind eq 'T' ) + { + my( $pe1, $op, $pe2 ) = $api->T; + my $pv1 = $pe1->eval($poshash); + my $pv2 = $pe2->eval($poshash); + return 0 if $op eq '>' && $pv1 <= $pv2; + return 0 if $op eq '>=' && $pv1 < $pv2; + return 0 if $op eq '=' && $pv1 != $pv2; + say "debug: test $api succeeded" if $debug; + } + elsif( $kind eq 'C' ) + { + my ($pe1, $pe2) = $api->C; + my $from = $pe1->eval($poshash); + my $to = $pe2->eval($poshash); + my $cap = substr($s,$from,$to-$from+1) // ''; + my $mn = @$mtlist; + push @$mtlist, $cap; + say "debug: C: set mt[$mn] to $cap" if $debug; + } + else + { + die "interprete: not yet implemented $kind\n"; + } + $n++; + } + return 1; +} + + +1; diff --git a/challenge-099/duncan-c-white/opt1-noFL/PosExpr.pm b/challenge-099/duncan-c-white/opt1-noFL/PosExpr.pm new file mode 100644 index 0000000000..dbeef03ff4 --- /dev/null +++ b/challenge-099/duncan-c-white/opt1-noFL/PosExpr.pm @@ -0,0 +1,253 @@ +package PosExpr; + +# automatically generated implementation of recursive data type +# PosExpr = I(int n) +# or N(string name) +# or NO(string name,char op,int n) + + +use strict; +use warnings; +use feature 'say'; + +use overload '""' => \&as_string; +use overload 'eq' => \&streq; + + + +# PosExpr->I: Constructor or get method, use: +# my $obj = PosExpr->I($n) OR +# my $n = $obj->I +sub I ($;$) +{ + my( $thing, $n ) = @_; + if( @_ == 2 ) # constructor + { + die "PosExpr->I: classname $thing is not 'PosExpr'\n" + unless $thing eq "PosExpr"; + return bless ["I", $n], "PosExpr"; + } else # get method + { + die "PosExpr->I: $thing is not a PosExpr\n" + unless ref($thing) eq "PosExpr"; + my @x = @$thing; + my $t = shift @x; + die "PosExpr->I: malformed object $thing\n" + unless @x == 1 && $t eq "I"; + return $x[0]; + } +} + + +# PosExpr->N: Constructor or get method, use: +# my $obj = PosExpr->N($name) OR +# my $name = $obj->N +sub N ($;$) +{ + my( $thing, $name ) = @_; + if( @_ == 2 ) # constructor + { + die "PosExpr->N: classname $thing is not 'PosExpr'\n" + unless $thing eq "PosExpr"; + return bless ["N", $name], "PosExpr"; + } else # get method + { + die "PosExpr->N: $thing is not a PosExpr\n" + unless ref($thing) eq "PosExpr"; + my @x = @$thing; + my $t = shift @x; + die "PosExpr->N: malformed object $thing\n" + unless @x == 1 && $t eq "N"; + return $x[0]; + } +} + + +# PosExpr->NO: Constructor or get method, use: +# my $obj = PosExpr->NO($name, $op, $n) OR +# my ($name, $op, $n) = $obj->NO +sub NO ($;$$$) +{ + my( $thing, $name, $op, $n ) = @_; + if( @_ == 4 ) # constructor + { + die "PosExpr->NO: classname $thing is not 'PosExpr'\n" + unless $thing eq "PosExpr"; + return bless ["NO", $name, $op, $n], "PosExpr"; + } else # get method + { + die "PosExpr->NO: $thing is not a PosExpr\n" + unless ref($thing) eq "PosExpr"; + my @x = @$thing; + my $t = shift @x; + die "PosExpr->NO: malformed object $thing\n" + unless @x == 3 && $t eq "NO"; + return @x; + } +} + + +sub kind ($) +{ + my( $self ) = @_; + die "PosExpr->kind: $self is not a PosExpr\n" + unless ref($self) eq "PosExpr"; + return $self->[0]; +} + + +sub as_string ($) +{ + my( $self ) = @_; + die "PosExpr->as_string: $self is not a PosExpr\n" + unless ref($self) eq "PosExpr"; + my @x = @$self; + my $t = shift @x; + + # stringify all params + @x = map { "$_" } @x; + + # specific printing rules + return "$x[0]" if $t eq 'I'; + return "$x[0]" if $t eq 'N'; + return "$x[0]$x[1]$x[2]" if $t eq 'NO'; + + # general case + my $args = join( ',', @x ); + return $args ne "" ? "$t($args)" : "$t"; +} + + + +sub streq ($$) +{ + my( $a, $b ) = @_; + #print "PosExpr->streq called with a=$a, b=$b\n"; + return "$a" eq "$b" ? 1 : 0; # the sneaky way +} + + + + +my $debug = 0; +sub setdebug { my($d) = @_; $debug = $d; } + +# +# my($pe,$leftover) = parse($input); +# Parse $input, a string starting with a position expression, +# delivering the position expression in $pe and the leftover +# input in $leftover. Die screaming if no well formed pos expr +# can be found at the start of $input. +# A position expression is either \d+ or name or name '+'|'-' \d+ +# The internal form of a posexpr is ['I', int const] or ['N', name] +# or ['NO', name, +-, offset] +# +sub parse ($) +{ + my( $input ) = @_; + if( $input =~ s/^(\d+)\s*// ) + { + return ( PosExpr->I($1), $input ); + } + $input =~ s/^(\w+)\s*// || + die "bad input in pos expr $input, name expected\n"; + my $name = $1; + if( $input =~ s/^([\+-])\s*(\d+)\s*// ) + { + if( $2 == 0 ) + { + return ( PosExpr->N($name), $input ); + } else + { + return ( PosExpr->NO($name,$1,$2), $input ); + } + } + return ( PosExpr->N($name), $input ); +} + +# +# my $pe = simpleparse($str); +# Parse a position expression out of $str. Fail if there's +# anything left over. +# +sub simpleparse ($) +{ + my( $str ) = @_; + my($pe,$leftover) = parse($str); + die "PosExpr->simpleparse($str): left over $leftover after pe $pe\n" + if $leftover; + return $pe; +} + + +# +# my $newpe = $pe->add($n); +# Arithmetic on PosExprs: add $n (an integer) +# to $pe, giving $newpe. +# +sub add +{ + my( $self, $n ) = @_; + return $self if $n==0; + if( $self->kind eq 'I' ) + { + my $x = $self->I; + return PosExpr->I( $x+$n ); + } elsif( $self->kind eq "N" ) + { + my $name = $self->N; + my $op = $n>0?'+':'-'; + $n = abs($n); + return PosExpr->NO( $name, $op, $n ); + } else + { + # .. NO( string name, char op, int n ) + my( $name, $op, $x ) = $self->NO; + $x = -$x if $op eq '-'; + $op = '+'; + $x += $n; + return PosExpr->N( $name ) if $x==0; + $op = '-' if $x<0; + $x = abs($x); + return PosExpr->NO( $name, $op, $x ); + } +} + + +# +# my $pos = $pe->eval($poshash); +# Evaluate the given PosExpr $pe, using %$poshash +# for name lookup. Returns the actual position (a number). +# +sub eval ($$) +{ + my( $self, $poshash ) = @_; + if( $self->kind eq 'I' ) + { + return $self->I; + } elsif( $self->kind eq 'N' ) + { + my $name = $self->N; + die "error in PosExpr::eval($self): ". + "no such name $name in poshash ". Dumper($poshash) + unless exists $poshash->{$name}; + return $poshash->{$name}; + } else # if( $self->kind eq 'NO' ) + { + my ($name, $op, $n) = $self->NO; + die "error in PosExpr::eval($self): ". + "no such name $name in poshash ". Dumper($poshash) + unless exists $poshash->{$name}; + die "error in PosExpr::eval($self): bad op $op\n" unless + $op eq '+' || $op eq '-'; + $n = - $n if $op eq '-'; + return $poshash->{$name} + $n; + } +} + + + + + +1; + diff --git a/challenge-099/duncan-c-white/opt1-noFL/README b/challenge-099/duncan-c-white/opt1-noFL/README new file mode 100644 index 0000000000..29e7be5d6f --- /dev/null +++ b/challenge-099/duncan-c-white/opt1-noFL/README @@ -0,0 +1,5 @@ +In this version, I've remove F'str'->name and L'str'->name APIs, +because after all F and L are just special cases of A'str' pos->name, +(pos=0 for F, and pos=slen-length(str) for L). + +This saves about 100 lines overall.. diff --git a/challenge-099/duncan-c-white/opt1-noFL/TestMatch.pm b/challenge-099/duncan-c-white/opt1-noFL/TestMatch.pm new file mode 100644 index 0000000000..9cfac08f40 --- /dev/null +++ b/challenge-099/duncan-c-white/opt1-noFL/TestMatch.pm @@ -0,0 +1,75 @@ +package TestMatch; + +use strict; +use warnings; +use feature 'say'; +use Data::Dumper; +use Function::Parameters; +use Test::More; + +use Exporter; +our @ISA = qw(Exporter); + +our @EXPORT_OK = qw(match_tests); # Export on demand + +use lib qw(.); +use API; + +my $debug = 0; + +sub |
