From 7b1522c52a9f0be331dba4fcf0c2916b08b374f7 Mon Sep 17 00:00:00 2001 From: Jörg Sommrey <28217714+jo-37@users.noreply.github.com> Date: Mon, 18 Jan 2021 15:57:08 +0100 Subject: Solution to task 1 --- challenge-096/jo-37/perl/ch-1.pl | 64 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100755 challenge-096/jo-37/perl/ch-1.pl diff --git a/challenge-096/jo-37/perl/ch-1.pl b/challenge-096/jo-37/perl/ch-1.pl new file mode 100755 index 0000000000..4646532d1c --- /dev/null +++ b/challenge-096/jo-37/perl/ch-1.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl -s + +use v5.16; +use Test2::V0; + +our ($tests, $examples); + +run_tests() if $tests || $examples; # does not return + +say(< Date: Mon, 18 Jan 2021 21:41:41 +0100 Subject: Solution to task 2 --- challenge-096/jo-37/perl/ch-2.pl | 188 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100755 challenge-096/jo-37/perl/ch-2.pl diff --git a/challenge-096/jo-37/perl/ch-2.pl b/challenge-096/jo-37/perl/ch-2.pl new file mode 100755 index 0000000000..72f1a6afb9 --- /dev/null +++ b/challenge-096/jo-37/perl/ch-2.pl @@ -0,0 +1,188 @@ +#!/usr/bin/perl -s + +use v5.16; +use Test2::V0 '!float'; +use PDL; +use experimental 'signatures'; + +our ($tests, $examples, $verbose); + +run_tests() if $tests || $examples; # does not return + +say(<setvaltobad(0); + + # Fill first row and column with ascending sequences as the costs of + # removing or inserting a prefix of that length. + $dist->slice(',(0)') .= sequence $dist->dim(0); + $dist->slice('(0),') .= sequence $dist->dim(1); + + # To fill the remaining fields, loop over the indices of all BAD + # values. With the index order provided by "whichND" the required + # "flood filling" is achieved. + for my $idx ($dist->isbad->whichND->dog) { + + # Character pair for index position. + my $cs = $source[$idx->at(0)]; + my $ct = $target[$idx->at(1)]; + + # 2x2 neighborhood matrix preceeding index. + my $nb = $dist->range($idx - 1, 2); + + # 2x2 transition cost matrix. + my $trans = cost $cs ne $ct; + + # The cost to reach the new field is the minimum of the sums of + # the previous cost and the transition cost. + $dist->indexND($idx) .= min $nb + $trans; + } + + explain_edit(\@source, \@target, $dist) if $verbose; + + # The requested Levenshtein distance is the bottom right matrix + # element. + $dist->indexND($dist->shape - 1); +} + +# Backtrack the steps that led to the lower right corner of the distance +# matrix and describe the corresponding edit operations. +sub explain_edit ($source, $target, $dist) { + + # Collect edit operations. + my @edit; + + # Start at bottom right corner. + my $idx = $dist->shape - 1; + + # Stop at upper left corner. + while (any $idx) { + + # Character pair for index position. + my $cs = $source->[$idx->at(0)]; + my $ct = $target->[$idx->at(1)]; + + # 2x2 neighborhood matrix preceeding index, truncated to BAD + # values across the matrix' borders. + my $nb = $dist->range($idx - 1, 2, 'truncate'); + + # The field preceeding the current field holds the minimum + # within the neighborhood. Find the minimum and the + # corresponding index. On a non-unique minimum, this will + # prefer the diagonal "replace" move. + my ($min, $pred) = min_ind $nb; + + # Record the edit operation. + unshift @edit, + any($pred) ? + $pred->at(0) ? + "ins($ct)" : + "del($cs)" : + $dist->indexND($idx) == $min ? + "keep($cs)" : + "repl($cs,$ct)"; + + # Move to predecessor field. + $idx = $idx - 1 + $pred; + } + + { + no warnings 'uninitialized'; + say @$source, ' -> ', @$target, ':'; + } + say "@edit"; + say "Levenshtein distance = ", $dist->indexND($dist->shape - 1); + say "Wagner-Fischer distance matrix:", $dist; +} + +# Cost matrix for a single insert, delete and keep/replace. +sub cost ($repl) { + long([0, 1], [1, 0])->setvaltobad(0)->set(0, 0, $repl); +} + +# Find the minimum in an N-dim piddle together with the corresponding +# index. Though this looks complicated, it is very efficient. +sub min_ind ($min) { + my (@index, $index); + + # Reduce dimension by projection via minimum down to zero (getting + # to the global minimum as a scalar) and record the index piddle for + # each dimension. + ($min, undef, $index[@index]) = $min->minmaximum while $min->dims; + + # Expand the target index bottom-up from the record by successively + # applying the so far built index to the next higher dimension index. + $index = $_->indexND($index)->glue(0, $index) for reverse @index; + + ($min, $index); +} + + +### Examples and tests + +sub run_tests { + SKIP: { + skip "examples" unless $examples; + is levenshtein_distance('kitten', 'sitting'), 3, 'example 1'; + is levenshtein_distance('sunday', 'monday'), 2, 'example 2'; + is levenshtein_distance('Saturday', 'Sunday'), 3, + 'example from wikipedia'; + is levenshtein_distance('parachute', 'headache'), 5, + 'another example'; + } + + SKIP: { + skip "tests" unless $tests; + is levenshtein_distance('abcdef', 'uvwxyz'), 6, 'replace all'; + is levenshtein_distance('def', 'abcdef'), 3, 'insert prefix'; + is levenshtein_distance('abcdef', 'def'), 3, 'delete prefix'; + is levenshtein_distance('abcdef', 'uvwdef'), 3, 'replace prefix'; + is levenshtein_distance('abc', 'abcdef'), 3, 'insert suffix'; + is levenshtein_distance('abcdef', 'abc'), 3, 'delete suffix'; + is levenshtein_distance('abcdef', 'abcxyz'), 3, 'replace suffix'; + is levenshtein_distance('abef', 'abcdef'), 2, 'insert infix'; + is levenshtein_distance('abcdef', 'abef'), 2, 'delete infix'; + is levenshtein_distance('abcdef', 'abwxef'), 2, 'replace infix'; + } + done_testing; + exit; +} -- cgit From 282303002673fb67ec499f67c90be22a8d509f5f Mon Sep 17 00:00:00 2001 From: Jörg Sommrey <28217714+jo-37@users.noreply.github.com> Date: Sat, 23 Jan 2021 15:47:47 +0100 Subject: Simplify min_ind --- challenge-096/jo-37/perl/ch-2.pl | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/challenge-096/jo-37/perl/ch-2.pl b/challenge-096/jo-37/perl/ch-2.pl index 72f1a6afb9..cdb9eaa346 100755 --- a/challenge-096/jo-37/perl/ch-2.pl +++ b/challenge-096/jo-37/perl/ch-2.pl @@ -140,18 +140,10 @@ sub cost ($repl) { } # Find the minimum in an N-dim piddle together with the corresponding -# index. Though this looks complicated, it is very efficient. -sub min_ind ($min) { - my (@index, $index); - - # Reduce dimension by projection via minimum down to zero (getting - # to the global minimum as a scalar) and record the index piddle for - # each dimension. - ($min, undef, $index[@index]) = $min->minmaximum while $min->dims; - - # Expand the target index bottom-up from the record by successively - # applying the so far built index to the next higher dimension index. - $index = $_->indexND($index)->glue(0, $index) for reverse @index; +# index. +sub min_ind ($pdl) { + my $min = min $pdl; + my $index = whichND($pdl == $min)->slice(',(0)'); ($min, $index); } -- cgit