diff options
| author | Jörg Sommrey <28217714+jo-37@users.noreply.github.com> | 2020-12-23 19:35:03 +0100 |
|---|---|---|
| committer | Jörg Sommrey <28217714+jo-37@users.noreply.github.com> | 2020-12-23 19:35:03 +0100 |
| commit | d9647dbc59d9546451f8759e6c448c5011096cb0 (patch) | |
| tree | 684448a2cf19d3cf25cea8e7d219818db9a5e1ec | |
| parent | 8f20af5028b170fe9ce26433f61cfe1bf10bdd42 (diff) | |
| parent | 51beb00928ae3a8cb0c1fe8660ce2b80e9467a1a (diff) | |
| download | perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.gz perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.bz2 perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.zip | |
Solutions to challenge 092
| -rwxr-xr-x | challenge-092/jo-37/perl/ch-1.pl | 79 | ||||
| -rwxr-xr-x | challenge-092/jo-37/perl/ch-2.pl | 73 |
2 files changed, 152 insertions, 0 deletions
diff --git a/challenge-092/jo-37/perl/ch-1.pl b/challenge-092/jo-37/perl/ch-1.pl new file mode 100755 index 0000000000..594a55d31b --- /dev/null +++ b/challenge-092/jo-37/perl/ch-1.pl @@ -0,0 +1,79 @@ +#!/usr/bin/perl + +use v5.16; +use utf8; +use charnames qw(:full :short); +use Unicode::Normalize; +use Digest; +use Test2::V0; + +#use constant DIGEST => 'SHA-384'; +use constant DIGEST => 'SHA-1'; + +# The task appears to be very easy, especially because the specification +# comes along with a detailed description of an optimal implementation. +# This invites to go the extra mile. +# +# - A unique "isomorphic canonicalization" is created. By taking a +# message digest of this canonicalization, isomorphic strings can be +# identified by identical digests. +# +# - Extended grapheme clusters (see +# https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) +# are used to identify single "characters". By normalization (using +# canonical decomposition) different representations of the same +# symbol are transformed into identical grapheme clusters. See the +# last test case for an example. +sub uni_iso_digest { + # Normalize input (Normalization Form D - canonical decomposition) + local $_ = NFD shift; + + my $digest = Digest->new(DIGEST); + my $n = 0; + my %chars; + my @canon; + + # Assign an ascending number to every new character appearing in the + # string and append the current number's bits to the message. + # Use \X to grab for a grapheme cluster. + while (s/(\X)//) { + $chars{$1} = $n++ unless exists $chars{$1}; + $digest->add_bits(pack('L', $chars{$1}), 32); + push @canon, $chars{$1}; + } + + # Return the canonicalization and the message digest in list + # context. Returns solely the digest in scalar context. + (\@canon, $digest->hexdigest); +} + +is uni_iso_digest('abc'), uni_iso_digest('xyz'), 'Example 1'; +is uni_iso_digest('abb'), uni_iso_digest('xyy'), 'Example 2'; +isnt uni_iso_digest('sum'), uni_iso_digest('add'), 'Example 3'; + +is uni_iso_digest('αββγ'), uni_iso_digest(9001), 'multi-byte vs. numeric'; + +# Nine code points form four separate characters with two +# different characters having two differing representations. +my $str = + "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}" . + + "\N{LATIN CAPITAL LETTER A}" . + "\N{COMBINING DIAERESIS}" . + + "\N{LATIN CAPITAL LETTER A}" . + "\N{COMBINING DIAERESIS}" . + "\N{COMBINING LONG STROKE OVERLAY}" . + + "\N{LATIN CAPITAL LETTER A}" . + "\N{COMBINING LONG STROKE OVERLAY}" . + "\N{COMBINING DIAERESIS}"; + +is uni_iso_digest($str), uni_iso_digest('xxyy'), + 'Take normalized grapheme clusters as the individual characters'; + +# Retrieve the canonicalization only: +my ($canon) = uni_iso_digest($str); +say "canonical: (@$canon)"; + +done_testing; diff --git a/challenge-092/jo-37/perl/ch-2.pl b/challenge-092/jo-37/perl/ch-2.pl new file mode 100755 index 0000000000..b695b153bf --- /dev/null +++ b/challenge-092/jo-37/perl/ch-2.pl @@ -0,0 +1,73 @@ +#!/usr/bin/perl + +use v5.16; +use Test2::V0; +use experimental 'postderef'; +use List::MoreUtils 'minmax'; + +# Intervals are represented here by refs to two-element arrays +# containing the lower and upper border. At least one border (the same +# side for all) has to be part of the interval. + +# Single insertion step: return the interval(s) to be passed to the +# result and the new insertion (if any) when comparing the insertion +# with a single part of the given interval list. +sub pass_merge { + my ($base, $insert) = @_; + + # No insertion: just pass the base. + return [$base] unless $insert; + + # Base below insertion: pass the base and keep the insertion. + return ([$base], $insert) if $base->[1] < $insert->[0]; + + # Insertion below base: pass insertion and base, clear insertion. + return [$insert, $base] if $insert->[1] < $base->[0]; + + # Overlapping intervals: pass nothing and use merged intervals as + # new insertion. + ([], [minmax $base->@*, $insert->@*]); +} + +# Insert the new interval (1st arg) into the list of given intervals by +# sliding the insertion over the list of intervals and performing the +# operations as provided by pass_merge. +sub insert { + my $insert = shift; + + ((map { + (my $pass, $insert) = pass_merge $_, $insert; + $pass->@*; + } @_), + # Append the insertion if it still exists. + ($insert) x !!$insert); +} + +is [insert [2, 6], [1, 4], [8, 10]], + [[1, 6], [8, 10]], 'Example 1'; + +is [insert [5, 8], [1, 2], [3, 7], [8, 10]], + [[1, 2], [3, 10]], 'Example 2'; + +is [insert [10, 11], [1, 5], [7, 9]], + [[1, 5], [7, 9], [10, 11]], 'Example 3'; + +is [insert [1, 2], [3, 4], [5, 6], [7, 8]], + [[1, 2], [3, 4], [5, 6], [7, 8]], 'Prepend new interval'; + +is [insert [9, 10], [3, 4], [5, 6], [7, 8]], + [[3, 4], [5, 6], [7, 8], [9, 10]], 'Append new interval'; + +is [insert [1, 8], [2, 3], [4, 5], [6, 7]], + [[1, 8]], 'Eat up everything'; + +is [insert [3, 5], [1, 2], [4, 6], [7, 8], [9, 10]], + [[1, 2], [3, 6], [7, 8], [9, 10]], 'Left merge'; + +is [insert [4, 6], [1, 2], [3, 5], [7, 8], [9, 10]], + [[1, 2], [3, 6], [7, 8], [9, 10]], 'Right merge'; + +is [insert [5, 8], [1, 2], [4, 6], [7, 9], [10, 11], [12, 13]], + [[1, 2], [4, 9], [10, 11], [12, 13]], 'Join intervals'; + +done_testing; |
