Gustavo Chaves Perl 5 solutions to the challenge 010

author: Gustavo L. de M. Chaves <gustavo@cpqd.com.br> 2019-05-31 22:51:20 -0300
committer: Gustavo L. de M. Chaves <gustavo@cpqd.com.br> 2019-05-31 22:51:20 -0300
commit: b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1 (patch)
tree: 8b6ff04062a95fd1db28cebd5578045994941d74 /challenge-010
parent: 6135ea57e1b789c67981e2de52c2bc9c666a0f74 (diff)
download: perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.tar.gz
perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.tar.bz2
perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.zip
3 files changed, 237 insertions, 0 deletions
diff --git a/challenge-010/gustavo-chaves/perl5/README.pod b/challenge-010/gustavo-chaves/perl5/README.pod
new file mode 100644
index 0000000000..6781160863
--- /dev/null
+++ b/challenge-010/gustavo-chaves/perl5/README.pod
@@ -0,0 +1,61 @@
+=pod
+
+=encoding utf8
+
+=head1 #1 Roman numerals
+
+=over 4
+
+Write a script to encode/decode Roman numerals. For example, given Roman numeral
+CCXLVI, it should return 246. Similarly, for decimal number 39, it should return
+XXXIX. Checkout L<wikipedia page|https://en.wikipedia.org/wiki/Roman_numerals>
+for more information.
+
+=back
+
+In the Wikipedia I learned that there are two different notations for Roman
+numerals: additive and subtractive.  They differ in how they render some
+numbers, like these:
+
+  +--------+----------+-------------+
+  | Arabic | Additive | Subtractive |
+  +--------+----------+-------------+
+  |    4   |    IIII  |   IV        |
+  |    9   |      IX  |   VIIII     |
+  |   19   |  XVIIII  |   XIX       |
+  |   90   |      XC  |   LXXXX     |
+  +--------+----------+-------------+
+
+My solution (ch-1.pl) accepts a list of arabic or roman numerals as arguments,
+detects in which system they are and converts them to the other system. The
+convertion from arabic to roman always uses the additive notation, but the
+convertion from roman to arabic hopefully understands both notations.
+
+In particular, this means that my solution does not fully comply with the
+specification, because it asks that 39 should be converted to XXXIX, which is in
+subtractive notation. My script converts 39 to XXXVIIII. Like this:
+
+  $ ./ch-1.pl CCXLVI 246 XXXIX 39
+  CCXLVI == 246
+  246 == CCXXXXVI
+  XXXIX == 39
+  39 == XXXVIIII
+
+=head1 #2 Ranking
+
+=over 4
+
+Write a script to find Jaro-Winkler distance between two strings. For more
+information check L<wikipedia
+page|https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance>.
+
+=back
+
+I found it hard to fully grasp the Definition Section in the English wikipedia
+page. Fortunately, the L<French
+page|https://fr.wikipedia.org/wiki/Distance_de_Jaro-Winkler> has more examples
+which allowed me to get going, although it seems to be confounding the Jaro
+Distance with the Jaro Similarity.
+
+I'm not sure about the correctness of my solution. At least, it calculates
+correctly the three examples in the French page.
diff --git a/challenge-010/gustavo-chaves/perl5/ch-1.pl b/challenge-010/gustavo-chaves/perl5/ch-1.pl
new file mode 100755
index 0000000000..c816f5aed5
--- /dev/null
+++ b/challenge-010/gustavo-chaves/perl5/ch-1.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+use 5.026;
+use strict;
+use autodie;
+use warnings;
+
+sub main {
+    foreach (@ARGV) {
+        if (/^\d+$/) {
+            say "$_ == ", arabic_to_roman($_);
+        } elsif (/^[IVXLCDM]+$/) {
+            say "$_ == ", roman_to_arabic($_);
+        } else {
+            die "'$_' is neither an arabic nor a roman numeral\n";
+        }
+    }
+    return 0;
+}
+
+my %powers = (
+    M => 1000,
+    D => 500,
+    C => 100,
+    L => 50,
+    X => 10,
+    V => 5,
+    I => 1,
+);
+
+my @powers =
+    map {[$_ => $powers{$_}]}
+    sort {$powers{$b} <=> $powers{$a}}
+    keys %powers;
+
+sub arabic_to_roman {
+    my ($arabic) = @_;
+
+    my $roman = '';
+
+    foreach my $power (@powers) {
+        use integer;
+        my ($letter, $base) = @$power;
+        if (my $multiple = $arabic / $base) {
+            $roman .= $letter x $multiple;
+        }
+        $arabic %= $base;
+    }
+
+    return $roman;
+}
+
+sub roman_to_arabic {
+    my ($roman) = @_;
+
+    my $arabic = my $accumulator = 0;
+    my $last_power = 10000;     # greater than the biggest power
+
+    foreach my $letter (split //, $roman) {
+        my $power = $powers{$letter};
+        if ($power < $last_power) {
+            $arabic += $accumulator;
+            $accumulator = $power;
+        } elsif ($power == $last_power) {
+            # additive notation
+            $accumulator += $power;
+        } else {
+            # subtractive notation
+            $arabic += $accumulator - 2 * $last_power + $power;
+            $accumulator = 0;
+        }
+        $last_power = $power;
+    }
+    $arabic += $accumulator;
+
+    return $arabic;
+}
+
+main();
diff --git a/challenge-010/gustavo-chaves/perl5/ch-2.pl b/challenge-010/gustavo-chaves/perl5/ch-2.pl
new file mode 100755
index 0000000000..2e84a08009
--- /dev/null
+++ b/challenge-010/gustavo-chaves/perl5/ch-2.pl
@@ -0,0 +1,97 @@
+#!/usr/bin/env perl
+
+use 5.026;
+use strict;
+use autodie;
+use warnings;
+use List::Util qw(min max);
+
+sub main {
+    die "usage: $0 STRING STRING\n" unless @ARGV == 2;
+    my ($s1, $s2) = @ARGV;
+
+    say jaro_winkler_distance($s1, $s2);
+
+    return 0;
+}
+
+sub jaro_winkler_distance {
+    my ($s1, $s2) = @_;
+
+    return 1 - jaro_winkler_similarity($s1, $s2);
+}
+
+sub jaro_winkler_similarity {
+    my ($s1, $s2) = @_;
+
+    my $similarity = jaro_similarity($s1, $s2);
+    my $length = common_prefix_length($s1, $s2);
+    my $scaling_factor = 0.1;
+
+    return $similarity + $length * $scaling_factor * (1 - $similarity);
+}
+
+sub jaro_similarity {
+    my ($s1, $s2) = @_;
+
+    my ($matchings, $transpositions) = matching_characters_and_transpositions($s1, $s2);
+
+    return
+        $matchings == 0
+        ? 0
+        : ($matchings/length($s1) + $matchings/length($s2) + ($matchings-$transpositions)/$matchings) / 3;
+}
+
+sub matching_characters_and_transpositions {
+    my ($s1, $s2) = @_;
+
+    my $l1 = length $s1;
+    my $l2 = length $s2;
+
+    my $farthest = int(max($l1, $l2)/2) - 1;
+
+    my $m1 = matching_characters($s1, $s2, $l1, $l2, $farthest);
+    my $m2 = matching_characters($s2, $s1, $l2, $l1, $farthest);
+
+    my $matchings = min(length $m1, length $m2);
+
+    my $transpositions = 0;
+    for (my $i=0; $i<$matchings; ++$i) {
+        if (substr($m1, $i, 1) ne substr($m2, $i, 1)) {
+            ++$transpositions;
+        }
+    }
+
+    return ($matchings, int($transpositions/2));
+}
+
+sub matching_characters {
+    my ($s1, $s2, $l1, $l2, $farthest) = @_;
+
+    my $matching = '';
+
+    for my $i (0 .. $l1) {
+        my $char = substr($s1, $i, 1);
+        for my $j (max(0, $i-$farthest) .. min($i+$farthest, $l2-1)) {
+            if ($char eq substr($s2, $j, 1)) {
+                $matching .= $char;
+                last;
+            }
+        }
+    }
+
+    return $matching;
+}
+
+sub common_prefix_length {
+    my ($s1, $s2) = @_;
+
+    my $length = 0;
+    for my $i (0 .. 3) {
+        last if substr($s1, $i, 1) ne substr($s2, $i, 1);
+        ++$length;
+    }
+    return $length;
+}
+
+main();
author	Gustavo L. de M. Chaves <gustavo@cpqd.com.br>	2019-05-31 22:51:20 -0300
committer	Gustavo L. de M. Chaves <gustavo@cpqd.com.br>	2019-05-31 22:51:20 -0300
commit	b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1 (patch)
tree	8b6ff04062a95fd1db28cebd5578045994941d74 /challenge-010
parent	6135ea57e1b789c67981e2de52c2bc9c666a0f74 (diff)
download	perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.tar.gz perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.tar.bz2 perlweeklychallenge-club-b2a5c076ec32c123540821bcbf3e51e9e8fd4fa1.zip