From 3dec094185381ab2411067eaa1d557bc7ae16362 Mon Sep 17 00:00:00 2001 From: Paulo Custodio Date: Thu, 14 Jan 2021 09:30:43 +0000 Subject: Add Perl solution to challenge 010 --- challenge-010/paulo-custodio/README | 1 + challenge-010/paulo-custodio/perl/ch-1.pl | 75 +++++++++++++++++++++++++ challenge-010/paulo-custodio/perl/ch-2.pl | 91 +++++++++++++++++++++++++++++++ challenge-010/paulo-custodio/test.pl | 33 +++++++++++ 4 files changed, 200 insertions(+) create mode 100644 challenge-010/paulo-custodio/README create mode 100644 challenge-010/paulo-custodio/perl/ch-1.pl create mode 100644 challenge-010/paulo-custodio/perl/ch-2.pl create mode 100644 challenge-010/paulo-custodio/test.pl diff --git a/challenge-010/paulo-custodio/README b/challenge-010/paulo-custodio/README new file mode 100644 index 0000000000..87dc0b2fbd --- /dev/null +++ b/challenge-010/paulo-custodio/README @@ -0,0 +1 @@ +Solution by Paulo Custodio diff --git a/challenge-010/paulo-custodio/perl/ch-1.pl b/challenge-010/paulo-custodio/perl/ch-1.pl new file mode 100644 index 0000000000..d622244769 --- /dev/null +++ b/challenge-010/paulo-custodio/perl/ch-1.pl @@ -0,0 +1,75 @@ +#!/usr/bin/env perl + +# Challenge 010 +# +# Challenge #1 +# Write a script to encode/decode Roman numerals. For example, given Roman +# numeral CCXLVI, it should return 246. Similarly, for decimal number 39, it +# should return XXXIX. Checkout wikipedia page for more information. + +use strict; +use warnings; +use 5.030; + +my %ROMAN = ( M => 1000, D => 500, C => 100, L => 50, X => 10, V => 5, I => 1 ); +my $RE = qr/[MDCLXVI]/i; + +sub decode_roman { + my($str) = @_; + my $num = 0; + while ($str ne '') { + for ($str) { + if (/^($RE)($RE)(.*)/i && $ROMAN{$1} < $ROMAN{$2}) { + $num += $ROMAN{uc($2)} - $ROMAN{uc($1)}; + $_ = $3; + } + elsif (s/^(($RE)\1*)//i) { + $num += $ROMAN{uc($2)}*length($1); + } + else { + die "cannot parse: $_\n"; + } + } + } + return $num; +} + +sub encode_roman { + my($num) = @_; + my $str = ""; + while ($num > 0) { + if ($num >= 1000) { $str .= "M" x int($num/1000); $num %= 1000; } + elsif ($num >= 900) { $str .= "CM"; $num -= 900; } + elsif ($num >= 500) { $str .= "D"; $num -= 500; } + elsif ($num >= 400) { $str .= "CD"; $num -= 400; } + elsif ($num >= 100) { $str .= "C" x int($num/100); $num %= 100; } + elsif ($num >= 90) { $str .= "XC"; $num -= 90; } + elsif ($num >= 50) { $str .= "L"; $num -= 50; } + elsif ($num >= 40) { $str .= "XL"; $num -= 40; } + elsif ($num >= 10) { $str .= "X" x int($num/10); $num %= 10; } + elsif ($num >= 9) { $str .= "IX"; $num -= 9; } + elsif ($num >= 5) { $str .= "V"; $num -= 5; } + elsif ($num >= 4) { $str .= "IV"; $num -= 4; } + elsif ($num >= 1) { $str .= "I" x $num; $num = 0; } + else { die; } + } + return $str; +} + +for (@ARGV) { + if (/-test/) { + for (1..3000) { + my $str = encode_roman($_); + my $num = decode_roman($str); + $_ == $num or die "$_ => $str => $num"; + print $str," "; + } + print "\n"; + } + elsif (/^\d+/) { + say "$_ => ", encode_roman($_); + } + else { + say "$_ => ", decode_roman($_); + } +} diff --git a/challenge-010/paulo-custodio/perl/ch-2.pl b/challenge-010/paulo-custodio/perl/ch-2.pl new file mode 100644 index 0000000000..f053b7a341 --- /dev/null +++ b/challenge-010/paulo-custodio/perl/ch-2.pl @@ -0,0 +1,91 @@ +#!/usr/bin/env perl + +# Challenge 010 +# +# Challenge #2 +# Write a script to find Jaro-Winkler distance between two strings. For more +# information check wikipedia page. + +use strict; +use warnings; +use 5.030; +use List::Util 'max', 'min'; + + +use Data::Dump 'dump'; + +sub jaro_similarity { + my($s1, $s2) = @_; + + return 1 if $s1 eq $s2; # strings equal => 1 + + my @s1 = split //, $s1; my $len1 = length($s1); + my @s2 = split //, $s2; my $len2 = length($s2); + + # max distance between matching characters + my $max_dist = int(max($len1, $len2) / 2) - 1; + + # count number of unique matches - same letters less than max_dist appart + my $match = 0; + my @found_s1; + my @found_s2; + for my $i (0 .. $len1-1) { + for my $j (max(0, $i-$max_dist) .. min($len2-1, $i+$max_dist)) { + if ($s1[$i] eq $s2[$j] && # same character + !$found_s2[$j]) # and not already matched + { + $found_s1[$i]=1; # mark these ... + $found_s2[$j]=1; # ... as found + $match++; + last; + } + } + } + return 0 if $match==0; # no matched characters => 0 + + # count transpositions: The first assigned character on one string is + # compared to the first assigned character on the other string. If the + # characters are not the same, half of a transposition has occurred. Then + # the second assigned character on one string is compared to the second + # assigned character on the other string, etc. The number of mismatched + # characters is divided by two to yield the number of transpositions. + my $transp = 0; + my $pos_s2 = 0; + for my $i (0 .. $len1-1) { + if ($found_s1[$i]) { # there is a match in s1, find first match in s2 + $pos_s2++ while !$found_s2[$pos_s2]; + if ($s1[$i] ne $s2[$pos_s2++]) { + $transp++; + } + } + } + $transp /= 2; + + my $jaro_similarity = ($match/$len1 + $match/$len2 + ($match-$transp)/$match) / 3; + return $jaro_similarity; +} + +sub jaro_winkler_similarity { + my($s1, $s2) = @_; + + my @s1 = split //, $s1; my $len1 = length($s1); + my @s2 = split //, $s2; my $len2 = length($s2); + + # find longest common prefix l + my $l = 0; + $l++ while ($l < min(4, $len1, $len2) && $s1[$l] eq $s2[$l]); + + # constant p + my $p = 0.1; + + my $jaro_similarity = jaro_similarity($s1, $s2); + my $jaro_winkler_similarity = $jaro_similarity + $l*$p*(1 - $jaro_similarity); + return $jaro_winkler_similarity; +} + +sub jaro_winkler_distance { + my($s1, $s2) = @_; + return 1-jaro_winkler_similarity($s1,$s2); +} + +say jaro_winkler_distance(@ARGV); diff --git a/challenge-010/paulo-custodio/test.pl b/challenge-010/paulo-custodio/test.pl new file mode 100644 index 0000000000..6d475a16b8 --- /dev/null +++ b/challenge-010/paulo-custodio/test.pl @@ -0,0 +1,33 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use 5.030; +use Test::More; + +ok 0==system("perl perl/ch-1.pl -test"); + +for ([XXXIX => 39], [CCXLVI => 246], [DCCLXXXIX => 789], [MMCDXXI => 2421], + [CLX => 160], [CCVII => 207], [MIX => 1009], [MLXVI => 1066], + [MDCCLXXVI => 1776], [MCMXVIII => 1918], [MCMLIV => 1954], [MMXIV => 2014]) { + my($roman, $arabic) = @$_; + + is capture("perl perl/ch-1.pl $roman"), "$roman => $arabic\n"; + is capture("perl perl/ch-1.pl $arabic"), "$arabic => $roman\n"; +} + +is capture("perl perl/ch-2.pl DwAyNE DuANE"), "0.16\n"; +is capture("perl perl/ch-2.pl TRATE TRACE"), "0.0933333333333333\n"; +is capture("perl perl/ch-2.pl arnab aranb"), "0.0533333333333335\n"; + + +done_testing; + + +sub capture { + my($cmd) = @_; + my $out = `$cmd`; + $out =~ s/[ \r\t]*\n/\n/g; + return $out; +} + -- cgit