From 3dec094185381ab2411067eaa1d557bc7ae16362 Mon Sep 17 00:00:00 2001
From: Paulo Custodio <pauloscustodio@gmail.com>
Date: Thu, 14 Jan 2021 09:30:43 +0000
Subject: Add Perl solution to challenge 010

---
 challenge-010/paulo-custodio/README       |  1 +
 challenge-010/paulo-custodio/perl/ch-1.pl | 75 +++++++++++++++++++++++++
 challenge-010/paulo-custodio/perl/ch-2.pl | 91 +++++++++++++++++++++++++++++++
 challenge-010/paulo-custodio/test.pl      | 33 +++++++++++
 4 files changed, 200 insertions(+)
 create mode 100644 challenge-010/paulo-custodio/README
 create mode 100644 challenge-010/paulo-custodio/perl/ch-1.pl
 create mode 100644 challenge-010/paulo-custodio/perl/ch-2.pl
 create mode 100644 challenge-010/paulo-custodio/test.pl

diff --git a/challenge-010/paulo-custodio/README b/challenge-010/paulo-custodio/README
new file mode 100644
index 0000000000..87dc0b2fbd
--- /dev/null
+++ b/challenge-010/paulo-custodio/README
@@ -0,0 +1 @@
+Solution by Paulo Custodio
diff --git a/challenge-010/paulo-custodio/perl/ch-1.pl b/challenge-010/paulo-custodio/perl/ch-1.pl
new file mode 100644
index 0000000000..d622244769
--- /dev/null
+++ b/challenge-010/paulo-custodio/perl/ch-1.pl
@@ -0,0 +1,75 @@
+#!/usr/bin/env perl
+
+# Challenge 010
+#
+# Challenge #1
+# Write a script to encode/decode Roman numerals. For example, given Roman
+# numeral CCXLVI, it should return 246. Similarly, for decimal number 39, it 
+# should return XXXIX. Checkout wikipedia page for more information.
+
+use strict;
+use warnings;
+use 5.030;
+
+my %ROMAN = ( M => 1000, D => 500, C => 100, L => 50, X => 10, V => 5, I => 1 );
+my $RE = qr/[MDCLXVI]/i;
+
+sub decode_roman {
+	my($str) = @_;
+	my $num = 0;
+	while ($str ne '') {
+		for ($str) {
+			if (/^($RE)($RE)(.*)/i && $ROMAN{$1} < $ROMAN{$2}) { 
+				$num += $ROMAN{uc($2)} - $ROMAN{uc($1)};
+				$_ = $3;
+			}
+			elsif (s/^(($RE)\1*)//i) { 
+				$num += $ROMAN{uc($2)}*length($1); 
+			}
+			else {
+				die "cannot parse: $_\n";
+			}
+		}
+	}
+	return $num;
+}
+
+sub encode_roman {
+	my($num) = @_;
+	my $str = "";
+	while ($num > 0) {
+		if ($num >= 1000) { $str .= "M" x int($num/1000); $num %= 1000; }
+		elsif ($num >= 900) { $str .= "CM"; $num -= 900; }
+		elsif ($num >= 500) { $str .= "D"; $num -= 500; }
+		elsif ($num >= 400) { $str .= "CD"; $num -= 400; }
+		elsif ($num >= 100) { $str .= "C" x int($num/100); $num %= 100; }
+		elsif ($num >= 90) { $str .= "XC"; $num -= 90; }
+		elsif ($num >= 50) { $str .= "L"; $num -= 50; }
+		elsif ($num >= 40) { $str .= "XL"; $num -= 40; }
+		elsif ($num >= 10) { $str .= "X" x int($num/10); $num %= 10; }
+		elsif ($num >= 9) { $str .= "IX"; $num -= 9; }
+		elsif ($num >= 5) { $str .= "V"; $num -= 5; }
+		elsif ($num >= 4) { $str .= "IV"; $num -= 4; }
+		elsif ($num >= 1) { $str .= "I" x $num; $num = 0; }
+		else { die; }
+	}
+	return $str;
+}
+
+for (@ARGV) {
+	if (/-test/) {
+		for (1..3000) {
+			my $str = encode_roman($_); 
+			my $num = decode_roman($str); 
+			$_ == $num or die "$_ => $str => $num";
+			print $str," ";
+		}
+		print "\n";
+	}
+	elsif (/^\d+/) {
+		say "$_ => ", encode_roman($_);
+	}
+	else {
+		say "$_ => ", decode_roman($_);
+	}
+}
diff --git a/challenge-010/paulo-custodio/perl/ch-2.pl b/challenge-010/paulo-custodio/perl/ch-2.pl
new file mode 100644
index 0000000000..f053b7a341
--- /dev/null
+++ b/challenge-010/paulo-custodio/perl/ch-2.pl
@@ -0,0 +1,91 @@
+#!/usr/bin/env perl
+
+# Challenge 010
+#
+# Challenge #2
+# Write a script to find Jaro-Winkler distance between two strings. For more
+# information check wikipedia page.
+
+use strict;
+use warnings;
+use 5.030;
+use List::Util 'max', 'min';
+
+
+use Data::Dump 'dump';
+
+sub jaro_similarity {
+	my($s1, $s2) = @_;
+	
+	return 1 if $s1 eq $s2;					# strings equal => 1
+	
+	my @s1 = split //, $s1; my $len1 = length($s1);
+	my @s2 = split //, $s2; my $len2 = length($s2);
+	
+	# max distance between matching characters
+	my $max_dist = int(max($len1, $len2) / 2) - 1;
+	
+	# count number of unique matches - same letters less than max_dist appart
+	my $match = 0;
+	my @found_s1;
+	my @found_s2;
+	for my $i (0 .. $len1-1) {
+		for my $j (max(0, $i-$max_dist) .. min($len2-1, $i+$max_dist)) {
+			if ($s1[$i] eq $s2[$j] &&		# same character 
+			    !$found_s2[$j]) 			# and not already matched
+			{
+				$found_s1[$i]=1;			# mark these ...
+				$found_s2[$j]=1;			# ... as found
+				$match++;
+				last;
+			}
+		}
+	}
+	return 0 if $match==0;					# no matched characters => 0
+	
+	# count transpositions: The first assigned character on one string is 
+	# compared to the first assigned character on the other string. If the 
+	# characters are not the same, half of a transposition has occurred. Then 
+	# the second assigned character on one string is compared to the second 
+	# assigned character on the other string, etc. The number of mismatched 
+	# characters is divided by two to yield the number of transpositions.
+	my $transp = 0;
+	my $pos_s2 = 0;
+	for my $i (0 .. $len1-1) {
+		if ($found_s1[$i]) {		# there is a match in s1, find first match in s2
+			$pos_s2++ while !$found_s2[$pos_s2];
+			if ($s1[$i] ne $s2[$pos_s2++]) {
+				$transp++;
+			}
+		}
+	}
+	$transp /= 2;
+	
+	my $jaro_similarity = ($match/$len1 + $match/$len2 + ($match-$transp)/$match) / 3;
+	return $jaro_similarity;
+}
+
+sub jaro_winkler_similarity {
+	my($s1, $s2) = @_;
+	
+	my @s1 = split //, $s1; my $len1 = length($s1);
+	my @s2 = split //, $s2; my $len2 = length($s2);
+	
+	# find longest common prefix l
+	my $l = 0;
+	$l++ while ($l < min(4, $len1, $len2) && $s1[$l] eq $s2[$l]);
+	
+	# constant p
+	my $p = 0.1;
+	
+	my $jaro_similarity = jaro_similarity($s1, $s2);
+	my $jaro_winkler_similarity = $jaro_similarity + $l*$p*(1 - $jaro_similarity);
+	return $jaro_winkler_similarity;
+}
+
+sub jaro_winkler_distance {
+	my($s1, $s2) = @_;
+	return 1-jaro_winkler_similarity($s1,$s2);
+}
+
+say jaro_winkler_distance(@ARGV);
diff --git a/challenge-010/paulo-custodio/test.pl b/challenge-010/paulo-custodio/test.pl
new file mode 100644
index 0000000000..6d475a16b8
--- /dev/null
+++ b/challenge-010/paulo-custodio/test.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use 5.030;
+use Test::More;
+
+ok 0==system("perl perl/ch-1.pl -test");
+
+for ([XXXIX => 39], [CCXLVI => 246], [DCCLXXXIX => 789], [MMCDXXI => 2421],
+	 [CLX => 160], [CCVII => 207], [MIX => 1009], [MLXVI => 1066],
+	 [MDCCLXXVI => 1776], [MCMXVIII => 1918], [MCMLIV => 1954], [MMXIV => 2014]) {
+	my($roman, $arabic) = @$_;
+	
+	is capture("perl perl/ch-1.pl $roman"), 	"$roman => $arabic\n";
+	is capture("perl perl/ch-1.pl $arabic"), 	"$arabic => $roman\n";
+}
+
+is capture("perl perl/ch-2.pl DwAyNE DuANE"), 	"0.16\n";
+is capture("perl perl/ch-2.pl TRATE  TRACE"), 	"0.0933333333333333\n";
+is capture("perl perl/ch-2.pl arnab  aranb"), 	"0.0533333333333335\n";
+
+
+done_testing;
+
+
+sub capture {
+	my($cmd) = @_;
+	my $out = `$cmd`;
+	$out =~ s/[ \r\t]*\n/\n/g;
+	return $out;
+}
+
-- 
cgit