diff options
Diffstat (limited to 'challenge-081')
| -rw-r--r-- | challenge-081/colin-crain/perl/ch-1.pl | 81 | ||||
| -rw-r--r-- | challenge-081/colin-crain/perl/ch-2.pl | 130 | ||||
| -rw-r--r-- | challenge-081/colin-crain/raku/ch-1.raku | 73 | ||||
| -rw-r--r-- | challenge-081/colin-crain/raku/ch-2.raku | 124 |
4 files changed, 408 insertions, 0 deletions
diff --git a/challenge-081/colin-crain/perl/ch-1.pl b/challenge-081/colin-crain/perl/ch-1.pl new file mode 100644 index 0000000000..e5be33336f --- /dev/null +++ b/challenge-081/colin-crain/perl/ch-1.pl @@ -0,0 +1,81 @@ +#! /opt/local/bin/perl +# +# common_bonds.pl +# TASK #1 › Common Base String +# Submitted by: Mohammad S Anwar +# You are given 2 strings, $A and $B. +# +# Write a script to find out common base strings in $A and $B. +# +# A substring of a string $S is called base string if +# repeated concatenation of the substring results in the string. +# +# Example 1: +# Input: +# $A = "abcdabcd" +# $B = "abcdabcdabcdabcd" +# +# Output: +# ("abcd", "abcdabcd") +# Example 2: +# Input: +# $A = "aaa" +# $B = "aa" +# +# Output: +# ("a") +# +# method: +# This really isn't as complicated as it may sound. +# +# A common base string must by definition be sized as a harmonic +# divisor of the original: 1/2, 1/3, 1/4 etc. We can save some +# trouble by looking at only those substrings that fit this basic +# constraint. +# +# Any base string will begin the source string and extend for n +# characters, with n contained within the set of fractional +# components outlined above with respect to the source string +# length. Thus 1/2 length, or 1/3, 1/4 etc. +# +# It's also rather difficult to come up with a good example as most +# input variations produce super-obvious results. Not much of a +# needle in a haystack going on here. 
#
#       2020 colin crain
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##



use warnings;
use strict;
use feature ":5.26";

## ## ## ## ## MAIN:

# Fall back to a demonstration pair when no arguments are supplied.
@ARGV == 0 and @ARGV = qw(agtcagtcagtcagtc agtcagtcagtcagtcagtcagtcagtcagtc);

my ($A, $B) = @ARGV;

# Index the base strings of $A so each base string of $B can be checked
# with a single hash lookup.
my %subs_a = map { $_ => 1 } find_base_strings($A);

# Keep the base strings of $B that are also base strings of $A, in the
# order find_base_strings produced them (longest first).
my @out = grep { exists $subs_a{$_} } find_base_strings($B);
say $_ for @out;


## ## ## ## ## SUBS:

# find_base_strings($str)
#
# Returns the list of base strings of $str: every prefix that rebuilds
# $str exactly when repeated a whole number of times. Results are ordered
# from the longest candidate (the string itself) to the shortest. An empty
# string yields an empty list.
#
# The check is a plain string comparison rather than a pattern match, so
# characters that are special in a regex ("+", "(", "." ...) are treated
# literally and cannot cause false results or a regex compile error.
sub find_base_strings {
    my ($str) = @_;
    my $len = length $str;
    my @bases;

    for my $repeats (1 .. $len) {
        # Only lengths that divide the string evenly can tile it.
        next if $len % $repeats;

        my $candidate = substr $str, 0, $len / $repeats;
        push @bases, $candidate if $candidate x $repeats eq $str;
    }
    return @bases;
}
\ No newline at end of file diff --git a/challenge-081/colin-crain/perl/ch-2.pl b/challenge-081/colin-crain/perl/ch-2.pl new file mode 100644 index 0000000000..4e65791988 --- /dev/null +++ b/challenge-081/colin-crain/perl/ch-2.pl @@ -0,0 +1,130 @@ +#! /opt/local/bin/perl +# +# bag-o-sharks.pl +# +# TASK #2 › Frequency Sort +# Submitted by: Mohammad S Anwar +# You are given file named input. +# +# Write a script to find the frequency of all the words. +# +# It should print the result as first column of each line should be the +# frequency of the word followed by all the words of that frequency +# arranged in lexicographical order. Also sort the words in the +# ascending order of frequency. +# +# INPUT file +# +# West Side Story +# +# The award-winning adaptation of the classic romantic tragedy "Romeo +# and Juliet". The feuding families become two warring New York City +# gangs, the white Jets led by Riff and the Latino Sharks, led by +# Bernardo. Their hatred escalates to a point where neither can coexist +# with any form of understanding. But when Riff's best friend (and +# former Jet) Tony and Bernardo's younger sister Maria meet at a dance, +# no one can do anything to stop their love. Maria and Tony begin +# meeting in secret, planning to run away. Then the Sharks and Jets plan +# a rumble under the highway--whoever wins gains control of the streets. +# Maria sends Tony to stop it, hoping it can end the violence. It goes +# terribly wrong, and before the lovers know what's happened, tragedy +# strikes and doesn't stop until the climactic and heartbreaking ending. + +# NOTE +# For the sake of this task, please ignore the following in the input file: +# . 
" ( ) , 's -- + +# OUTPUT +# 1 But City It Jet Juliet Latino New Romeo Side Story Their Then West +# York adaptation any anything at award-winning away become before begin +# best classic climactic coexist control dance do doesn't end ending +# escalates families feuding form former friend gains gangs goes +# happened hatred heartbreaking highway hoping in know love lovers meet +# meeting neither no one plan planning point romantic rumble run secret +# sends sister streets strikes terribly their two under understanding +# until violence warring what when where white whoever wins with wrong +# younger +# +# 2 Bernardo Jets Riff Sharks The by it led tragedy +# +# 3 Maria Tony a can of stop +# +# 4 to +# +# 9 and the + +# method: +# a bit of NLP for you all. A naive bag of words output by +# frequency. We'll start by pretreating the data: scrub certain +# defined punctuation and possessive case into spaces, and lowercase +# normalize all text. We will make sure to keep a single hyphen. We +# won't be doing any name recognition so we won't worry about +# losing capitalization for those entities here and concern +# ourselves rather with making sure "their" and "Their" get counted +# as the same word. This is of course a judgement call and not +# specified behavior but seems fitting to this basic word analysis. + +# Consequently the output is slightly different as, for instance, +# 'their' is moved to the second category, and the output is +# actually in lexicographic order as requested, rather than the +# example ASCII sort with capital letters first. +# +# Next-level improvements on this method might begin to identify +# Named Entities by selectively removing the capitalization of +# letters only at beginning of sentences, that is to say after a +# period or certain punctuation, or at the beginning of a paragraph +# or quote. Then unusually capitalized words could be identified in +# the corpus on basis of their grammatical uniqueness. 
#
#
#       2020 colin crain
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##



use warnings;
use strict;
use feature ":5.26";

## ## ## ## ## MAIN:

# Slurp the whole embedded text in one read; the record-separator override
# is confined to this block so nothing else in the script is affected.
my $input = do { local $/; <DATA> };

## preproc
# Turn the punctuation the task tells us to ignore ( . " ( ) , 's -- )
# into spaces, then fold case so "Their" and "their" count as one word.
$input =~ s/[. " ( ) ,]|'s|--/ /xg;
$input = lc $input;

## proc
# Bag of words: word => number of occurrences. Splitting on ' ' (awk mode)
# skips leading whitespace, so text that starts with scrubbed punctuation
# cannot introduce an empty-string "word" into the counts.
my %bag;
$bag{$_}++ for split ' ', $input;

# Invert the bag: occurrence count => list of words seen that many times.
my %freq;
while (my ($word, $count) = each %bag) {
    push $freq{$count}->@*, $word;
}

## output phase
# Ascending numeric frequency; within a frequency the words are sorted
# lexicographically and printed one per line after the count column.
for my $count (sort { $a <=> $b } keys %freq) {
    say +(sprintf "%-4s", $count) . join "\n ", sort $freq{$count}->@*;
    say '';
}


__DATA__
West Side Story

The award-winning adaptation of the classic romantic tragedy "Romeo
and Juliet". The feuding families become two warring New York City
gangs, the white Jets led by Riff and the Latino Sharks, led by
Bernardo. Their hatred escalates to a point where neither can coexist
with any form of understanding. But when Riff's best friend (and
former Jet) Tony and Bernardo's younger sister Maria meet at a dance,
no one can do anything to stop their love. Maria and Tony begin
meeting in secret, planning to run away. Then the Sharks and Jets plan
a rumble under the highway--whoever wins gains control of the streets.
Maria sends Tony to stop it, hoping it can end the violence. It goes
terribly wrong, and before the lovers know what's happened, tragedy
strikes and doesn't stop until the climactic and heartbreaking ending.
\ No newline at end of file diff --git a/challenge-081/colin-crain/raku/ch-1.raku b/challenge-081/colin-crain/raku/ch-1.raku new file mode 100644 index 0000000000..5b26fcb94f --- /dev/null +++ b/challenge-081/colin-crain/raku/ch-1.raku @@ -0,0 +1,73 @@ +#!/usr/bin/env perl6 +# +# +# common-bonds.raku +# +# TASK #1 › Common Base String +# Submitted by: Mohammad S Anwar +# You are given 2 strings, $A and $B. +# +# Write a script to find out common base strings in $A and $B. +# +# A substring of a string $S is called base string if +# repeated concatenation of the substring results in the string. +# +# Example 1: +# Input: +# $A = "abcdabcd" +# $B = "abcdabcdabcdabcd" +# +# Output: +# ("abcd", "abcdabcd") +# Example 2: +# Input: +# $A = "aaa" +# $B = "aa" +# +# Output: +# ("a") +# +# method: +# This really isn't as complicated as it may sound. +# +# A common base string must by definition be sized as a harmonic +# divisor of the original: 1/2, 1/3, 1/4 etc. We can save some +# trouble by looking at only those substrings that fit this basic +# constraint. +# +# Any base string will begin the source string and extend for n +# characters, with n contained within the set of fractional +# components outlined above with respect to the source string +# length. Thus 1/2 length, or 1/3, 1/4 etc. +# +# It's also rather difficult to come up with a good example as most +# input variations produce super-obvious results. Not much of a +# needle in a haystack going on here. 
#
#
#       2020 colin crain
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##



unit sub MAIN ($A = "aaaaaaaaaaaaaaaaaaaaaaaa", $B = "aaaaaaaaaaaa") ;

# Base strings of $A, held in a Bag so membership can be tested by key.
my $bases-of-A = find_base_strings($A).Bag;

# Every base string of $B that is also a base string of $A, kept in the
# order find_base_strings reports them, printed one per line.
my @common = find_base_strings($B).grep({ $bases-of-A{$_}:exists });
.say for @common;



# Returns the prefixes of $str that rebuild $str when repeated a whole
# number of times, from the longest candidate down to the shortest.
sub find_base_strings ($str) {
    my $length = $str.chars;
    my @found;
    for 1..$length -> $count {
        # Skip repeat counts that do not divide the string length evenly.
        next unless $length %% $count;

        my $prefix = $str.substr(0, $length / $count);
        # The interpolated variable is matched as literal text, one or more times.
        @found.push($prefix) if $str ~~ /^ $prefix+ $/;
    }
    @found;
}



diff --git a/challenge-081/colin-crain/raku/ch-2.raku b/challenge-081/colin-crain/raku/ch-2.raku new file mode 100644 index 0000000000..7437998469 --- /dev/null +++ b/challenge-081/colin-crain/raku/ch-2.raku @@ -0,0 +1,124 @@ +#!/usr/bin/env perl6 +# +# +# bag-o-sharks.raku +# +# TASK #2 › Frequency Sort +# Submitted by: Mohammad S Anwar +# You are given file named input. +# +# Write a script to find the frequency of all the words. +# +# It should print the result as first column of each line should be the +# frequency of the the word followed by all the words of that frequency +# arranged in lexicographical order. Also sort the words in the +# ascending order of frequency. +# +# INPUT file +# +# West Side Story +# +# The award-winning adaptation of the classic romantic tragedy "Romeo +# and Juliet". The feuding families become two warring New York City +# gangs, the white Jets led by Riff and the Latino Sharks, led by +# Bernardo. Their hatred escalates to a point where neither can coexist +# with any form of understanding. But when Riff's best friend (and +# former Jet) Tony and Bernardo's younger sister Maria meet at a dance, +# no one can do anything to stop their love. Maria and Tony begin +# meeting in secret, planning to run away. Then the Sharks and Jets plan +# a rumble under the highway--whoever wins gains control of the streets. +# Maria sends Tony to stop it, hoping it can end the violence. 
It goes +# terribly wrong, and before the lovers know what's happened, tragedy +# strikes and doesn't stop until the climactic and heartbreaking ending. + +# NOTE +# For the sake of this task, please ignore the following in the input file: +# . " ( ) , 's -- +# +# OUTPUT +# 1 But City It Jet Juliet Latino New Romeo Side Story Their Then West +# York adaptation any anything at award-winning away become before begin +# best classic climactic coexist control dance do doesn't end ending +# escalates families feuding form former friend gains gangs goes +# happened hatred heartbreaking highway hoping in know love lovers meet +# meeting neither no one plan planning point romantic rumble run secret +# sends sister streets strikes terribly their two under understanding +# until violence warring what when where white whoever wins with wrong +# younger +# +# 2 Bernardo Jets Riff Sharks The by it led tragedy +# +# 3 Maria Tony a can of stop +# +# 4 to +# +# 9 and the +# +# method: +# a bit of NLP for you all. A naive bag of words output by +# frequency. We'll start by pretreating the data: scrub certain +# defined punctuation and possessive case into spaces, and lowercase +# normalize all text. We will make sure to keep a single hyphen. We +# won't be doing any name recognition so we won't worry about +# losing capitalization for those entities here and concern +# ourselves rather with making sure "their" and "Their" get counted +# as the same word. This is of course a judgement call and not +# specified behavior but seems fitting to this basic word analysis. +# +# Consequently the output is slightly different as, for instance, +# 'their' is moved to the second category, and the output is +# actually in lexicographic order as requested, rather than the +# example ASCII sort with capital letters first. 
#
# Next-level improvements on this method might begin to identify
# Named Entities by selectively removing the capitalization of
# letters only at beginning of sentences, that is to say after a
# period or certain punctuation, or at the beginning of a paragraph
# or quote. Then unusually capitalized words could be identified in
# the corpus on basis of their grammatical uniqueness.
#
#
#       2020 colin crain
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##



unit sub MAIN () ;

# The sample text, embedded as an indented heredoc.
my $input = q:to/__END__/;
    West Side Story

    The award-winning adaptation of the classic romantic tragedy "Romeo
    and Juliet". The feuding families become two warring New York City
    gangs, the white Jets led by Riff and the Latino Sharks, led by
    Bernardo. Their hatred escalates to a point where neither can coexist
    with any form of understanding. But when Riff's best friend (and
    former Jet) Tony and Bernardo's younger sister Maria meet at a dance,
    no one can do anything to stop their love. Maria and Tony begin
    meeting in secret, planning to run away. Then the Sharks and Jets plan
    a rumble under the highway--whoever wins gains control of the streets.
    Maria sends Tony to stop it, hoping it can end the violence. It goes
    terribly wrong, and before the lovers know what's happened, tragedy
    strikes and doesn't stop until the climactic and heartbreaking ending.
    __END__

## preproc
# Replace the punctuation the task says to ignore with spaces, fold case,
# and strip the surrounding whitespace.
$input .= subst(/ <[."(),]> | \'s | \-\- /, ' ', :g);
$input = $input.lc.trim;

## freq analysis
# Count the words, then group them under their occurrence count.
my %freq;
my $tally = $input.words.Bag;
for $tally.pairs -> $entry {
    %freq{$entry.value}.push: $entry.key;
}

## out
# Ascending numeric count; the words of each count are sorted and printed
# one per line, with a blank line closing each group.
for %freq.keys.sort(*.Int) -> $count {
    say $count.fmt("%-5s") ~ %freq{$count}.sort.join("\n ") ~ "\n";
}
