aboutsummaryrefslogtreecommitdiff
path: root/challenge-024
diff options
context:
space:
mode:
authorMohammad S Anwar <mohammad.anwar@yahoo.com>2019-09-09 01:23:00 +0100
committerMohammad S Anwar <mohammad.anwar@yahoo.com>2019-09-09 01:23:00 +0100
commitdd6293e6c7c4752f20974bb223deeb8c1c8b8373 (patch)
tree12caf7853ac792fe6a25e97150250e8284add5f2 /challenge-024
parent66395d10c3df3154719725bb7f3386eb78d7725a (diff)
downloadperlweeklychallenge-club-dd6293e6c7c4752f20974bb223deeb8c1c8b8373.tar.gz
perlweeklychallenge-club-dd6293e6c7c4752f20974bb223deeb8c1c8b8373.tar.bz2
perlweeklychallenge-club-dd6293e6c7c4752f20974bb223deeb8c1c8b8373.zip
- Added solutions by Colin Crain.
Diffstat (limited to 'challenge-024')
-rw-r--r--challenge-024/colin-crain/perl5/ch-1.pl1
-rw-r--r--challenge-024/colin-crain/perl5/ch-2.pl98
2 files changed, 99 insertions, 0 deletions
diff --git a/challenge-024/colin-crain/perl5/ch-1.pl b/challenge-024/colin-crain/perl5/ch-1.pl
new file mode 100644
index 0000000000..da1d2f1a0c
--- /dev/null
+++ b/challenge-024/colin-crain/perl5/ch-1.pl
@@ -0,0 +1 @@
+perl -e “”
diff --git a/challenge-024/colin-crain/perl5/ch-2.pl b/challenge-024/colin-crain/perl5/ch-2.pl
new file mode 100644
index 0000000000..cd4701ed12
--- /dev/null
+++ b/challenge-024/colin-crain/perl5/ch-2.pl
@@ -0,0 +1,98 @@
+#! /opt/local/bin/perl
+#
+# inverted_index.pl
+#
+#
+#
+# 2019 colin crain
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+
+use warnings;
+use strict;
+use feature ":5.26";
+
+## ## ## ## ## MAIN
+
+my @docs = @ARGV;
+
+say "enter search terms: ";
+my @TEST_TERMS = split /\s/, <STDIN>;
+
+my $idx = make_index( @docs );
+
+my ($match, $all) = search_index($idx, @TEST_TERMS);
+
+say "matched all:\n", (join ", ", $match->@*);
+say "by prevalence:\n", (join "\n ", $all->@*);
+
+
+## ## ## ## ## SUBS
+
+sub make_index {
+ my @documents = @_;
+
+ my $idx = {};
+
+ ## open and slurp down the file,
+ ## split on whitespace to make a word list
+ for my $file ( @documents ) {
+ open (my $fh, "<" , $file) or die "can't open $file to read: $!\n";
+ my @words = split /\s/, <$fh>;
+ close $fh;
+
+ ## run it through a unique filter
+ my @unique_words = uniq( @words );
+
+ ## add each word to the index adding the file to the value array
+ for my $word ( @unique_words ) {
+ if (exists $idx->{$word}) {
+ push $idx->{$word}->@*, $file;
+ }
+ else {
+ $idx->{$word} = [$file];
+ }
+ }
+ }
+
+ return $idx;
+}
+
+sub search_index {
+## takes an index hashref and a list of search terms
+## returns two lists, the first of docs with all terms
+## the second a sorted list of documents by term prevalence
+## ( most words found first, will include all documents with at least one word)
+ my ($idx, @words) = @_;
+
+ ## for each search term, add its index doc list to the result
+ my @result;
+ for my $search_term ( @words ) {
+ if (exists $idx->{$search_term}) {
+ push @result, $idx->{$search_term}->@*;
+ }
+ }
+
+ ## count the document occurrences in the concatenated index lists
+ my %result_count;
+ foreach my $document ( @result ) {
+ $result_count{$document}++;
+ }
+ my @unique_result = uniq( @result );
+
+ ## if the occurence count equals the search term count, all terms match to the document
+ my @all_terms = grep { $result_count{$_} == scalar @words } @unique_result;
+
+ ## alternately, sort the unique result list on prevalence
+ my @by_prevalence = sort { $result_count{$b} <=> $result_count{$a} } @unique_result;
+
+ return (\@all_terms, \@by_prevalence);
+}
+
+sub uniq {
+## filters a list for unique terms, with first occurence preserved
+## we could grab List::Util::uniq but here we roll our own with the "seen" trick
+ my %seen;
+ return grep { ! $seen{$_}++ } @_;
+}
+
+