aboutsummaryrefslogtreecommitdiff
path: root/challenge-024/simon-proctor/perl6
diff options
context:
space:
mode:
author: Simon Proctor <simon.proctor@zpg.co.uk> 2019-09-02 11:20:49 +0100
committer: Simon Proctor <simon.proctor@zpg.co.uk> 2019-09-02 11:20:49 +0100
commit: 4eb4b2ae6f28de11475256dd94eeec739446517e (patch)
tree: 7daa4594152bb20e61c13c62d0b6ee2c5fade827 /challenge-024/simon-proctor/perl6
parent: 1391a8dd6400d0c5c8acbb7ab9111fb0548accdd (diff)
download: perlweeklychallenge-club-4eb4b2ae6f28de11475256dd94eeec739446517e.tar.gz
perlweeklychallenge-club-4eb4b2ae6f28de11475256dd94eeec739446517e.tar.bz2
perlweeklychallenge-club-4eb4b2ae6f28de11475256dd94eeec739446517e.zip
Inverted Indexer creator. Multi threaded because why wouldn't you?
Diffstat (limited to 'challenge-024/simon-proctor/perl6')
-rw-r--r-- challenge-024/simon-proctor/perl6/ch-2.p6 | 51
1 files changed, 51 insertions, 0 deletions
diff --git a/challenge-024/simon-proctor/perl6/ch-2.p6 b/challenge-024/simon-proctor/perl6/ch-2.p6
new file mode 100644
index 0000000000..c8ac377ffa
--- /dev/null
+++ b/challenge-024/simon-proctor/perl6/ch-2.p6
@@ -0,0 +1,51 @@
+#!/usr/bin/env perl6
+
+use v6;
+use JSON::Fast;
+
+my %*SUB-MAIN-OPTS = :named-anywhere;
+
+#| Display Help information
+multi sub MAIN ( Bool :h(:$help) where so * ) {
+    # The `where` constraint is meant to restrict this candidate to a truthy
+    # -h / --help flag; the body just prints the auto-generated usage text.
+    $*USAGE.say;
+}
+
+# A Str naming a path that exists on disk and is a regular file.
+# Existence is tested first, so .f is only asked of paths that are really there.
+subset FileExists of Str where {
+    my $candidate = .IO;
+    $candidate.e && $candidate.f
+};
+
+#| Work out the reverse index for the given documents
+multi sub MAIN (
+    *@documents where { @documents.all ~~ FileExists }, #= List of documents to process
+    Int :$min-length = 3, #= Minimum word length to count for inclusion in the index. Default is 3 characters.
+) {
+    # Maps each case-folded word to the SetHash of resolved document paths it
+    # appears in. Only the react block below writes to it.
+    my %index;
+    my $word-channel = Channel.new;
+    my @promises;
+
+    # One worker per document: read the file word by word and send
+    # ( folded-word, resolved-path ) pairs down the shared channel.
+    for @documents -> $path {
+        @promises.push(
+            start {
+                my $res-path = $path.IO.resolve.Str;
+                for $path.IO.words -> $raw {
+                    # Drop every non-word character (punctuation, quotes, ...).
+                    my $word = $raw.subst( /\W+/, '', :g );
+                    next unless $word.chars >= $min-length;
+                    $word-channel.send( ( $word.fc, $res-path ) );
+                }
+            }
+        );
+    }
+
+    # Single consumer: drains the channel and records which documents
+    # each word was seen in.
+    my $reactor = start react {
+        whenever $word-channel -> ( $word, $path ) {
+            %index{$word} //= SetHash.new;
+            %index{$word}{$path} = True;
+        }
+    }
+
+    # Closing the channel once every worker has finished lets the react
+    # block run to completion.
+    await @promises;
+    $word-channel.close;
+    await $reactor;
+
+    # Turn each SetHash into a sorted list of paths so the per-word document
+    # lists come out in a stable order from run to run.
+    my %inverted = %index.map( -> $entry { $entry.key => $entry.value.keys.sort.List } );
+    say to-json( %inverted );
+}
+