#!/usr/bin/env perl

# Joelle's P5 solution for 24.2
# (challenge-024/joelle-maslak/perl5/ch-2.pl)
#
# Builds an inverted index for the files named on the command line and
# prints one line per word: the case-folded word, a colon, and the
# space-separated names of the files that contain it.
#
# Usage: ch-2.pl FILE [FILE ...]

use v5.24;    # fc needs 5.16; postfix dereference (->@*) needs 5.24
use strict;
use warnings;

use File::ByLine;
use List::Util 1.45 qw(uniq);    # uniq is available from List::Util 1.45

# Reading through File::ByLine means that a very long single file might
# be processed more quickly (assuming IO can keep up).

MAIN: {
    # Number of parallel workers File::ByLine is asked to use per file.
    my $worker_count = 10;

    my $fbl = File::ByLine->new();
    $fbl->processes($worker_count);

    # File name => array ref of the distinct, case-folded words it contains.
    my %words_in;

    # uniq: a file named twice on the command line is only indexed once.
    for my $fn (uniq @ARGV) {

        # Read the file and separate out the words. File::ByLine->map
        # applies the callback to each line (available in $_) and
        # returns the concatenated results.
        #
        # NOTE(review): if the callback runs in several worker processes,
        # each worker presumably has its own copy of %seen, so %seen only
        # trims duplicates early; the uniq below is what guarantees that
        # every word is listed once per file.
        my %seen;
        my @words = $fbl->map(
            sub {
                # split can yield a leading empty field when the line
                # starts with a non-word character, hence the '' check.
                grep { $_ ne '' && !$seen{$_}++ }
                    map { fc($_) }    # Case insensitive words
                    split /\W+/, $_;
            },
            $fn,
        );

        $words_in{$fn} = [ uniq @words ];
    }

    # Build the index: word => array ref of file names. Walking the files
    # in sorted order keeps each word's file list sorted as well.
    my %files_for;
    for my $fn (sort keys %words_in) {
        for my $word ($words_in{$fn}->@*) {
            push $files_for{$word}->@*, $fn;    # push autovivifies the list
        }
    }

    # Output the index, one word per line, in sorted word order.
    for my $word (sort keys %files_for) {
        say "$word: " . join(" ", $files_for{$word}->@*);
    }
}