about | summary | refs | log | tree | commit | diff
path: root/challenge-024
diff options
context:
space:
mode:
author Paulo Custodio <pauloscustodio@gmail.com> 2021-01-25 20:07:40 +0000
committer Paulo Custodio <pauloscustodio@gmail.com> 2021-01-26 00:06:54 +0000
commit 3fa58628535d4041c7cc648c005080ca88f18c18 (patch)
tree 336fe3cc14f518f05e871ab974cc86a09a2fd8f6 /challenge-024
parent 3d3900a2f0f69c54a34683e4e1b5da007b4af9d9 (diff)
download perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.tar.gz
perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.tar.bz2
perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.zip
Replace tabs by spaces so that indentation looks correct
Diffstat (limited to 'challenge-024')
-rw-r--r-- challenge-024/paulo-custodio/perl/ch-2.pl 198
-rw-r--r-- challenge-024/paulo-custodio/test.pl 70
2 files changed, 134 insertions, 134 deletions
diff --git a/challenge-024/paulo-custodio/perl/ch-2.pl b/challenge-024/paulo-custodio/perl/ch-2.pl
index 31d780cfa9..5bd3c85fe5 100644
--- a/challenge-024/paulo-custodio/perl/ch-2.pl
+++ b/challenge-024/paulo-custodio/perl/ch-2.pl
@@ -3,14 +3,14 @@
# Challenge 019
#
# Task #2
-# Create a script to implement full text search functionality using Inverted
+# Create a script to implement full text search functionality using Inverted
# Index. According to wikipedia:
-#
-# In computer science, an inverted index (also referred to as a postings file
+#
+# In computer science, an inverted index (also referred to as a postings file
# or inverted file) is a database index storing a mapping from content, such as
# words or numbers, to its locations in a table, or in a document or a set of
# documents (named in contrast to a forward index, which maps from documents to
-# content). The purpose of an inverted index is to allow fast full-text
+# content). The purpose of an inverted index is to allow fast full-text
# searches, at a cost of increased processing when a document is added to the
# database.
@@ -50,130 +50,130 @@ END
}
};
-# main
+# main
my($op, @args) = @ARGV;
if (@ARGV>=2 && $op =~ /^add/i) {
- add_doc($_) for @args;
+ add_doc($_) for @args;
}
elsif (@ARGV>=2 && $op =~ /^sea/) {
- search($_) for @args;
+ search($_) for @args;
}
else {
- die "Usage: add documents | search words\n";
+ die "Usage: add documents | search words\n";
}
# add words from document to index
sub add_doc {
- my($doc) = @_;
-
- # get title
- my $title = path($doc)->basename;
-
- # connect to index database
- my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
- { RaiseError => 1, AutoCommit => 1 });
-
- # get document id
- my $document_id = get_document_id($dbh, $title);
-
- # read document
- my $word_count = 0;
- my $text = path($doc)->slurp;
- while ($text =~ /(\w+)/g) {
- my $word = lc($1);
- my $word_id = get_word_id($dbh, $word);
- add_found($dbh, $document_id, $word_id);
- $word_count++;
- }
-
- say "Indexed $title: found $word_count words";
-
- # disconnect from database
- $dbh->disconnect();
+ my($doc) = @_;
+
+ # get title
+ my $title = path($doc)->basename;
+
+ # connect to index database
+ my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
+ { RaiseError => 1, AutoCommit => 1 });
+
+ # get document id
+ my $document_id = get_document_id($dbh, $title);
+
+ # read document
+ my $word_count = 0;
+ my $text = path($doc)->slurp;
+ while ($text =~ /(\w+)/g) {
+ my $word = lc($1);
+ my $word_id = get_word_id($dbh, $word);
+ add_found($dbh, $document_id, $word_id);
+ $word_count++;
+ }
+
+ say "Indexed $title: found $word_count words";
+
+ # disconnect from database
+ $dbh->disconnect();
}
# get or add value to table
sub get_or_add_id {
- my($dbh, $table, $column, $value) = @_;
-
- for (1..2) {
- # search document in db
- my $sth = $dbh->prepare("SELECT id FROM $table WHERE $column = ?");
- $sth->execute($value);
- my($id) = $sth->fetchrow();
- $sth->finish();
-
- return $id if defined $id;
-
- # add value if not found
- $sth = $dbh->prepare("INSERT INTO $table($column) VALUES(?)");
- $sth->execute($value);
- $sth->finish();
- }
-
- die "failed to insert $column to $table value '$value'";
+ my($dbh, $table, $column, $value) = @_;
+
+ for (1..2) {
+ # search document in db
+ my $sth = $dbh->prepare("SELECT id FROM $table WHERE $column = ?");
+ $sth->execute($value);
+ my($id) = $sth->fetchrow();
+ $sth->finish();
+
+ return $id if defined $id;
+
+ # add value if not found
+ $sth = $dbh->prepare("INSERT INTO $table($column) VALUES(?)");
+ $sth->execute($value);
+ $sth->finish();
+ }
+
+ die "failed to insert $column to $table value '$value'";
}
# get or add document id
sub get_document_id {
- my($dbh, $title) = @_;
- return get_or_add_id($dbh, "documents", "title", $title);
+ my($dbh, $title) = @_;
+ return get_or_add_id($dbh, "documents", "title", $title);
}
# get or add word id
sub get_word_id {
- my($dbh, $word) = @_;
- return get_or_add_id($dbh, "words", "word", $word);
+ my($dbh, $word) = @_;
+ return get_or_add_id($dbh, "words", "word", $word);
}
# add a found location if not already found
sub add_found {
- my($dbh, $document_id, $word_id) = @_;
-
- # search location in db
- my $sth = $dbh->prepare("
- SELECT id FROM found
- WHERE document_id = ?
- AND word_id = ?");
- $sth->execute($document_id, $word_id);
- my($id) = $sth->fetchrow();
- $sth->finish();
-
- return if defined($id);
-
- # location not found, insert in db
- $sth = $dbh->prepare("INSERT INTO found (document_id, word_id)
- VALUES(?,?)");
- $sth->execute($document_id, $word_id);
- $sth->finish();
+ my($dbh, $document_id, $word_id) = @_;
+
+ # search location in db
+ my $sth = $dbh->prepare("
+ SELECT id FROM found
+ WHERE document_id = ?
+ AND word_id = ?");
+ $sth->execute($document_id, $word_id);
+ my($id) = $sth->fetchrow();
+ $sth->finish();
+
+ return if defined($id);
+
+ # location not found, insert in db
+ $sth = $dbh->prepare("INSERT INTO found (document_id, word_id)
+ VALUES(?,?)");
+ $sth->execute($document_id, $word_id);
+ $sth->finish();
}
# search word
sub search {
- my($word) = @_;
-
- # connect to index database
- my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
- { RaiseError => 1, AutoCommit => 1 });
-
- # search locations of each word
- my $sth = $dbh->prepare("
- SELECT word, title
- FROM documents, words, found
- WHERE word = ?
- AND found.document_id = documents.id
- AND found.word_id = words.id
- ORDER BY title");
-
- $sth->execute($word);
-
- while (my($word, $title) = $sth->fetchrow()) {
- say "$word\t$title";
- }
-
- $sth->finish();
-
- # disconnect from database
- $dbh->disconnect();
+ my($word) = @_;
+
+ # connect to index database
+ my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
+ { RaiseError => 1, AutoCommit => 1 });
+
+ # search locations of each word
+ my $sth = $dbh->prepare("
+ SELECT word, title
+ FROM documents, words, found
+ WHERE word = ?
+ AND found.document_id = documents.id
+ AND found.word_id = words.id
+ ORDER BY title");
+
+ $sth->execute($word);
+
+ while (my($word, $title) = $sth->fetchrow()) {
+ say "$word $title";
+ }
+
+ $sth->finish();
+
+ # disconnect from database
+ $dbh->disconnect();
}
diff --git a/challenge-024/paulo-custodio/test.pl b/challenge-024/paulo-custodio/test.pl
index a5930b3da4..48d6cebbee 100644
--- a/challenge-024/paulo-custodio/test.pl
+++ b/challenge-024/paulo-custodio/test.pl
@@ -6,72 +6,72 @@ use 5.030;
use Path::Tiny;
use Test::More;
use WWW::Mechanize;
-use utf8::all; # books are in UTF-8
+use utf8::all; # books are in UTF-8
# Challenge 024
# Task #1
# Create a smallest script in terms of size that on execution doesn't throw any
-# error. The script doesn't have to do anything special. You could even come up
+# error. The script doesn't have to do anything special. You could even come up
# with smallest one-liner.
-#
+#
# My solution: an empty file! (0 bytes) - Perl executes it and does not throw an error
is capture("perl perl/ch-1.pl"), "";
# Task #2
-# Create a script to implement full text search functionality using Inverted
+# Create a script to implement full text search functionality using Inverted
# Index. According to wikipedia:
#
# Download a couple of books from https://www.gutenberg.org/ebooks/
-get_book('The Masque of the Red Death.txt',
- 'https://www.gutenberg.org/files/1064/1064-0.txt');
-get_book('The Fall of the House of Usher.txt',
- 'https://www.gutenberg.org/cache/epub/932/pg932.txt');
-get_book('The Cask of Amontillado.txt',
- 'https://www.gutenberg.org/cache/epub/1063/pg1063.txt');
-get_book('The Raven.txt',
- 'https://www.gutenberg.org/cache/epub/17192/pg17192.txt');
+get_book('The Masque of the Red Death.txt',
+ 'https://www.gutenberg.org/files/1064/1064-0.txt');
+get_book('The Fall of the House of Usher.txt',
+ 'https://www.gutenberg.org/cache/epub/932/pg932.txt');
+get_book('The Cask of Amontillado.txt',
+ 'https://www.gutenberg.org/cache/epub/1063/pg1063.txt');
+get_book('The Raven.txt',
+ 'https://www.gutenberg.org/cache/epub/17192/pg17192.txt');
# show index
is capture("perl perl/ch-2.pl search death"), <<END;
-death The Fall of the House of Usher.txt
-death The Masque of the Red Death.txt
-death The Raven.txt
+death The Fall of the House of Usher.txt
+death The Masque of the Red Death.txt
+death The Raven.txt
END
is capture("perl perl/ch-2.pl search mystery"), <<END;
-mystery The Fall of the House of Usher.txt
-mystery The Raven.txt
+mystery The Fall of the House of Usher.txt
+mystery The Raven.txt
END
is capture("perl perl/ch-2.pl search imagination"), <<END;
-imagination The Fall of the House of Usher.txt
-imagination The Raven.txt
+imagination The Fall of the House of Usher.txt
+imagination The Raven.txt
END
done_testing;
sub capture {
- my($cmd) = @_;
- my $out = `$cmd`;
- $out =~ s/[ \t\v\f\r]*\n/\n/g;
- return $out;
+ my($cmd) = @_;
+ my $out = `$cmd`;
+ $out =~ s/[ \t\v\f\r]*\n/\n/g;
+ return $out;
}
sub run {
- my($cmd) = @_;
- ok 0==system($cmd), $cmd;
+ my($cmd) = @_;
+ ok 0==system($cmd), $cmd;
}
sub get_book {
- my($file, $url) = @_;
- if (!-f $file) {
- say "Getting $url --> $file";
- my $mech = WWW::Mechanize->new();
- $mech->get($url);
- path($file)->spew($mech->content);
-
- # build index
- run("perl perl/ch-2.pl add '$file'");
- }
+ my($file, $url) = @_;
+ if (!-f $file) {
+ say "Getting $url --> $file";
+ my $mech = WWW::Mechanize->new();
+ $mech->get($url);
+ path($file)->spew($mech->content);
+
+ # build index
+ run("perl perl/ch-2.pl add '$file'");
+ }
}