aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaulo Custodio <pauloscustodio@gmail.com>2023-05-03 12:09:29 +0100
committerPaulo Custodio <pauloscustodio@gmail.com>2023-05-03 12:10:57 +0100
commit02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6 (patch)
tree345b87193914524c9c3b36d50fcacf92b2f904e5
parent6c6480baed8abb849ed2d2ffa29f5e8710b64268 (diff)
downloadperlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.gz
perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.bz2
perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.zip
Add C solution
-rw-r--r--challenge-001/paulo-custodio/test.pl4
-rw-r--r--challenge-024/paulo-custodio/c/ch-2.c294
-rw-r--r--challenge-024/paulo-custodio/perl/ch-2.pl3
-rw-r--r--challenge-024/paulo-custodio/python/ch-2.py5
-rw-r--r--challenge-024/paulo-custodio/t/test-2.yaml28
5 files changed, 316 insertions, 18 deletions
diff --git a/challenge-001/paulo-custodio/test.pl b/challenge-001/paulo-custodio/test.pl
index e05049e9fd..132d8f52be 100644
--- a/challenge-001/paulo-custodio/test.pl
+++ b/challenge-001/paulo-custodio/test.pl
@@ -148,11 +148,11 @@ sub build {
return "python3 ../../challenge-001/paulo-custodio/brainfuck.py $prog_wo_ext.bf";
}
if (/^c$/) {
- run("gcc $prog -o $prog_wo_ext -lm -lmpfr -lgmp") if (!-f $exe || -M $exe > -M $prog);
+ run("gcc $prog -o $prog_wo_ext -lm -lmpfr -lgmp -lsqlite3") if (!-f $exe || -M $exe > -M $prog);
return $exe;
}
if (/^cpp$/) {
- run("g++ $prog -o $prog_wo_ext -lmpfr -lgmpxx -lgmp") if (!-f $exe || -M $exe > -M $prog);
+ run("g++ $prog -o $prog_wo_ext -lmpfr -lgmpxx -lgmp -lsqlite3") if (!-f $exe || -M $exe > -M $prog);
return $exe;
}
if (/^d$/) {
diff --git a/challenge-024/paulo-custodio/c/ch-2.c b/challenge-024/paulo-custodio/c/ch-2.c
new file mode 100644
index 0000000000..e3424eec6b
--- /dev/null
+++ b/challenge-024/paulo-custodio/c/ch-2.c
@@ -0,0 +1,294 @@
+/*
+Challenge 024
+
+Task #2
+Create a script to implement full text search functionality using Inverted
+Index. According to wikipedia:
+
+In computer science, an inverted index (also referred to as a postings file
+or inverted file) is a database index storing a mapping from content, such as
+words or numbers, to its locations in a table, or in a document or a set of
+documents (named in contrast to a forward index, which maps from documents to
+content). The purpose of an inverted index is to allow fast full-text
+searches, at a cost of increased processing when a document is added to the
+database.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sqlite3.h>
+
+#define DATABASE "index.db3"
+#define SEPARATORS " \t\r\n\v\f!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~"
+
+void* check_mem(void* p) {
+ if (!p) {
+ fputs("Out of memory\n", stderr);
+ exit(EXIT_FAILURE);
+ }
+ return p;
+}
+
+bool file_exists(const char *filename) {
+ struct stat buffer;
+ return (stat(filename, &buffer) == 0);
+}
+
+void basename(char* filename) {
+ char* p = filename + strlen(filename);
+ while (p > filename && p[-1] != '/' && p[-1] != '\\' && p[-1] != '.')
+ p--;
+ if (p > filename && p[-1] == '.')
+ p[-1] = '\0';
+}
+
+char* strtolower(char* str) {
+ for (char* p = str; *p; p++)
+ *p = tolower(*p);
+ return str;
+}
+
+void check_error(sqlite3* db, int rc) {
+ if (rc != SQLITE_OK) {
+ fprintf(stderr, "Database error: %s\n", sqlite3_errmsg(db));
+ sqlite3_close(db);
+ exit(EXIT_FAILURE);
+ }
+}
+
+void check_error_step(sqlite3* db, int step) {
+ if (step != SQLITE_DONE) {
+ fprintf(stderr, "Database error: %s\n", sqlite3_errmsg(db));
+ sqlite3_close(db);
+ exit(EXIT_FAILURE);
+ }
+}
+
+int callback(void *NotUsed, int argc, char **argv, char **azColName) {
+ for(int i = 0; i < argc; i++) {
+ printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
+ }
+ printf("\n");
+ return 0;
+}
+
+sqlite3* open_database() {
+ sqlite3 *db;
+ int rc = sqlite3_open(DATABASE, &db);
+ check_error(db, rc);
+ return db;
+}
+
+void close_database(sqlite3* db) {
+ sqlite3_close(db);
+}
+
+void create_database() {
+ if (file_exists(DATABASE))
+ return;
+
+ sqlite3 *db = open_database();
+ char *zErrMsg = NULL;
+
+ const char* sql =
+ "CREATE TABLE words ("
+ "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+ "word TEXT UNIQUE);"
+ "CREATE TABLE documents ("
+ "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+ "title TEXT UNIQUE); "
+ "CREATE TABLE found ("
+ "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+ "document_id INTEGER, "
+ "word_id INTEGER); "
+ "CREATE UNIQUE INDEX found_index ON found(document_id, word_id); ";
+
+ int rc = sqlite3_exec(db, sql, callback, 0, &zErrMsg);
+ if (rc != SQLITE_OK) {
+ fprintf(stderr, "SQL error: %s\n", zErrMsg);
+ sqlite3_free(zErrMsg);
+ sqlite3_close(db);
+ return;
+ }
+
+ close_database(db);
+}
+
+int get_or_add_document_id(sqlite3* db, const char* title) {
+ bool found = false;
+ int id = -1;
+
+ const char* sql = "SELECT id FROM documents WHERE title = ?;";
+ sqlite3_stmt* stmt;
+ int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_text(stmt, 1, title, strlen(title), NULL);
+ int step = sqlite3_step(stmt);
+ if (step == SQLITE_ROW) {
+ id = sqlite3_column_int(stmt, 0);
+ found = true;
+ }
+ sqlite3_finalize(stmt);
+
+ if (found)
+ return id;
+
+ sql = "INSERT INTO documents(title) VALUES(?);";
+ rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_text(stmt, 1, title, strlen(title), NULL);
+ step = sqlite3_step(stmt);
+ check_error_step(db, step);
+ sqlite3_finalize(stmt);
+
+ return sqlite3_last_insert_rowid(db);
+}
+
+int get_or_add_word_id(sqlite3* db, const char* word) {
+ bool found = false;
+ int id = -1;
+
+ const char* sql = "SELECT id FROM words WHERE word = ?;";
+ sqlite3_stmt* stmt;
+ int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_text(stmt, 1, word, strlen(word), NULL);
+ int step = sqlite3_step(stmt);
+ if (step == SQLITE_ROW) {
+ id = sqlite3_column_int(stmt, 0);
+ found = true;
+ }
+ sqlite3_finalize(stmt);
+
+ if (found)
+ return id;
+
+ sql = "INSERT INTO words(word) VALUES(?);";
+ rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_text(stmt, 1, word, strlen(word), NULL);
+ step = sqlite3_step(stmt);
+ check_error_step(db, step);
+ sqlite3_finalize(stmt);
+
+ return sqlite3_last_insert_rowid(db);
+}
+
+int add_found(sqlite3* db, int document_id, int word_id) {
+ bool found = false;
+ int id = -1;
+
+ const char* sql = "SELECT id FROM found "
+ "WHERE document_id = ? AND word_id = ?;";
+ sqlite3_stmt* stmt;
+ int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_int(stmt, 1, document_id);
+ sqlite3_bind_int(stmt, 2, word_id);
+ int step = sqlite3_step(stmt);
+ if (step == SQLITE_ROW) {
+ id = sqlite3_column_int(stmt, 0);
+ found = true;
+ }
+ sqlite3_finalize(stmt);
+
+ if (found)
+ return id;
+
+ sql = "INSERT INTO found(document_id, word_id) VALUES(?, ?);";
+ rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_int(stmt, 1, document_id);
+ sqlite3_bind_int(stmt, 2, word_id);
+ step = sqlite3_step(stmt);
+ check_error_step(db, step);
+ sqlite3_finalize(stmt);
+
+ return sqlite3_last_insert_rowid(db);
+}
+
+void add_document(const char* filename) {
+ sqlite3* db = open_database();
+
+ // title
+ char* title = check_mem(strdup(filename));
+ basename(title);
+
+ // document id
+ int document_id = get_or_add_document_id(db, title);
+
+ // read document
+ FILE* fp = fopen(filename, "r");
+ if (fp == NULL) {
+ perror(filename);
+ close_database(db);
+ free(title);
+ exit(EXIT_FAILURE);
+ }
+
+ char line[BUFSIZ];
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ char* word = strtok(line, SEPARATORS);
+ while (word != NULL) {
+ strtolower(word);
+ int word_id = get_or_add_word_id(db, word);
+ add_found(db, document_id, word_id);
+ word = strtok(NULL, SEPARATORS);
+ }
+ }
+
+ printf("Indexed %s\n", title);
+
+ fclose(fp);
+ close_database(db);
+ free(title);
+}
+
+void search_documents(const char* word) {
+ sqlite3* db = open_database();
+
+ char* lcword = strtolower(check_mem(strdup(word)));
+
+ const char* sql = "SELECT word, title "
+ "FROM documents, words, found "
+ "WHERE word = ? "
+ "AND found.document_id = documents.id "
+ "AND found.word_id = words.id "
+ "ORDER BY title;";
+ sqlite3_stmt* stmt;
+ int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+ check_error(db, rc);
+ sqlite3_bind_text(stmt, 1, lcword, strlen(lcword), NULL);
+ int step;
+ while ((step = sqlite3_step(stmt)) == SQLITE_ROW) {
+ printf("%s %s\n",
+ sqlite3_column_text(stmt, 0),
+ sqlite3_column_text(stmt, 1));
+ }
+ sqlite3_finalize(stmt);
+
+ close_database(db);
+ free(lcword);
+}
+
+int usage(void) {
+ fputs("usage: ch-2 add|search file|word\n", stderr);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 3)
+ return usage();
+
+ create_database();
+ if (strcmp(argv[1], "add") == 0)
+ add_document(argv[2]);
+ else if (strcmp(argv[1], "search") == 0)
+ search_documents(argv[2]);
+ else
+ return usage();
+}
diff --git a/challenge-024/paulo-custodio/perl/ch-2.pl b/challenge-024/paulo-custodio/perl/ch-2.pl
index 2d74437c97..bd938b5a74 100644
--- a/challenge-024/paulo-custodio/perl/ch-2.pl
+++ b/challenge-024/paulo-custodio/perl/ch-2.pl
@@ -41,6 +41,7 @@ CREATE TABLE found (
document_id INTEGER,
word_id INTEGER
);
+CREATE UNIQUE INDEX found_index ON found(document_id, word_id);
END
close($p) or die "sqlite3 failed";
}
@@ -64,7 +65,7 @@ sub add_doc {
my($doc) = @_;
# get title
- my $title = path($doc)->basename;
+ (my $title = path($doc)->basename) =~ s/\.\w+$//;
# connect to index database
my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
diff --git a/challenge-024/paulo-custodio/python/ch-2.py b/challenge-024/paulo-custodio/python/ch-2.py
index 7b3aff407a..6f4ab47bb9 100644
--- a/challenge-024/paulo-custodio/python/ch-2.py
+++ b/challenge-024/paulo-custodio/python/ch-2.py
@@ -47,6 +47,9 @@ def create_database():
word_id INTEGER
);
''')
+ cur.execute('''
+ CREATE UNIQUE INDEX found_index ON found(document_id, word_id);
+ ''')
con.commit()
con.close()
@@ -94,7 +97,7 @@ def add_found(con, document_id, word_id):
def add_doc(file):
con = sqlite3.connect(DBFILE)
- title = os.path.basename(file)
+ title = re.sub(r"\.\w+$", "", os.path.basename(file))
document_id = get_document_id(con, title)
# read document
diff --git a/challenge-024/paulo-custodio/t/test-2.yaml b/challenge-024/paulo-custodio/t/test-2.yaml
index 1f15630cb9..7df1a09659 100644
--- a/challenge-024/paulo-custodio/t/test-2.yaml
+++ b/challenge-024/paulo-custodio/t/test-2.yaml
@@ -1,52 +1,52 @@
-- setup:
+- setup: unlink('index.db3');1
cleanup:
args: add 'The Cask of Amontillado.txt'
input:
output: |
- Indexed The Cask of Amontillado.txt
+ Indexed The Cask of Amontillado
- setup:
cleanup:
args: add 'The Fall of the House of Usher.txt'
input:
output: |
- Indexed The Fall of the House of Usher.txt
+ Indexed The Fall of the House of Usher
- setup:
cleanup:
args: add 'The Masque of the Red Death.txt'
input:
output: |
- Indexed The Masque of the Red Death.txt
+ Indexed The Masque of the Red Death
- setup:
cleanup:
args: add 'The Raven.txt'
input:
output: |
- Indexed The Raven.txt
+ Indexed The Raven
- setup:
cleanup:
args: search death
input:
output: |
- death The Fall of the House of Usher.txt
- death The Masque of the Red Death.txt
- death The Raven.txt
+ death The Fall of the House of Usher
+ death The Masque of the Red Death
+ death The Raven
- setup:
cleanup:
args: search mystery
input:
output: |
- mystery The Fall of the House of Usher.txt
- mystery The Raven.txt
+ mystery The Fall of the House of Usher
+ mystery The Raven
- setup:
cleanup:
args: search revenge
input:
output: |
- revenge The Cask of Amontillado.txt
+ revenge The Cask of Amontillado
- setup:
- cleanup: unlink('index.db3')
+ cleanup: unlink('index.db3');1
args: search imagination
input:
output: |
- imagination The Fall of the House of Usher.txt
- imagination The Raven.txt
+ imagination The Fall of the House of Usher
+ imagination The Raven