aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj60
-rw-r--r--challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj12
2 files changed, 64 insertions, 8 deletions
diff --git a/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj b/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj
index 8d237772b6..0f0930b838 100644
--- a/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj
+++ b/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj
@@ -1,14 +1,62 @@
(ns tw.weekly.c132.t2
- (:require [clojure.edn :as :edn))
+ (:require [clojure.edn :as edn]
+ [clojure.pprint :refer [cl-format]]))
;;;
; Task description for TASK #2, Hash Join
;;;
-(def DEFAULT-INPUT [])
+(def MAXSIZE "The maximum number of build relations to process at one time." 3)
+(def DEFAULT-INPUT
+ [[[20, "Alex" ]
+ [28, "Joe" ]
+ [38, "Mike" ]
+ [18, "Alex" ]
+ [25, "David" ]
+ [18, "Simon" ]]
+ [["Alex", "Stewart"]
+ ["Joe", "Root" ]
+ ["Mike", "Gatting"]
+ ["Joe", "Blog" ]
+ ["Alex", "Jones" ]
+ ["Simon","Duane" ]]
+ 1
+ 0])
+
+(defn butnth
+ "Returns all values except the one at index."
+ [coll index]
+ (keep-indexed (fn [i v] (when (not= i index) v)) coll))
+
+;;; Classic Hash Join
+;; Algorithm description:
+; https://en.wikipedia.org/wiki/Hash_join#Classic_hash_join
+;; Notes:
+; - We proactively batch the smaller relation (build) into MAXSIZE chunks to
+; ensure it can fit into memory.
+; - Order of output relation is not guaranteed.
+;;;
+(defn hash-join
+ [a b a-index b-index]
+ (let [[build build-index probe probe-index]
+ (if (<= (count a) (count b))
+ [a a-index b b-index]
+ [b b-index a a-index])]
+ (-> (comp
+ (partition-all MAXSIZE)
+ (map (fn [batch] (group-by #(nth % build-index) batch)))
+ (mapcat
+ (fn [build-map]
+ (keep (fn [row]
+ (when-let [ks (build-map (nth row probe-index))]
+ (map #(concat % (butnth row probe-index)) ks)))
+ probe)))
+ cat)
+ (sequence build))))
(defn -main
- "Run Task 1 with a given input D and S, defaulting to the first example from
- the task description."
+ "Run Task 1 with a given input H1, H2, I1, and I2, defaulting to the first
+ example from the task description."
[& args]
- (let [[H1 H2] (or (some->> args (map edn/read-string)) DEFAULT-INPUT)]
- ))
+ (let [[H1 H2 I1 I2] (or (some->> args (map edn/read-string)) DEFAULT-INPUT)]
+ (->> (hash-join H1 H2 I1 I2)
+ (cl-format true "~:{~a, ~s, ~s~%~}"))))
diff --git a/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj b/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj
index 6d2c5f2894..00edee2c66 100644
--- a/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj
+++ b/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj
@@ -1,7 +1,7 @@
(ns tw.weekly.c132-test
(:require [clojure.test :refer [deftest is testing]]
[tw.weekly.c132.t1 :as t1]
- #_[tw.weekly.c132.t2 :refer []]))
+ [tw.weekly.c132.t2 :as t2]))
(def today (t1/parse-date t1/DEFAULT-TODAY))
@@ -17,4 +17,12 @@
(deftest task-2
(testing "Task 2, Hash Join"
- ))
+ (is (= (set '((20 "Alex" "Stewart")
+ (18 "Alex" "Stewart")
+ (28 "Joe" "Root")
+ (38 "Mike" "Gatting")
+ (28 "Joe" "Blog")
+ (20 "Alex" "Jones")
+ (18 "Alex" "Jones")
+ (18 "Simon" "Duane")))
+ (set (apply t2/hash-join t2/DEFAULT-INPUT))))))