Skip to content

Commit

Permalink
Fixing unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Sep 12, 2024
1 parent 877c7eb commit c917c2d
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 14 deletions.
2 changes: 1 addition & 1 deletion deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
org.apache.spark/spark-mllib_2.12 {:mvn/version "3.0.1"}
org.apache.spark/spark-sql_2.12 {:mvn/version "3.0.1"}
org.apache.spark/spark-streaming_2.12 {:mvn/version "3.0.1"}
org.tribuo/tribuo-all {:mvn/version "4.2.0" :extension "pom"}
org.tribuo/tribuo-all {:mvn/version "4.3.1" :extension "pom"}
}
:extra-paths ["neanderthal" "test"]}

Expand Down
10 changes: 4 additions & 6 deletions src/tech/v3/dataset.clj
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@
- `:disable-comment-skipping?` - As default, the `#` character is recognised as a
line comment when found in the beginning of a line of text in a CSV file,
and the row will be ignored. Set `true` to disable this behavior.
- `:disable-na-as-missing?` - As default, the string \"NA\" is (case-insensitively)
parsed as `nil`. Set `true` to disable this behavior.
- `:max-chars-per-column` - Defaults to 4096. Columns with more characters than this
will result in an exception.
- `:max-num-columns` - Defaults to 8192. CSV,TSV files with more columns than this
Expand Down Expand Up @@ -477,8 +475,8 @@ null [6 3]:


(defn concat
"Concatenate datasets in place using a copying-concatenation.
See also concat-inplace as it may be more efficient for your use case if you have
"Concatenate datasets using a copying-concatenation.
See also [[concat-inplace]] as it may be more efficient for your use case if you have
a small number (like less than 3) of datasets."
([dataset & args]
(apply tech.v3.dataset.base/concat dataset args))
Expand Down Expand Up @@ -644,9 +642,9 @@ null [6 3]:


(defn group-by
"Produce a map of key-fn-value->dataset. The argument to key-fn
"Produce a map of key-fn-value->dataset. The argument to key-fn
is a map of colname->column-value representing a row in dataset.
Each dataset in the resulting map contains all and only rows
Each dataset in the resulting map contains all and only rows
that produce the same key-fn-value.
Options - options are passed into dtype arggroup:
Expand Down
8 changes: 4 additions & 4 deletions src/tech/v3/dataset/metamorph.clj
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,8 @@ null [6 3]:


(defn concat
"Concatenate datasets in place using a copying-concatenation.
See also concat-inplace as it may be more efficient for your use case if you have
"Concatenate datasets using a copying-concatenation.
See also [[concat-inplace]] as it may be more efficient for your use case if you have
a small number (like less than 3) of datasets."
([& args]
(apply tech.v3.dataset.metamorph-api/concat args))
Expand Down Expand Up @@ -489,9 +489,9 @@ null [6 3]:


(defn group-by
"Produce a map of key-fn-value->dataset. The argument to key-fn
"Produce a map of key-fn-value->dataset. The argument to key-fn
is a map of colname->column-value representing a row in dataset.
Each dataset in the resulting map contains all and only rows
Each dataset in the resulting map contains all and only rows
that produce the same key-fn-value.
Options - options are passed into dtype arggroup:
Expand Down
5 changes: 2 additions & 3 deletions test/tech/v3/dataset/mapseq_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@

;;forward map from input value to encoded value.
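;;After ETL, column values are numeric. NOTE(review): the label-map value is now compared
;;without a `double` coercion, so the encoded values are presumably no longer doubles - confirm.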
(let [apple-value (-> (get (ds-mod/inference-target-label-map dataset) :apple)
double)]
(let [apple-value (get (ds-mod/inference-target-label-map dataset) :apple)]
(is (= #{:apple}
(as-> dataset ds
(ds/filter ds #(= apple-value (:fruit-name %)))
Expand Down Expand Up @@ -172,7 +171,7 @@
:apple :fruit-name-apple,
:lemon :fruit-name-lemon},
:src-column :fruit-name,
:result-datatype :float64}
:result-datatype :int64}
(into {} (first (ds-cat/dataset->one-hot-maps dataset)))))
(is (= #{:mass :fruit-name-orange :fruit-name-mandarin :width :fruit-name-apple :color-score
:fruit-name-lemon :height}
Expand Down

0 comments on commit c917c2d

Please sign in to comment.