From 8e4030c7c10f75d88584b04f4623449bb8589fb8 Mon Sep 17 00:00:00 2001 From: daslu Date: Tue, 11 Feb 2025 01:09:11 +0200 Subject: [PATCH] updated datasets namespace --- notebooks/noj_book/datasets.clj | 50 +++++++++++---------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/notebooks/noj_book/datasets.clj b/notebooks/noj_book/datasets.clj index 4797126..7542185 100644 --- a/notebooks/noj_book/datasets.clj +++ b/notebooks/noj_book/datasets.clj @@ -2,52 +2,36 @@ ;; author: Daniel Slutsky, Ken Huang -;; ## Rdatasets -;; For our tutorials here, -;; let us fetch some datasets from [Rdatasets](https://vincentarelbundock.github.io/Rdatasets/): +;; We may use various sources of datasets for our tutorials here. (ns noj-book.datasets - (:require [tablecloth.api :as tc])) + (:require [tablecloth.api :as tc] + [scicloj.metamorph.ml.rdatasets :as rdatasets])) -(def iris - (-> "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv" - (tc/dataset {:key-fn keyword}) - (tc/rename-columns {:Sepal.Length :sepal-length - :Sepal.Width :sepal-width - :Petal.Length :petal-length - :Petal.Width :petal-width - :Species :species}))) +;; ## rdatasets +;; One of the main sources is the `rdatasets` namespace of [metamorph.ml](https://github.com/scicloj/metamorph.ml), +;; which can fetch datasets from the [Rdatasets](https://vincentarelbundock.github.io/Rdatasets/) collection. -iris +(rdatasets/datasets-iris) -(def mtcars - (-> "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv" - (tc/dataset {:key-fn keyword}))) +(rdatasets/ggplot2-mpg) -mtcars - -(def scatter - (-> "https://vincentarelbundock.github.io/Rdatasets/csv/openintro/simulated_scatter.csv" - (tc/dataset {:key-fn keyword}))) - -(tc/head scatter) +(rdatasets/openintro-simulated_scatter) ;; ## Plotly ;; We can also use datasets from [Plotly Sample Datasets](https://plotly.github.io/datasets/) -(-> "https://raw.githubusercontent.com/plotly/datasets/refs/heads/master/1962_2006_walmart_store_openings.csv" - (tc/dataset {:key-fn keyword - :parser-fn {:OPENDATE :string - :date_super :string}}) - (tc/head)) +(tc/dataset + "https://raw.githubusercontent.com/plotly/datasets/refs/heads/master/1962_2006_walmart_store_openings.csv" + {:key-fn keyword + :parser-fn {:OPENDATE :string + :date_super :string}}) ;; ## tech.ml.dataset (TMD) ;; [TMD's repo](https://github.com/techascent/tech.ml.dataset/tree/master/test/data) ;; also has some datasets that we can use: -(def stocks - (tc/dataset - "https://raw.githubusercontent.com/techascent/tech.ml.dataset/master/test/data/stocks.csv" - {:key-fn keyword})) +(tc/dataset + "https://raw.githubusercontent.com/techascent/tech.ml.dataset/master/test/data/stocks.csv" + {:key-fn keyword}) -stocks