diff --git a/.github/workflows/clojure.yml b/.github/workflows/clojure.yml index d11e119..65451eb 100644 --- a/.github/workflows/clojure.yml +++ b/.github/workflows/clojure.yml @@ -10,7 +10,7 @@ jobs: - name: Install dependencies run: lein -U deps - name: Run tests - run: lein test + run: lein test-all - name: Deploy if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/devel' run: lein deploy diff --git a/README.md b/README.md index 0f3db08..2ccbdf7 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ Oscaro’s generic I/O tools collection. - [spit](#spit) - [exists?](#exists) - [core](#core) - - [gzipped?](#gzipped) - [file-reader](#file-reader) +- [About compression](#about-compression) - [clj-kondo](#clj-kondo) - [License](#license) @@ -290,20 +290,21 @@ you need to call (close! file) when you done. **returns**: an map with a `:stream` key -#### `gzipped?` +## About compression -Test if a filename ends with `.gz` or `.gzip` +By default, `tools.io` supports _[gzip]_, _[bzip2]_ and _[framed lz4][]_ +compression algorithms and can be extended by implementing a custom protocol +(see sources). -**arguments**: -- filename - -**returns**: a boolean +It also supports the following formats if you provide the required dependencies. +- _[xz]_ with `org.tukaani/xz` provided +- _[zstd]_ with `com.github.luben/zstd-jni` provided -examples -```clojure -(core/gzipped? "toto.gz"); => true -(core/gzipped? "toto.GZip"); => true -``` +[gzip]: https://en.wikipedia.org/wiki/Gzip +[bzip2]: https://en.wikipedia.org/wiki/Bzip2 +[xz]: https://en.wikipedia.org/wiki/XZ_Utils +[zstd]: https://en.wikipedia.org/wiki/Zstd +[framed lz4]: https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) ## clj-kondo diff --git a/project.clj b/project.clj index 665b4c8..6d76de6 100644 --- a/project.clj +++ b/project.clj @@ -29,5 +29,13 @@ :profiles {:dev {:global-vars {*warn-on-reflection* true} :source-paths ["dev"] :resource-paths ["test/resources"] - :dependencies [[org.clojure/tools.namespace "1.5.0"]]}} + :dependencies [[org.clojure/tools.namespace "1.5.0"]]} + :extra-compression + {:dependencies [[org.tukaani/xz "1.9"] + [com.github.luben/zstd-jni "1.5.6-3"]]}} + :test-selectors {:default (fn [m] (not (:extra-compression m))) + :extra-compression :extra-compression} + :aliases {"repl" ["with-profile" "+extra-compression" "repl"] + "test-all" ["with-profile" "+extra-compression" "test" ":all"]} + :target-path "target/%s/" :repl-options {:init-ns user}) diff --git a/src/tools/io.clj b/src/tools/io.clj index fd6f6a4..216670b 100644 --- a/src/tools/io.clj +++ b/src/tools/io.clj @@ -229,54 +229,54 @@ (def ^{:added "0.3.16" :doc - "Return a lazy seq of string from a [protocol://]text[.gz] file. - warning: the seq must be entirely consumed before the file is closed."} + "Returns a lazy seq of string from a [protocol://]text[.zext] file. + Warning: The seq must be entirely consumed before the file is closed."} read-text-file (read-string-format-file-fn identity)) (def ^{:added "0.3.16" :doc - "Write a seq of strings in a [protocol://]text[.gz] file."} + "Writes a seq of strings in a [protocol://]text[.zext] file."} write-text-file (write-string-file-fn identity)) (def ^{:added "0.3.16" :doc - "Return a lazy seq of parsed json objects from a [protocol://]jsons[.gz] file. - warning: the seq must be entirely consumed before the file is closed."} + "Returns a lazy seq of parsed json objects from a [protocol://]jsons[.zext] file. + Warning: The seq must be entirely consumed before the file is closed."} read-jsons-file (read-string-format-file-fn #(charred/read-json % :key-fn keyword))) (def ^{:added "0.3.16" :doc - "Write a seq of elements serialized as JSON in a [protocol://]jsons[.gz] file."} + "Writes a seq of elements serialized as JSON in a [protocol://]jsons[.zext] file."} write-jsons-file (write-string-file-fn #(charred/write-json-str % :indent-str nil :escape-slash false))) (def ^{:added "0.3.16" :doc - "Return a lazy seq of parsed edn objects from a [protocol://]edn[.gz] file. - warning: the seq must be entirely consumed before the file is closed."} + "Returns a lazy seq of parsed edn objects from a [protocol://]edn[.zext] file. + Warning: The seq must be entirely consumed before the file is closed."} read-edns-file (read-string-format-file-fn edn/read-string)) (def ^{:added "0.3.16" :doc - "Write a seq of elements serialized as EDN in a [protocol://]edn[.gz] file."} + "Writes a seq of elements serialized as EDN in a [protocol://]edn[.zext] file."} write-edns-file (write-string-file-fn prn-str)) (defn write-edn-file - "Write an element serialized as EDN in a [protocol://]edn[.gz] file. + "Writes an element serialized as EDN in a [protocol://]edn[.zext] file. This is equivalent to call write-edns-file on a one-element sequence." ([filename x] (write-edn-file filename {} x)) ([filename options x] (write-edns-file filename options [x]))) (defn ^{:added "0.3.16" :doc - "Return a lazy seq of parsed csv row as vector from a [protocol://]csv[.gz] file. - see http://clojure.github.io/data.csv/ for options - warning: the seq must be entirely consumed before the file is closed. + "Returns a lazy seq of parsed csv row as vector from a [protocol://]csv[.zext] file. + See http://clojure.github.io/data.csv/ for options + Warning: The seq must be entirely consumed before the file is closed. sample usage: (read-csv-file \"infos_tarifs.csv\" {:encoding \"ISO-8859-1\"} :separator \\;)"} @@ -297,8 +297,8 @@ (defn ^{:added "0.3.16" :doc - "Write a seq of vectors serialized as CSV in a [protocol://]csv[.gz] file. - see http://clojure.github.io/data.csv/ for options. + "Writes a seq of vectors serialized as CSV in a [protocol://]csv[.zext] file. + See http://clojure.github.io/data.csv/ for options. (write-csv-file out my-lines) (write-csv-file out [stream-options-map] my-lines [csv options...]) @@ -325,15 +325,15 @@ (def ^{:added "0.3.16" :doc - "Return a lazy seq of parsed json objects from [protocol://]jsons[.gz] files. - warning: the seq must be entirely consumed before every files are closed."} + "Returns a lazy seq of parsed json objects from [protocol://]jsons[.zext] files. + Warning: The seq must be entirely consumed before every files are closed."} read-jsons-files (read-string-files-fn read-jsons-file)) (def ^{:added "0.3.16" :doc - "Return a lazy seq of parsed edn objects from [protocol://]edns[.gz] files. - warning: the seq must be entirely consumed before every files are closed."} + "Returns a lazy seq of parsed edn objects from [protocol://]edns[.zext] files. + Warning: The seq must be entirely consumed before every files are closed."} read-edns-files (read-string-files-fn read-edns-file)) diff --git a/src/tools/io/compress.clj b/src/tools/io/compress.clj new file mode 100644 index 0000000..92cef49 --- /dev/null +++ b/src/tools/io/compress.clj @@ -0,0 +1,228 @@ +(ns tools.io.compress + (:require + [clojure.string :as str]) + (:import + (java.io InputStream OutputStream) + (org.apache.commons.compress.compressors CompressorException CompressorStreamFactory) + (org.apache.commons.compress.compressors.bzip2 BZip2CompressorInputStream BZip2CompressorOutputStream) + (org.apache.commons.compress.compressors.gzip GzipCompressorInputStream GzipCompressorOutputStream GzipParameters) + (org.apache.commons.compress.compressors.lz4 FramedLZ4CompressorInputStream FramedLZ4CompressorOutputStream FramedLZ4CompressorOutputStream$BlockSize FramedLZ4CompressorOutputStream$Parameters) + (org.apache.commons.compress.compressors.xz XZUtils) + (org.apache.commons.compress.compressors.zstandard ZstdUtils))) + +(defn ^:no-doc detect? + "Do not expose! + Useful for internal use but depends too much on Common Compress." + [input-stream] + (try + (CompressorStreamFactory/detect input-stream) + (catch CompressorException e + (println (ex-message e))))) + +(defprotocol Compressor + "Interface of a Compressor implementation usable with `tools.io`." + + (-get-file-extensions + [_this] + "Returns a collection of related file extensions.") + (-get-input-stream ^InputStream + [_this input-stream options] + "Returns a stream which uncompress an input stream.") + (-get-output-stream ^OutputStream + [_this output-stream options] + "Returns a stream which compress an output stream.")) + +(defonce ^:private ^:no-doc !ext->compressor (atom {})) + +(defn register-compressor! + "Registers a new compressor implementation in the global registry." + [compressor] + (if (satisfies? Compressor compressor) + (let [exts (-get-file-extensions compressor)] + (doseq [ext (mapv str/lower-case exts)] + (swap! !ext->compressor assoc ext compressor))) + (throw (ex-info (str "Invalid compressor object:" compressor) {})))) + +(defn unregister-compressor! + "Unregisters a compressor from the global registry." + [compressor] + (if (satisfies? Compressor compressor) + (let [exts (-get-file-extensions compressor)] + (doseq [ext (mapv str/lower-case exts)] + (swap! !ext->compressor dissoc ext))) + (throw (ex-info (str "Invalid compressor object:" compressor) {})))) + +(defn get-compressor + "Returns the compressor associated with a given file extension or nil." + [file-extension] + (when (string? file-extension) + (get @!ext->compressor (str/lower-case file-extension)))) + +;; +;; Gzip support +;; + +(defn- gzip-opts + [{:keys [compression-level buffer-size]}] + (let [params (GzipParameters.)] + (when compression-level + (.setCompressionLevel params (int compression-level))) + (when buffer-size + (.setBufferSize params (int buffer-size))) + params)) + +(defrecord GzipCompressor [] + Compressor + (-get-file-extensions + [_] + ["gz" "gzip"]) + (-get-input-stream + [_ input-stream {:keys [concatenated?]}] + (GzipCompressorInputStream. input-stream (boolean concatenated?))) + (-get-output-stream + [_ output-stream opts] + (GzipCompressorOutputStream. output-stream (gzip-opts opts)))) + +;; +;; Bzip2 support +;; + +(defrecord Bzip2Compressor [] + Compressor + (-get-file-extensions + [_] + ["bz2" "bzip2"]) + (-get-input-stream + [_ input-stream {:keys [concatenated?]}] + (BZip2CompressorInputStream. input-stream (boolean concatenated?))) + (-get-output-stream + [_ output-stream {:keys [block-size]}] + (if block-size + (BZip2CompressorOutputStream. output-stream (int block-size)) + (BZip2CompressorOutputStream. output-stream)))) + +;; +;; Framed LZ4 support +;; + +(defn- ->lz4-block-size + [s] + (FramedLZ4CompressorOutputStream$BlockSize/valueOf (name s))) + +(defn- lz4-opts + [{:keys [block-size]}] + (if block-size + (FramedLZ4CompressorOutputStream$Parameters. (->lz4-block-size block-size)) + FramedLZ4CompressorOutputStream$Parameters/DEFAULT)) + +(defrecord FramedLZ4Compressor [] + Compressor + (-get-file-extensions + [_] + ["lz4"]) + (-get-input-stream + [_ input-stream {:keys [concatenated?]}] + (FramedLZ4CompressorInputStream. input-stream (boolean concatenated?))) + (-get-output-stream + [_ output-stream opts] + (FramedLZ4CompressorOutputStream. output-stream (lz4-opts opts)))) + +;; +;; Extra provided compression +;; + +(defn- ex-compression + [compression] + (let [msg (format "%s compression is not available" + (str/upper-case compression))] + (ex-info msg {:error msg :missing-compression compression}))) + +;; +;; +;; Zstandard support +;; + +(defmacro ^:private when-zstd-provided + [& body] + (if (ZstdUtils/isZstdCompressionAvailable) + `(do ~@body) + `(throw (ex-compression "Zstd")))) + +(defmacro ^:private zstd-new + [stream-name & args] + `(new ~(symbol (str "org.apache.commons.compress.compressors.zstandard." + (name stream-name))) + ~@args)) + +(defrecord ZstdCompressor [] + Compressor + (-get-file-extensions + [_] + ["zst" "zstd"]) + (-get-input-stream + [_ input-stream _opts] + (when-zstd-provided + (zstd-new "ZstdCompressorInputStream" input-stream))) + (-get-output-stream + [_ output-stream {:keys [level]}] + (when-zstd-provided + (if level + (zstd-new "ZstdCompressorOutputStream" output-stream (int level)) + (zstd-new "ZstdCompressorOutputStream" output-stream))))) + +;; +;; XZ support +;; + +(defmacro ^:private when-xz-provided + [& body] + (if (XZUtils/isXZCompressionAvailable) + `(do ~@body) + `(throw (ex-compression "XZ")))) + +(defmacro ^:private xz-new + [stream-name & args] + `(new ~(symbol (str "org.apache.commons.compress.compressors.xz." + (name stream-name))) + ~@args)) + +(defrecord XZCompressor [] + Compressor + (-get-file-extensions + [_] + ["xz"]) + (-get-input-stream + [_ input-stream {:keys [concatenated?]}] + (when-xz-provided + (xz-new "XZCompressorInputStream" input-stream (boolean concatenated?)))) + (-get-output-stream + [_ output-stream {:keys [preset]}] + (when-xz-provided + (if preset + (xz-new "XZCompressorOutputStream" output-stream (int preset)) + (xz-new "XZCompressorOutputStream" output-stream))))) + +;; +;; Register default compressors +;; + +(def ^:private default-compressors + #{(->GzipCompressor) + (->Bzip2Compressor) + (->ZstdCompressor) + (->XZCompressor) + (->FramedLZ4Compressor)}) + +(defn register-default-compressors! + "Register all default compressors." + [] + (doseq [compressor default-compressors] + (register-compressor! compressor))) + +(defn unregister-default-compressors! + "Unregister all default compressors." + [] + (doseq [compressor default-compressors] + (unregister-compressor! compressor))) + +(register-default-compressors!) diff --git a/src/tools/io/core.clj b/src/tools/io/core.clj index dd0e730..d72e50e 100644 --- a/src/tools/io/core.clj +++ b/src/tools/io/core.clj @@ -1,12 +1,12 @@ (ns tools.io.core (:require [clojure.java.io :as io] - [clojure.string :as str]) + [clojure.string :as str] + [tools.io.compress :as zio]) (:import - [java.io File Closeable Reader Writer] - [java.util.zip GZIPInputStream GZIPOutputStream ZipOutputStream ZipEntry] - [org.apache.commons.compress.archivers.zip ZipFile ZipArchiveEntry]) - (:gen-class)) + (java.io ByteArrayOutputStream Closeable File Reader Writer) + (java.util.zip ZipEntry ZipOutputStream) + (org.apache.commons.compress.archivers.zip ZipArchiveEntry ZipFile))) (defonce ^:private file-preds (atom {})) @@ -69,27 +69,28 @@ "Returns an output stream with any implementation." get-file-type) -(defn gzipped? - "Tests if a filename ends with .gz or .gzip" +(defn- file-ext [filename] - (re-find #"(?i)\.gz(?:ip)?$" (str filename))) + (second (re-find #"\.([^./]+)$" (str filename)))) (defn input-stream - "Returns an input-stream, with support of gzip compression." + "Returns an input-stream for plain or compressed file." ([filename] (input-stream filename nil)) ([filename options] - (let [is (mk-input-stream filename options)] - (if (gzipped? filename) - (update is :stream #(GZIPInputStream. %)) + (let [is (mk-input-stream filename options) + compressor (zio/get-compressor (file-ext filename))] + (if compressor + (update is :stream #(zio/-get-input-stream compressor % options)) is)))) (defn output-stream - "Returns an output-stream, with support of gzip compression." + "Returns an output-stream for plain or compressed file." ([filename] (output-stream filename nil)) ([filename options] - (let [os (mk-output-stream filename options)] - (if (gzipped? filename) - (update os :stream #(GZIPOutputStream. %)) + (let [os (mk-output-stream filename options) + compressor (zio/get-compressor (file-ext filename))] + (if compressor + (update os :stream #(zio/-get-output-stream compressor % options)) os)))) (defn file-reader @@ -152,6 +153,12 @@ (.write output buffer 0 size) (recur)))))) +(defn ^:no-doc ->byte-array + [input-stream] + (let [bao (ByteArrayOutputStream.)] + (io/copy input-stream bao) + (.toByteArray bao))) + ;; Default Hooks ;; ============= diff --git a/test/resources/compress/utf8-demo.txt b/test/resources/compress/utf8-demo.txt new file mode 100644 index 0000000..eea6fe8 --- /dev/null +++ b/test/resources/compress/utf8-demo.txt @@ -0,0 +1,57 @@ + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 CC BY + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ + ⎪⎢⎜│a²+b³ ⎟⎥⎪ + ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ + ⎪⎢⎜⎷ c₈ ⎟⎥⎪ + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ + ⎪⎢⎜ ∞ ⎟⎥⎪ + ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ + ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Combining characters: + + STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ diff --git a/test/resources/compress/utf8-demo.txt.bz2 b/test/resources/compress/utf8-demo.txt.bz2 new file mode 100644 index 0000000..c43597d Binary files /dev/null and b/test/resources/compress/utf8-demo.txt.bz2 differ diff --git a/test/resources/compress/utf8-demo.txt.gz b/test/resources/compress/utf8-demo.txt.gz new file mode 100644 index 0000000..209b847 Binary files /dev/null and b/test/resources/compress/utf8-demo.txt.gz differ diff --git a/test/resources/compress/utf8-demo.txt.lz4 b/test/resources/compress/utf8-demo.txt.lz4 new file mode 100644 index 0000000..1861b56 Binary files /dev/null and b/test/resources/compress/utf8-demo.txt.lz4 differ diff --git a/test/resources/compress/utf8-demo.txt.xz b/test/resources/compress/utf8-demo.txt.xz new file mode 100644 index 0000000..07cdace Binary files /dev/null and b/test/resources/compress/utf8-demo.txt.xz differ diff --git a/test/resources/compress/utf8-demo.txt.zst b/test/resources/compress/utf8-demo.txt.zst new file mode 100644 index 0000000..0b937cc Binary files /dev/null and b/test/resources/compress/utf8-demo.txt.zst differ diff --git a/test/tools/io/core_test.clj b/test/tools/io/core_test.clj index 14421f1..44b8ad4 100644 --- a/test/tools/io/core_test.clj +++ b/test/tools/io/core_test.clj @@ -6,7 +6,8 @@ [clojure.test :refer [are deftest is testing]] [tools.io :as tio] [tools.io.core :as sut]) - (:import (java.io File))) + (:import + (java.io Closeable File))) (deftest expand-home-test (let [home (System/getProperty "user.home")] @@ -178,3 +179,69 @@ (is (tio/exists? "test/resources/test.txt"))) (testing "exists? with an inexistant file" (is (not (tio/exists? "-i do no exists-"))))) + +(defmacro ^:private test-input-stream + [file-ext] + (let [path "compress/utf8-demo.txt"] + `(with-open [expected-stream# (io/input-stream (io/resource ~path)) + ^Closeable actual-stream# (-> ~(str path + (when (not= "plain" file-ext) + (str "." file-ext))) + (io/resource) + (sut/input-stream) + :stream)] + (is (= (vec (sut/->byte-array expected-stream#)) + (vec (sut/->byte-array actual-stream#))) + ~(format "read %s input-stream" file-ext))))) + +(deftest input-stream-test + (testing "it deals with plain data" + (test-input-stream "plain")) + (testing "it deals with compressed data" + (test-input-stream "gz") + (test-input-stream "bz2") + (test-input-stream "lz4"))) + +(deftest ^:extra-compression input-stream-extra-test + (testing "it deals with compressed data (extra algorithms)" + (test-input-stream "zst") + (test-input-stream "xz"))) + +(defmacro ^:private test-output-stream + [file-ext & [stream-opts]] + (let [path "compress/utf8-demo.txt"] + `(tio/with-tempdir [adir#] + (let [afile# (tio/join-path adir# ~(str "out." file-ext)) + byte-arr# (with-open [in# (io/input-stream (io/resource ~path))] + (sut/->byte-array in#))] + + (with-open [^Closeable out# (-> afile# + (sut/output-stream ~stream-opts) + :stream)] + (io/copy byte-arr# out#)) + + (with-open [^Closeable in# (-> afile# (sut/input-stream) :stream)] + (is (= (vec byte-arr#) + (vec (sut/->byte-array in#))))))))) + +(deftest output-stream-test + (testing "it uses uncompressed output-stream" + (test-output-stream "dat")) + + (testing "it uses compressed output stream" + (test-output-stream "gz") + (test-output-stream "gz" {:compression-level 1}) + + (test-output-stream "bz2") + (test-output-stream "bz2" {:block-size 1}) + + (test-output-stream "lz4") + (test-output-stream "lz4" {:block-size :K64}))) + +(deftest ^:extra-compression output-stream-extra-test + (testing "it uses compressed output stream (extra algorithms)" + (test-output-stream "zst") + (test-output-stream "zst" {:level 1}) + + (test-output-stream "xz") + (test-output-stream "xz" {:preset 2}))) diff --git a/test/tools/io_test.clj b/test/tools/io_test.clj index 04e0ef4..2fd3357 100644 --- a/test/tools/io_test.clj +++ b/test/tools/io_test.clj @@ -5,6 +5,7 @@ [clojure.string :as str] [clojure.test :refer [are deftest is testing]] [tools.io :as sut] + [tools.io.compress :as zio] [tools.io.core :refer [file-writer]]) (:import (java.util.zip GZIPInputStream GZIPOutputStream) @@ -121,15 +122,59 @@ (testing "list dirs from non existant directory" (is (= 0 (count (sut/list-dirs "i'm broken ~~~")))))) +(defmacro ^:private test-spit-text + [aformat & [file-ext]] + `(sut/with-tempdir [adir#] + (let [~'plain (slurp (io/resource "compress/utf8-demo.txt")) + ~'afile (sut/join-path adir# + ~(str "out.txt" + (when (not= "plain" aformat) + (str "." (or file-ext aformat)))))] + (sut/spit ~'afile ~'plain) + (is (= ~'plain (sut/slurp ~'afile)) "read with `tools.io/slurp`") + + ~(if (= "plain" aformat) + `(is (= ~'plain (slurp ~'afile)) "read with `clojure.core/slurp`") + + `(with-open [in# (io/input-stream (io/file ~'afile))] + (is (= ~aformat (zio/detect? in#)) "is a compressed stream")))))) + (deftest spit-test - (let [text "Hey\n\nI \njust\n\n met \n\tyou\n\n" - filename ".spit-slurp-tmp-test-rw-text"] - (io/delete-file filename true) - (try - (sut/spit filename text) - (is (= text (slurp filename))) - (finally - (io/delete-file filename true))))) + (testing "it writes plain text file" + (test-spit-text "plain")) + + (testing "it writes compressed text file" + (test-spit-text "gz") + (test-spit-text "bzip2" "bz2") + (test-spit-text "lz4-framed" "lz4"))) + +(deftest ^:extra-compression spit-extra-test + (testing "it writes compressed text file (extra algorithms)" + (test-spit-text "zstd" "zst") + (test-spit-text "xz"))) + +(defmacro ^:private test-slurp-text + [compression] + (let [path "compress/utf8-demo.txt"] + `(is (= (slurp (io/resource ~path)) + (sut/slurp (io/resource ~(if (= "plain" compression) + path + (str path "." compression))))) + ~(format "slurp %s file" compression)))) + +(deftest slurp-test + (testing "it reads plain text file" + (test-slurp-text "plain")) + + (testing "it reads compressed text files" + (test-slurp-text "gz") + (test-slurp-text "bz2") + (test-slurp-text "lz4"))) + +(deftest ^:extra-compression slurp-extra-test + (testing "it reads compressed text files (extra algorithms)" + (test-slurp-text "zst") + (test-slurp-text "xz"))) (defn- write-fixture [path data] @@ -138,11 +183,6 @@ GZIPOutputStream.))] (.write w ^String data))) -(deftest slurp-test - (let [test-file "test.txt"] - (is (= (slurp (io/resource test-file)) - (sut/slurp (io/resource test-file)))))) - (deftest read-jsons-file-test (testing "with resources" (testing "reading resource json.gz file"