From 82f1600f4210a8085fb48651064ccddca679375e Mon Sep 17 00:00:00 2001
From: Michiel Borkent
Date: Mon, 11 Nov 2019 21:14:30 +0100
Subject: [PATCH] [#95] add clojure.data.csv

Vendor a fork of clojure.data.csv as babashka.impl.clojure.data.csv and
expose read-csv, read-record, write-csv and write-record to scripts as
clojure.data.csv (aliased as csv). Also add with-open and close! to the
clojure.core extras so scripts can close the readers they open.

The vendored reader never pushes the EOF marker (-1) back into the
PushbackReader: unread would store it as the character \uFFFF, so input
ending in a lone carriage return would yield a spurious trailing record.
---
 README.md                              |   1 +
 src/babashka/impl/clojure/core.clj     |  19 +++-
 src/babashka/impl/clojure/data/csv.clj | 150 +++++++++++++++++++++++++
 src/babashka/impl/csv.clj              |   9 ++
 src/babashka/main.clj                  |   7 +-
 test-resources/test.csv                |   7 ++
 test/babashka/main_test.clj            |   8 +-
 test/babashka/scripts/csv.bb           |   3 +
 8 files changed, 200 insertions(+), 4 deletions(-)
 create mode 100644 src/babashka/impl/clojure/data/csv.clj
 create mode 100644 src/babashka/impl/csv.clj
 create mode 100644 test-resources/test.csv
 create mode 100644 test/babashka/scripts/csv.bb

diff --git a/README.md b/README.md
index 25ba8aa3..8875ff55 100644
--- a/README.md
+++ b/README.md
@@ -160,6 +160,7 @@ namespaces. If not all vars are available, they are enumerated explicitly.
 - [`me.raynes.conch.low-level`](https://github.com/clj-commons/conch#low-level-usage)
   aliased as `conch`
 - [`clojure.tools.cli`](https://github.com/clojure/tools.cli) aliased as `tools.cli`
+- [`clojure.data.csv`](https://github.com/clojure/data.csv) aliased as `csv`
 
 From Java the following is available:
 
diff --git a/src/babashka/impl/clojure/core.clj b/src/babashka/impl/clojure/core.clj
index 83027fd7..80d66308 100644
--- a/src/babashka/impl/clojure/core.clj
+++ b/src/babashka/impl/clojure/core.clj
@@ -6,6 +6,21 @@
   [_ _ & body]
   `(~'future-call (fn [] ~@body)))
 
+(defn close! [^java.io.Closeable x]
+  (.close x))
+
+(defn with-open*
+  [_ _ bindings & body] ; registered with :sci/macro below; the two leading args are ignored
+  (cond
+    (= (count bindings) 0) `(do ~@body)
+    (symbol? (bindings 0)) `(let ~(subvec bindings 0 2)
+                              (try
+                                (with-open ~(subvec bindings 2) ~@body) ; remaining bindings, innermost closed first
+                                (finally
+                                  (~'close! ~(bindings 0))))) ; unqualified: refers to the close! entry in core-extras
+    :else (throw (IllegalArgumentException.
+                  "with-open only allows Symbols in bindings"))))
+
 (def core-extras
   {'future-call future-call
    'future (with-meta future {:sci/macro true})
@@ -28,4 +43,6 @@
    'println println
    'println-str println-str
    'flush flush
-   'read-line read-line})
+   'read-line read-line
+   'close! close!
+   'with-open (with-meta with-open* {:sci/macro true})})
diff --git a/src/babashka/impl/clojure/data/csv.clj b/src/babashka/impl/clojure/data/csv.clj
new file mode 100644
index 00000000..5c189708
--- /dev/null
+++ b/src/babashka/impl/clojure/data/csv.clj
@@ -0,0 +1,150 @@
+;; Copyright (c) Jonas Enlund. All rights reserved. The use and
+;; distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+(ns ^{:author "Jonas Enlund"
+      :doc "Reading and writing comma separated values. Forked from
+  clojure/data.csv by @github/plexus to expose the private #'read-record
+  function, and to address some reflection warnings."}
+  babashka.impl.clojure.data.csv
+  (:require [clojure.string :as str])
+  (:import (java.io PushbackReader Reader Writer StringReader EOFException)))
+
+(set! *warn-on-reflection* true)
+
+;; Reading
+
+(def ^{:private true} lf  (int \newline))
+(def ^{:private true} cr  (int \return))
+(def ^{:private true} eof -1)
+
+(defn- read-quoted-cell [^PushbackReader reader ^StringBuilder sb sep quote]
+  (loop [ch (.read reader)]
+    (condp == ch
+      quote (let [next-ch (.read reader)] ; a quote either closes the cell or escapes a second quote
+              (condp == next-ch
+                quote (do (.append sb (char quote))
+                          (recur (.read reader)))
+                sep :sep
+                lf  :eol
+                cr  (let [next-next-ch (.read reader)]
+                      (when-not (or (== next-next-ch lf) (== next-next-ch eof)) ; never unread EOF: (char -1) is \uFFFF
+                        (.unread reader next-next-ch))
+                      :eol)
+                eof :eof
+                (throw (Exception. ^String (format "CSV error (unexpected character: %c)" next-ch)))))
+      eof (throw (EOFException. "CSV error (unexpected end of file)"))
+      (do (.append sb (char ch))
+          (recur (.read reader))))))
+
+(defn- read-cell [^PushbackReader reader ^StringBuilder sb sep quote]
+  (let [first-ch (.read reader)]
+    (if (== first-ch quote)
+      (read-quoted-cell reader sb sep quote)
+      (loop [ch first-ch]
+        (condp == ch
+          sep :sep
+          lf  :eol
+          cr  (let [next-ch (.read reader)]
+                (when-not (or (== next-ch lf) (== next-ch eof)) ; never unread EOF: (char -1) is \uFFFF
+                  (.unread reader next-ch))
+                :eol)
+          eof :eof
+          (do (.append sb (char ch))
+              (recur (.read reader))))))))
+
+(defn read-record [reader sep quote] ; returns [cells sentinel], sentinel being :eol or :eof
+  (loop [record (transient [])]
+    (let [cell (StringBuilder.)
+          sentinel (read-cell reader cell sep quote)]
+      (if (= sentinel :sep)
+        (recur (conj! record (str cell)))
+        [(persistent! (conj! record (str cell))) sentinel]))))
+
+(defprotocol Read-CSV-From
+  (read-csv-from [input sep quote]))
+
+(extend-protocol Read-CSV-From
+  String
+  (read-csv-from [s sep quote]
+    (read-csv-from (PushbackReader. (StringReader. s)) sep quote))
+
+  Reader
+  (read-csv-from [reader sep quote]
+    (read-csv-from (PushbackReader. reader) sep quote))
+
+  PushbackReader
+  (read-csv-from [reader sep quote]
+    (lazy-seq
+     (let [[record sentinel] (read-record reader sep quote)]
+       (case sentinel
+         :eol (cons record (read-csv-from reader sep quote))
+         :eof (when-not (= record [""]) ; a lone empty cell at EOF is just the end of input
+                (cons record nil)))))))
+
+(defn read-csv
+  "Reads CSV-data from input (String or java.io.Reader) into a lazy
+  sequence of vectors.
+
+   Valid options are
+     :separator (default \\,)
+     :quote (default \\\")"
+  [input & options]
+  (let [{:keys [separator quote] :or {separator \, quote \"}} options]
+    (read-csv-from input (int separator) (int quote))))
+
+
+;; Writing
+
+(defn- write-cell [^Writer writer obj sep quote quote?]
+  (let [string (str obj)
+        must-quote (quote? string)]
+    (when must-quote (.write writer (int quote)))
+    (.write writer (if must-quote
+                     (str/escape string
+                                 {quote (str quote quote)}) ; embedded quotes are doubled
+                     string))
+    (when must-quote (.write writer (int quote)))))
+
+(defn write-record [^Writer writer record sep quote quote?]
+  (loop [record record]
+    (when-first [cell record]
+      (write-cell writer cell sep quote quote?)
+      (when-let [more (next record)]
+        (.write writer (int sep)) ; separator goes between cells only
+        (recur more)))))
+
+(defn- write-csv*
+  [^Writer writer records sep quote quote? ^String newline]
+  (loop [records records]
+    (when-first [record records]
+      (write-record writer record sep quote quote?)
+      (.write writer newline)
+      (recur (next records)))))
+
+(defn write-csv
+  "Writes data to writer in CSV-format.
+
+   Valid options are
+     :separator (Default \\,)
+     :quote (Default \\\")
+     :quote? (A predicate function which determines if a string should be quoted. Defaults to quoting only when necessary.)
+     :newline (:lf (default) or :cr+lf)"
+  [writer data & options]
+  (let [opts (apply hash-map options)
+        separator (or (:separator opts) \,)
+        quote (or (:quote opts) \")
+        quote? (or (:quote? opts) #(some #{separator quote \return \newline} %))
+        newline (or (:newline opts) :lf)]
+    (write-csv* writer
+                data
+                separator
+                quote
+                quote?
+                ({:lf "\n" :cr+lf "\r\n"} newline))))
+
+
diff --git a/src/babashka/impl/csv.clj b/src/babashka/impl/csv.clj
new file mode 100644
index 00000000..6c71ab1f
--- /dev/null
+++ b/src/babashka/impl/csv.clj
@@ -0,0 +1,9 @@
+(ns babashka.impl.csv
+  {:no-doc true}
+  (:require [babashka.impl.clojure.data.csv :as csv]))
+
+(def csv-namespace
+  {'read-record csv/read-record
+   'read-csv csv/read-csv
+   'write-record csv/write-record
+   'write-csv csv/write-csv})
diff --git a/src/babashka/main.clj b/src/babashka/main.clj
index 62cce3c9..ca8e14e1 100644
--- a/src/babashka/main.clj
+++ b/src/babashka/main.clj
@@ -14,6 +14,7 @@
    [babashka.impl.pipe-signal-handler :refer [handle-pipe! pipe-signal-received?]]
    [babashka.impl.socket-repl :as socket-repl]
    [babashka.impl.tools.cli :refer [tools-cli-namespace]]
+   [babashka.impl.csv :as csv]
    [babashka.wait :as wait]
    [clojure.edn :as edn]
    [clojure.java.io :as io]
@@ -196,7 +197,8 @@ Everything after that is bound to *command-line-args*."))
                          shell clojure.java.shell
                          io clojure.java.io
                          conch me.raynes.conch.low-level
-                         async clojure.core.async}
+                         async clojure.core.async
+                         csv clojure.data.csv}
               :namespaces {'clojure.core core-extras
                            'clojure.tools.cli tools-cli-namespace
                            'clojure.edn {'read-string edn/read-string}
@@ -206,7 +208,8 @@ Everything after that is bound to *command-line-args*."))
                            'babashka.signal {'pipe-signal-received? pipe-signal-received?}
                            'clojure.java.io io-namespace
                            'me.raynes.conch.low-level conch-namespace
-                           'clojure.core.async async-namespace}
+                           'clojure.core.async async-namespace
+                           'clojure.data.csv csv/csv-namespace}
               :bindings (assoc bindings '*command-line-args* command-line-args)
               :env env
               :features #{:bb}}
diff --git a/test-resources/test.csv b/test-resources/test.csv
new file mode 100644
index 00000000..30ea8e75
--- /dev/null
+++ b/test-resources/test.csv
@@ -0,0 +1,7 @@
+Adult,87727
+Elderly,43914
+Child,33411
+Adolescent,29849
+Infant,15238
+Newborn,10050
+In Utero,1198
diff --git a/test/babashka/main_test.clj b/test/babashka/main_test.clj
index 4169b17b..93edcee3 100644
--- a/test/babashka/main_test.clj
+++ b/test/babashka/main_test.clj
@@ -5,7 +5,8 @@
    [clojure.edn :as edn]
    [clojure.java.shell :refer [sh]]
    [clojure.string :as str]
-   [clojure.test :as test :refer [deftest is testing]]))
+   [clojure.test :as test :refer [deftest is testing]]
+   [clojure.java.io :as io]))
 
 (defn bb [input & args]
   (edn/read-string (apply test-utils/bb (str input) (map str args))))
@@ -226,3 +227,8 @@
 (deftest reader-conditionals-test
   (is (= :hello (bb nil "#?(:clj (in-ns 'foo)) (println :hello)")))
   (is (= :hello (bb nil "#?(:bb :hello :default :bye)"))))
+
+(deftest csv-test
+  (is (= '(["Adult" "87727"] ["Elderly" "43914"] ["Child" "33411"] ["Adolescent" "29849"]
+           ["Infant" "15238"] ["Newborn" "10050"] ["In Utero" "1198"])
+         (bb nil (.getPath (io/file "test" "babashka" "scripts" "csv.bb"))))))
diff --git a/test/babashka/scripts/csv.bb b/test/babashka/scripts/csv.bb
new file mode 100644
index 00000000..9e011215
--- /dev/null
+++ b/test/babashka/scripts/csv.bb
@@ -0,0 +1,3 @@
+(with-open [reader (io/reader (io/file "test-resources" "test.csv"))]
+  (doall
+   (csv/read-csv reader)))