diff --git a/.gitmodules b/.gitmodules index 6bf5f5fe..1e094c0f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ [submodule "babashka.curl"] path = babashka.curl url = https://github.com/borkdude/babashka.curl +[submodule "babashka.pods"] + path = babashka.pods + url = https://github.com/babashka/babashka.pods diff --git a/babashka.pods b/babashka.pods new file mode 160000 index 00000000..4d484dc1 --- /dev/null +++ b/babashka.pods @@ -0,0 +1 @@ +Subproject commit 4d484dc12628719e845ac15824748f6d4f8a5e25 diff --git a/deps.edn b/deps.edn index 962befdf..f2135b56 100644 --- a/deps.edn +++ b/deps.edn @@ -1,7 +1,8 @@ {:paths ["src" "feature-xml" "feature-core-async" "feature-yaml" "feature-csv" "feature-transit" "feature-java-time" "feature-java-nio" - "sci/src" "babashka.curl/src" "resources" "sci/resources"], + "sci/src" "babashka.curl/src" "babashka.pods/src" + "resources" "sci/resources"], :deps {org.clojure/clojure {:mvn/version "1.10.2-alpha1"}, org.clojure/tools.reader {:mvn/version "1.3.2"}, borkdude/edamame {:mvn/version "0.0.11-alpha.9"}, @@ -15,6 +16,7 @@ fipp {:mvn/version "0.6.22"} clj-commons/clj-yaml {:mvn/version "0.7.1"} com.cognitect/transit-clj {:mvn/version "1.0.324"} + nrepl/bencode {:mvn/version "1.1.0"} seancorfield/next.jdbc {:mvn/version "1.0.424"} org.postgresql/postgresql {:mvn/version "42.2.12"} org.hsqldb/hsqldb {:mvn/version "2.4.0"} diff --git a/project.clj b/project.clj index cf1e6a36..03058614 100644 --- a/project.clj +++ b/project.clj @@ -7,7 +7,7 @@ :url "https://github.com/borkdude/babashka"} :license {:name "Eclipse Public License 1.0" :url "http://opensource.org/licenses/eclipse-1.0.php"} - :source-paths ["src" "sci/src" "babashka.curl/src"] + :source-paths ["src" "sci/src" "babashka.curl/src" "babashka.pods/src"] ;; for debugging Reflector.java code: ;; :java-source-paths ["sci/reflector/src-java"] :java-source-paths ["src-java"] @@ -19,7 +19,8 @@ [borkdude/sci.impl.reflector "0.0.1"] [org.clojure/tools.cli "1.0.194"] [cheshire "5.10.0"] - [fipp "0.6.22"]] + [fipp "0.6.22"] + [nrepl/bencode "1.1.0"]] :profiles {:feature/xml {:source-paths ["feature-xml"] :dependencies [[org.clojure/data.xml "0.2.0-alpha6"]]} :feature/yaml {:source-paths ["feature-yaml"] diff --git a/src/babashka/impl/bencode.clj b/src/babashka/impl/bencode.clj index 3d452eb6..70ed7ea1 100644 --- a/src/babashka/impl/bencode.clj +++ b/src/babashka/impl/bencode.clj @@ -1,6 +1,6 @@ (ns babashka.impl.bencode {:no-doc true} - (:require [babashka.impl.bencode.core :as bencode] + (:require [bencode.core :as bencode] [sci.impl.namespaces :refer [copy-var]] [sci.impl.vars :as vars])) diff --git a/src/babashka/impl/bencode/core.clj b/src/babashka/impl/bencode/core.clj deleted file mode 100644 index 20e24e5b..00000000 --- a/src/babashka/impl/bencode/core.clj +++ /dev/null @@ -1,420 +0,0 @@ -(ns babashka.impl.bencode.core - "A netstring and bencode implementation for Clojure." - {:author "Meikel Brandmeyer" - :no-doc true} - (:require [clojure.java.io :as io]) - (:import [java.io ByteArrayOutputStream - EOFException - InputStream - IOException - OutputStream - PushbackInputStream])) - -;; Copyright (c) Meikel Brandmeyer. All rights reserved. -;; The use and distribution terms for this software are covered by the -;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) -;; which can be found in the file epl-v10.html at the root of this distribution. -;; By using this software in any fashion, you are agreeing to be bound by -;; the terms of this license. -;; You must not remove this notice, or any other, from this software. - -;; # Motivation -;; -;; In each and every application, which contacts peer processes via some -;; communication channel, the handling of the communication channel is -;; obviously a central part of the application. Unfortunately introduces -;; handling of buffers of varying sizes often bugs in form of buffer -;; overflows and similar. -;; -;; A strong factor in this situation is of course the protocol which goes -;; over the wire. Depending on its design it might be difficult to estimate -;; the size of the input up front. This introduces more handling of message -;; buffers to accomodate for inputs of varying sizes. This is particularly -;; difficult in languages like C, where there is no bounds checking of array -;; accesses and where errors might go unnoticed for considerable amount of -;; time. -;; -;; To address these issues D. Bernstein developed the so called -;; [netstrings][net]. They are especially designed to allow easy construction -;; of the message buffers, easy and robust parsing. -;; -;; BitTorrent extended this to the [bencode][bc] protocol which also -;; includes ways to encode numbers and collections like lists or maps. -;; -;; *wire* is based on these ideas. -;; -;; [net]: http://cr.yp.to/proto/netstrings.txt -;; [bc]: http://wiki.theory.org/BitTorrentSpecification#Bencoding -;; -;; # Netstrings -;; -;; Now let's start with the basic netstrings. They consist of a byte count, -;; followed a colon and the binary data and a trailing comma. Examples: -;; -;; 13:Hello, World!, -;; 10:Guten Tag!, -;; 0:, -;; -;; The initial byte count allows to efficiently allocate a sufficiently -;; sized message buffer. The trailing comma serves as a hint to detect -;; incorrect netstrings. -;; -;; ## Low-level reading -;; -;; We will need some low-level reading helpers to read the bytes from -;; the input stream. These are `read-byte` as well as `read-bytes`. They -;; are split out, because doing such a simple task as reading a byte is -;; mild catastrophe in Java. So it would add some clutter to the algorithm -;; `read-netstring`. -;; -;; On the other hand they might be also useful elsewhere. -;; -;; To remove some magic numbers from the code below. - -(set! *warn-on-reflection* true) - -(def #^{:const true} i 105) -(def #^{:const true} l 108) -(def #^{:const true} d 100) -(def #^{:const true} comma 44) -(def #^{:const true} minus 45) - -;; These two are only used boxed. So we keep them extra here. - -(def e 101) -(def colon 58) - -(defn #^{:private true} read-byte - #^long [#^InputStream input] - (let [c (.read input)] - (when (neg? c) - (throw (EOFException. "Invalid netstring. Unexpected end of input."))) - ;; Here we have a quirk for example. `.read` returns -1 on end of - ;; input. However the Java `Byte` has only a range from -128 to 127. - ;; How does the fit together? - ;; - ;; The whole thing is shifted. `.read` actually returns an int - ;; between zero and 255. Everything below the value 128 stands - ;; for itself. But larger values are actually negative byte values. - ;; - ;; So we have to do some translation here. `Byte/byteValue` would - ;; do that for us, but we want to avoid boxing here. - (if (< 127 c) (- c 256) c))) - -(defn #^{:private true :tag "[B"} read-bytes - #^Object [#^InputStream input n] - (let [content (byte-array n)] - (loop [offset (int 0) - len (int n)] - (let [result (.read input content offset len)] - (when (neg? result) - (throw - (EOFException. - "Invalid netstring. Less data available than expected."))) - (when (not= result len) - (recur (+ offset result) (- len result))))) - content)) - -;; `read-long` is used for reading integers from the stream as well -;; as the byte count prefixes of byte strings. The delimiter is \: -;; for byte count prefixes and \e for integers. - -(defn #^{:private true} read-long - #^long [#^InputStream input delim] - (loop [n (long 0)] - ;; We read repeatedly a byte from the input… - (let [b (read-byte input)] - ;; …and stop at the delimiter. - (cond - (= b minus) (- (read-long input delim)) - (= b delim) n - :else (recur (+ (* n (long 10)) (- (long b) (long 48)))))))) - -;; ## Reading a netstring -;; -;; Let's dive straight into reading a netstring from an `InputStream`. -;; -;; For convenience we split the function into two subfunctions. The -;; public `read-netstring` is the normal entry point, which also checks -;; for the trailing comma after reading the payload data with the -;; private `read-netstring*`. -;; -;; The reason we need the less strict `read-netstring*` is that with -;; bencode we don't have a trailing comma. So a check would not be -;; beneficial here. -;; -;; However the consumer doesn't have to care. `read-netstring` as -;; well as `read-bencode` provide the public entry points, which do -;; the right thing. Although they both may reference the `read-netstring*` -;; underneath. -;; -;; With this in mind we define the inner helper function first. - -(declare #^"[B" string>payload - #^String stringpayload` and `stringpayload - [#^String s] - (.getBytes s "UTF-8")) - -(defn #^{:private true :tag String} stringpayload (str (alength content)))) - (.write (int colon)) - (.write content))) - -(defn write-netstring - "Write the given binary data to the output stream in form of a classic - netstring." - [#^OutputStream output content] - (doto output - (write-netstring* content) - (.write (int comma)))) - -;; # Bencode -;; -;; However most of the time we don't want to send simple blobs of data -;; back and forth. The data sent between the communication peers usually -;; have some structure, which has to be carried along the way to the -;; other side. Here [bencode][bc] come into play. -;; -;; Bencode defines additionally to netstrings easily parseable structures -;; for lists, maps and numbers. It allows to communicate information -;; about the data structure to the peer on the other side. -;; -;; ## Tokens -;; -;; The data is encoded in tokens in bencode. There are several types of -;; tokens: -;; -;; * A netstring without trailing comma for string data. -;; * A tag specifiyng the type of the following tokens. -;; The tag may be one of these: -;; * `\i` to encode integers. -;; * `\l` to encode lists of items. -;; * `\d` to encode maps of item pairs. -;; * `\e` to end the a previously started tag. -;; -;; ## Reading bencode -;; -;; Reading bencode encoded data is basically parsing a stream of tokens -;; from the input. Hence we need a read-token helper which allows to -;; retrieve the next token. - -(defn #^{:private true} read-token - [#^PushbackInputStream input] - (let [ch (read-byte input)] - (cond - (= (long e) ch) nil - (= i ch) :integer - (= l ch) :list - (= d ch) :map - :else (do - (.unread input (int ch)) - (read-netstring* input))))) - -;; To read the bencode encoded data we walk a long the sequence of tokens -;; and act according to the found tags. - -(declare read-integer read-list read-map) - -(defn read-bencode - "Read bencode token from the input stream." - [input] - (let [token (read-token input)] - (case token - :integer (read-integer input) - :list (read-list input) - :map (read-map input) - token))) - -;; Of course integers and the collection types are have to treated specially. -;; -;; Integers for example consist of a sequence of decimal digits. - -(defn #^{:private true} read-integer - [input] - (read-long input e)) - -;; *Note:* integers are an ugly special case, which cannot be -;; handled with `read-token` or `read-netstring*`. -;; -;; Lists are just a sequence of other tokens. - -(declare token-seq) - -(defn #^{:private true} read-list - [input] - (vec (token-seq input))) - -;; Maps are sequences of key/value pairs. The keys are always -;; decoded into strings. The values are kept as is. - -(defn #^{:private true} read-map - [input] - (->> (token-seq input) - (into {} (comp (partition-all 2) - (map (fn [[k v]] - [(string> #(read-bencode input) - repeatedly - (take-while identity))) - -;; ## Writing bencode -;; -;; Writing bencode is similar easy as reading it. The main entry point -;; takes a string, map, sequence or integer and writes it according to -;; the rules to the given OutputStream. - -(defmulti write-bencode - "Write the given thing to the output stream. “Thing” means here a - string, map, sequence or integer. Alternatively an ByteArray may - be provided whose contents are written as a bytestring. Similar - the contents of a given InputStream are written as a byte string. - Named things (symbols or keywords) are written in the form - 'namespace/name'." - (fn [_output thing] - (cond - (bytes? thing) :bytes - (instance? InputStream thing) :input-stream - (integer? thing) :integer - (string? thing) :string - (symbol? thing) :named - (keyword? thing) :named - (map? thing) :map - (or (nil? thing) (coll? thing) (.isArray (class thing))) :list - :else (type thing)))) - -(defmethod write-bencode :default - [output x] - (throw (IllegalArgumentException. (str "Cannot write value of type " (class x))))) - -;; The following methods should be pretty straight-forward. -;; -;; The easiest case is of course when we already have a byte array. -;; We can simply pass it on to the underlying machinery. - -(defmethod write-bencode :bytes - [output bytes] - (write-netstring* output bytes)) - -;; For strings we simply write the string as a netstring without -;; trailing comma after encoding the string as UTF-8 bytes. - -(defmethod write-bencode :string - [output string] - (write-netstring* output (string>payload string))) - -;; Streaming does not really work, since we need to know the -;; number of bytes to write upfront. So we read in everything -;; for InputStreams and pass on the byte array. - -(defmethod write-bencode :input-stream - [output stream] - (let [bytes (ByteArrayOutputStream.)] - (io/copy stream bytes) - (write-netstring* output (.toByteArray bytes)))) - -;; Integers are again the ugly special case. - -(defmethod write-bencode :integer - [#^OutputStream output n] - (doto output - (.write (int i)) - (.write (string>payload (str n))) - (.write (int e)))) - -;; Symbols and keywords are converted to a string of the -;; form 'namespace/name' or just 'name' in case its not -;; qualified. We do not add colons for keywords since the -;; other side might not have the notion of keywords. - -(defmethod write-bencode :named - [output thing] - (let [nspace (namespace thing) - name (name thing)] - (->> (str (when nspace (str nspace "/")) name) - string>payload - (write-netstring* output)))) - -;; Lists as well as maps work recursively to print their elements. - -(defmethod write-bencode :list - [#^OutputStream output lst] - (.write output (int l)) - (doseq [elt lst] - (write-bencode output elt)) - (.write output (int e))) - -;; However, maps are a bit special because their keys are sorted -;; lexicographically based on their byte string represantation. - -(declare lexicographically) - -(defmethod write-bencode :map - [#^OutputStream output m] - (let [translation (into {} (map (juxt string>payload identity) (keys m))) - key-strings (sort lexicographically (keys translation)) - >value (comp m translation)] - (.write output (int d)) - (doseq [k key-strings] - (write-netstring* output k) - (write-bencode output (>value k))) - (.write output (int e)))) - -;; However, since byte arrays are not `Comparable` we need a custom -;; comparator which we can feed to `sort`. - -(defn #^{:private true} lexicographically - [#^"[B" a #^"[B" b] - (let [alen (alength a) - blen (alength b) - len (min alen blen)] - (loop [i 0] - (if (== i len) - (- alen blen) - (let [x (- (int (aget a i)) (int (aget b i)))] - (if (zero? x) - (recur (inc i)) - x)))))) diff --git a/src/babashka/impl/nrepl_server.clj b/src/babashka/impl/nrepl_server.clj index 7e1fbf4d..19d50420 100644 --- a/src/babashka/impl/nrepl_server.clj +++ b/src/babashka/impl/nrepl_server.clj @@ -1,9 +1,9 @@ (ns babashka.impl.nrepl-server {:no-doc true} (:refer-clojure :exclude [send future binding]) - (:require [babashka.impl.bencode.core :refer [read-bencode]] - [babashka.impl.nrepl-server.utils :refer [dev? response-for send send-exception + (:require [babashka.impl.nrepl-server.utils :refer [dev? response-for send send-exception replying-print-writer]] + [bencode.core :refer [read-bencode]] [clojure.string :as str] [clojure.tools.reader.reader-types :as r] [sci.core :as sci] diff --git a/src/babashka/impl/nrepl_server/utils.clj b/src/babashka/impl/nrepl_server/utils.clj index 46dca89f..b96cddfd 100644 --- a/src/babashka/impl/nrepl_server/utils.clj +++ b/src/babashka/impl/nrepl_server/utils.clj @@ -1,7 +1,7 @@ (ns babashka.impl.nrepl-server.utils {:no-doc true} (:refer-clojure :exclude [send]) - (:require [babashka.impl.bencode.core :refer [write-bencode]]) + (:require [bencode.core :refer [write-bencode]]) (:import [java.io Writer PrintWriter StringWriter OutputStream BufferedWriter])) (set! *warn-on-reflection* true) diff --git a/src/babashka/impl/pods.clj b/src/babashka/impl/pods.clj index b22ef750..d72e0e2c 100644 --- a/src/babashka/impl/pods.clj +++ b/src/babashka/impl/pods.clj @@ -1,165 +1,8 @@ (ns babashka.impl.pods {:no-doc true} (:refer-clojure :exclude [read]) - (:require [babashka.impl.bencode.core :as bencode] - [cheshire.core :as cheshire] - [clojure.core.async :as async] - [clojure.edn :as edn] - [sci.core :as sci])) - -(set! *warn-on-reflection* true) - -(defn add-shutdown-hook! [^Runnable f] - (-> (Runtime/getRuntime) - (.addShutdownHook (Thread. f)))) - -(defn write [^java.io.OutputStream stream v] - (locking stream - (bencode/write-bencode stream v) - (.flush stream))) - -(defn read [stream] - (bencode/read-bencode stream)) - -(defn bytes->string [^"[B" bytes] - (String. bytes)) - -(defn get-string [m k] - (-> (get m k) - bytes->string)) - -(defn processor [_ctx pod] - (let [stdout (:stdout pod) - format (:format pod) - chans (:chans pod) - read-fn (case format - :edn edn/read-string - :json #(cheshire/parse-string-strict % true))] - (try - (loop [] - (let [reply (read stdout) - id (get reply "id") - id (bytes->string id) - value* (find reply "value") - value (some-> value* - second - bytes->string - read-fn) - status (get reply "status") - status (set (map (comp keyword bytes->string) status)) - done? (contains? status :done) - error? (contains? status :error) - value (if error? - (let [message (or (some-> (get reply "ex-message") - bytes->string) - "") - data (or (some-> (get reply "ex-data") - bytes->string - read-fn) - {})] - (ex-info message data)) - value) - chan (get @chans id) - out (some-> (get reply "out") - bytes->string) - err (some-> (get reply "err") - bytes->string)] - (when (or value* error?) (async/put! chan value)) - (when (or done? error?) (async/close! chan)) - (when out (binding [*out* @sci/out] - (println out))) - (when err (binding [*out* @sci/err] - (println err)))) - (recur)) - (catch Exception e - (binding [*out* @sci/err] - (prn e)))))) - -(defn next-id [] - (str (java.util.UUID/randomUUID))) - -(defn invoke [pod pod-var args async?] - (let [stream (:stdin pod) - format (:format pod) - chans (:chans pod) - write-fn (case format - :edn pr-str - :json cheshire/generate-string) - id (next-id) - chan (async/chan) - _ (swap! chans assoc id chan) - _ (write stream {"id" id - "op" "invoke" - "var" (str pod-var) - "args" (write-fn args)})] - (if async? chan ;; TODO: https://blog.jakubholy.net/2019/core-async-error-handling/ - (let [v (async/ (get reply "format") bytes->string keyword) - ops (some->> (get reply "ops") keys (map keyword) set) - pod {:process p - :pod-spec pod-spec - :stdin stdin - :stdout stdout - :chans (atom {}) - :format format - :ops ops} - _ (add-shutdown-hook! - (fn [] - (if (contains? ops :shutdown) - (do (write stdin {"op" "shutdown" - "id" (next-id)}) - (.waitFor p)) - (.destroy p)))) - pod-namespaces (get reply "namespaces") - vars-fn (fn [ns-name-str vars] - (reduce - (fn [m var] - (let [name (get-string var "name") - async? (some-> (get var "async") - bytes->string - #(Boolean/parseBoolean %)) - name-sym (symbol name) - sym (symbol ns-name-str name)] - (assoc m name-sym (fn [& args] - (let [res (invoke pod sym args async?)] - res))))) - {} - vars)) - env (:env ctx)] - (swap! env - (fn [env] - (let [namespaces (:namespaces env) - namespaces - (reduce (fn [namespaces namespace] - (let [name-str (-> namespace (get "name") bytes->string) - name-sym (symbol name-str) - vars (get namespace "vars") - vars (vars-fn name-str vars)] - (assoc namespaces name-sym vars))) - namespaces - pod-namespaces)] - (assoc env :namespaces namespaces)))) - (sci/future (processor ctx pod)) - ;; TODO: we could return the entire describe map here - nil))) + (:require [babashka.pods.sci :as pods])) (def pods-namespace - {'load-pod (with-meta load-pod + {'load-pod (with-meta pods/load-pod {:sci.impl/op :needs-ctx})}) diff --git a/test-resources/pod.clj b/test-resources/pod.clj index b07ef29f..e55459e5 100644 --- a/test-resources/pod.clj +++ b/test-resources/pod.clj @@ -47,8 +47,6 @@ op (read-string op) op (keyword op)] (case op - ;; TODO: - ;; group by namespace :describe (do (write {"format" (if (= format :json) "json" "edn") diff --git a/test/babashka/impl/nrepl_server_test.clj b/test/babashka/impl/nrepl_server_test.clj index 52613754..5fd1e9c9 100644 --- a/test/babashka/impl/nrepl_server_test.clj +++ b/test/babashka/impl/nrepl_server_test.clj @@ -1,10 +1,10 @@ (ns babashka.impl.nrepl-server-test (:require - [babashka.impl.bencode.core :as bencode] [babashka.impl.nrepl-server :refer [start-server! stop-server!]] [babashka.main :as main] [babashka.test-utils :as tu] [babashka.wait :as wait] + [bencode.core :as bencode] [clojure.test :as t :refer [deftest is testing]] [sci.impl.opts :refer [init]]) (:import [java.lang ProcessBuilder$Redirect]))