mirror of
https://github.com/taoensso/telemere.git
synced 2025-12-16 17:41:12 +00:00
[new] OpenTelemetry handler: improve span interop
When this feature is enabled (see `otel-tracing?`), Telemere's tracing signal creators (`trace!`, `spy!`, etc.) will now manipulate OpenTelemetry's span context when relevant. Before this commit: Telemere would detect and use OpenTelemetry span context, but the inverse wasn't true: OpenTelemetry instrumentation wouldn't recognize Telemere spans. After this commit: Telemere detects OpenTelemetry span context, and the inverse is also true: OpenTelemetry instrumentation will recognize Telemere spans. The net effect: When you use Telemere to trace forms that may themselves do auto/manual OpenTelemetry instrumentation - the resulting spans will now properly identify Telemere's spans as parents. Note that this is interop is implemented in a unique way that retains Telemere's usual benefits re: low costs at signal callsite, and ability to skip costs when filtering / sampling / rate-limiting / etc.
This commit is contained in:
parent
ef678bcc36
commit
84957c6d0a
5 changed files with 449 additions and 533 deletions
16
project.clj
16
project.clj
|
|
@ -29,7 +29,9 @@
|
|||
[[org.clojure/clojure "1.11.4"]
|
||||
[com.github.clj-easy/graal-build-time "1.0.5"]]}
|
||||
|
||||
:test {:aot [] #_[taoensso.telemere-tests]}
|
||||
:test {:aot [] #_[taoensso.telemere-tests]}
|
||||
:ott-on {:jvm-opts ["-Dtaoensso.telemere.otel-tracing=true"]}
|
||||
:ott-off {:jvm-opts ["-Dtaoensso.telemere.otel-tracing=false"]}
|
||||
:dev
|
||||
{:jvm-opts
|
||||
["-server"
|
||||
|
|
@ -50,10 +52,10 @@
|
|||
#_[org.slf4j/slf4j-nop "2.0.14"]
|
||||
|
||||
;;; For optional handlers
|
||||
[io.opentelemetry/opentelemetry-api "1.41.0"]
|
||||
#_[io.opentelemetry/opentelemetry-sdk-extension-autoconfigure "1.41.0"]
|
||||
#_[io.opentelemetry/opentelemetry-exporter-otlp "1.41.0"]
|
||||
#_[io.opentelemetry/opentelemetry-exporters-jaeger "0.9.1"]
|
||||
[io.opentelemetry/opentelemetry-api "1.41.0"]
|
||||
[io.opentelemetry/opentelemetry-sdk-extension-autoconfigure "1.41.0"]
|
||||
[io.opentelemetry/opentelemetry-exporter-otlp "1.41.0"]
|
||||
#_[io.opentelemetry/opentelemetry-exporters-jaeger "0.9.1"]
|
||||
[metosin/jsonista "0.3.10"]
|
||||
[com.draines/postal "2.0.5"]
|
||||
[org.julienxx/clj-slack "0.8.3"]]
|
||||
|
|
@ -91,4 +93,6 @@
|
|||
|
||||
"test-clj" ["with-profile" "+c1.12:+c1.11:+c1.10" "test"]
|
||||
"test-cljs" ["with-profile" "+c1.12" "cljsbuild" "test"]
|
||||
"test-all" ["do" ["clean"] ["test-clj"] ["test-cljs"]]})
|
||||
|
||||
"test-clj-ott-off" ["with-profile" "+ott-off" "test-clj"]
|
||||
"test-all" ["do" ["clean"] ["test-clj"] ["test-clj-ott-off"] ["test-cljs"]]})
|
||||
|
|
|
|||
|
|
@ -79,13 +79,16 @@
|
|||
(impl/defhelp help:signal-content :signal-content)
|
||||
(impl/defhelp help:environmental-config :environmental-config))
|
||||
|
||||
;;;;
|
||||
;;;; Unique ids
|
||||
|
||||
(def ^:dynamic *uid-fn*
|
||||
"Experimental, subject to change.
|
||||
(fn [root?]) used to generate signal `:uid` values when tracing.
|
||||
These are basically unique signal instance identifiers.
|
||||
|
||||
Relevant only when `otel-tracing?` is false.
|
||||
If `otel-tracing?` is true, uids are instead generated by `*otel-tracer*`.
|
||||
|
||||
`root?` argument is true iff signal is a top-level trace (i.e. form
|
||||
being traced is unnested = has no parent form).
|
||||
|
||||
|
|
@ -108,6 +111,89 @@
|
|||
|
||||
(comment (enc/qb 1e6 (enc/uuid) (*uid-fn* true) (*uid-fn* false))) ; [168.83 79.02 62.95]
|
||||
|
||||
;;;; OpenTelemetry
|
||||
|
||||
#?(:clj
|
||||
(def otel-tracing?
|
||||
"Experimental, subject to change. Feedback welcome!
|
||||
|
||||
Should Telemere's tracing signal creators (`trace!`, `spy!`, etc.)
|
||||
interop with OpenTelemetry Java [1]? This will affect relevant
|
||||
Telemere macro expansions.
|
||||
|
||||
Defaults to `true` iff OpenTelemetry Java is present when this
|
||||
namespace is evaluated/compiled.
|
||||
|
||||
If `false`:
|
||||
1. Telemere's OpenTelemetry handler will NOT emit to `SpanExporter`s.
|
||||
2. Telemere and OpenTelemetry will NOT recognize each other's spans.
|
||||
|
||||
If `true`:
|
||||
1. Telemere's OpenTelemetry handler WILL emit to `SpanExporter`s.
|
||||
2. Telemere and OpenTelemetry WILL recognize each other's spans.
|
||||
|
||||
Override default by setting one of the following to \"true\" or \"false\":
|
||||
JVM property: `taoensso.telemere.otel-tracing`
|
||||
Env variable: `TAOENSSO_TELEMERE_otel-tracing`
|
||||
Classpath resource: `taoensso.telemere.otel-tracing`
|
||||
|
||||
See also: `otel-get-default-providers`, `*otel-tracer*`,
|
||||
`taoensso.telemere.open-telemere/handler:open-telemetry`.
|
||||
|
||||
[1] Ref. <https://github.com/open-telemetry/opentelemetry-java>"
|
||||
impl/enabled:otel-tracing?))
|
||||
|
||||
#?(:clj
|
||||
(defn otel-get-default-providers
|
||||
"Experimental, subject to change. Feedback welcome!
|
||||
|
||||
When OpenTelemetry Java API [1] is present, returns map with keys:
|
||||
:logger-provider - default `io.opentelemetry.api.logs.LoggerProvider`
|
||||
:tracer-provider - default `io.opentelemetry.api.trace.TracerProvider`
|
||||
:via - ∈ #{:sdk-extension-autoconfigure :global}
|
||||
|
||||
Uses `AutoConfiguredOpenTelemetrySdk` when possible, or
|
||||
`GlobalOpenTelemetry` otherwise.
|
||||
|
||||
See the relevant OpenTelemetry Java docs for details.
|
||||
|
||||
[1] Ref. <https://github.com/open-telemetry/opentelemetry-java>"
|
||||
[]
|
||||
(enc/compile-when impl/present:otel?
|
||||
(or
|
||||
;; Via SDK autoconfiguration extension (when available)
|
||||
(enc/compile-when
|
||||
io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk
|
||||
(enc/catching :common
|
||||
(let [builder (io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk/builder)
|
||||
sdk (.getOpenTelemetrySdk (.build builder))]
|
||||
{:logger-provider (.getLogsBridge sdk)
|
||||
:tracer-provider (.getTracerProvider sdk)
|
||||
:via :sdk-extension-autoconfigure})))
|
||||
|
||||
;; Via Global (generally not recommended)
|
||||
(let [g (io.opentelemetry.api.GlobalOpenTelemetry/get)]
|
||||
{:logger-provider (.getLogsBridge g)
|
||||
:tracer-provider (.getTracerProvider g)
|
||||
:via :global})))))
|
||||
|
||||
#?(:clj
|
||||
(def ^:no-doc otel-default-providers_
|
||||
(when impl/present:otel? (delay (otel-get-default-providers)))))
|
||||
|
||||
#?(:clj
|
||||
(def ^:dynamic *otel-tracer*
|
||||
"OpenTelemetry `Tracer` to use for Telemere's tracing signal creators
|
||||
(`trace!`, `span!`, etc.), ∈ #{nil io.opentelemetry.api.trace.Tracer Delay}.
|
||||
See also `otel-tracing?`, `otel-get-default-providers`."
|
||||
(enc/compile-when impl/enabled:otel-tracing?
|
||||
(delay
|
||||
(when-let [^io.opentelemetry.api.trace.TracerProvider p
|
||||
(get (force otel-default-providers_) :tracer-provider)]
|
||||
(do #_impl/viable-tracer (.get p "Telemere")))))))
|
||||
|
||||
(comment (enc/qb 1e6 (force *otel-tracer*))) ; 51.23
|
||||
|
||||
;;;; Signal creators
|
||||
;; - event! [id ] [id opts/level] ; id + ?level => allowed? ; Sole signal with descending main arg!
|
||||
;; - log! [msg ] [opts/level msg] ; msg + ?level => allowed?
|
||||
|
|
@ -310,8 +396,10 @@
|
|||
|
||||
(add-handler! :default/console (handler:console))
|
||||
|
||||
#?(:clj (enc/catching (require '[taoensso.telemere.tools-logging])))
|
||||
#?(:clj (enc/catching (require '[taoensso.telemere.slf4j]))))
|
||||
#?(:clj (enc/catching (require '[taoensso.telemere.tools-logging]))) ; TL->Telemere
|
||||
#?(:clj (enc/catching (require '[taoensso.telemere.slf4j]))) ; SLF4J->Telemere
|
||||
#?(:clj (enc/catching (require '[taoensso.telemere.open-telemetry]))) ; Telemere->OTel
|
||||
)
|
||||
|
||||
;;;; Flow benchmarks
|
||||
|
||||
|
|
|
|||
|
|
@ -16,10 +16,11 @@
|
|||
(:api (enc/interns-overview)))
|
||||
|
||||
#?(:clj
|
||||
(enc/declare-remote ; For macro expansions
|
||||
(enc/declare-remote
|
||||
^:dynamic taoensso.telemere/*ctx*
|
||||
^:dynamic taoensso.telemere/*middleware*
|
||||
^:dynamic taoensso.telemere/*uid-fn*))
|
||||
^:dynamic taoensso.telemere/*uid-fn*
|
||||
^:dynamic taoensso.telemere/*otel-tracer*))
|
||||
|
||||
;;;; Config
|
||||
|
||||
|
|
@ -32,7 +33,12 @@
|
|||
|
||||
(def enabled:tools-logging?
|
||||
"Documented at `taoensso.telemere.tools-logging/tools-logging->telemere!`."
|
||||
(enc/get-env {:as :bool} :clojure.tools.logging/to-telemere))))
|
||||
(enc/get-env {:as :bool, :default false} :clojure.tools.logging/to-telemere))
|
||||
|
||||
(def enabled:otel-tracing?
|
||||
"Documented at `taoensso.telemere/otel-tracing?`."
|
||||
(enc/get-env {:as :bool, :default present:otel?}
|
||||
:taoensso.telemere/otel-tracing<.platform>))))
|
||||
|
||||
#?(:clj
|
||||
(let [base (enc/get-env {:as :edn} :taoensso.telemere/ct-filters<.platform><.edn>)
|
||||
|
|
@ -166,29 +172,62 @@
|
|||
(default-trace-msg "(+ 1 2)" 3 nil 12345)
|
||||
(default-trace-msg "(+ 1 2)" nil (Exception. "Ex") 12345))
|
||||
|
||||
;;;; Tracing (optional flow tracking)
|
||||
;;;; Tracing
|
||||
|
||||
(enc/def* ^:dynamic *trace-root* "?{:keys [id uid inst]}" nil)
|
||||
(enc/def* ^:dynamic *trace-parent* "?{:keys [id uid inst]}" nil)
|
||||
(enc/def* ^:dynamic *trace-root* "?{:keys [id uid]}" nil) ; Fixed once bound
|
||||
(enc/def* ^:dynamic *trace-parent* "?{:keys [id uid]}" nil) ; Changes each nesting level
|
||||
|
||||
;; Root Telemere ids: {:parent nil, :id id1, :uid uid1 :root {:id id1, :uid uid1}}
|
||||
;; Root OTel ids: {:parent nil, :id id1, :uid span1,:root {:id id1, :uid trace1}}
|
||||
|
||||
;;;; OpenTelemetry
|
||||
|
||||
#?(:clj
|
||||
(defmacro cond-binding
|
||||
"Wraps `form` with binding if `bind?` is true."
|
||||
[bind? bindings body-form]
|
||||
(if bind?
|
||||
`(binding ~bindings ~body-form)
|
||||
(do body-form))))
|
||||
(enc/compile-when present:otel?
|
||||
(do
|
||||
(enc/def* ^:dynamic *otel-context* "`?Context`" nil)
|
||||
(defmacro otel-context [] `(or *otel-context* (io.opentelemetry.context.Context/current)))
|
||||
|
||||
(defn otel-trace-id
|
||||
"Returns valid `traceId` or nil."
|
||||
[^io.opentelemetry.context.Context context]
|
||||
(let [sc (.getSpanContext (io.opentelemetry.api.trace.Span/fromContext context))]
|
||||
(when (.isValid sc) (.getTraceId sc))))
|
||||
|
||||
(defn otel-span-id
|
||||
"Returns valid `spanId` or nil."
|
||||
[^io.opentelemetry.context.Context context]
|
||||
(let [sc (.getSpanContext (io.opentelemetry.api.trace.Span/fromContext context))]
|
||||
(when (.isValid sc) (.getSpanId sc))))
|
||||
|
||||
(defn viable-tracer
|
||||
"Returns viable `Tracer`, or nil."
|
||||
[tracer]
|
||||
(when-let [tracer ^io.opentelemetry.api.trace.Tracer tracer]
|
||||
(let [sb (.spanBuilder tracer "test-span")
|
||||
span (.startSpan sb)]
|
||||
(when (.isValid (.getSpanContext span))
|
||||
tracer))))
|
||||
|
||||
(def ^String otel-name (enc/fmemoize (fn [id] (if id (enc/as-qname id) "telemere/no-id"))))
|
||||
(defn otel-context+span
|
||||
"Returns new `Context` that includes minimal `Span` in given parent `Context`.
|
||||
We leave the (expensive) population of attributes, etc. for signal handler.
|
||||
Interop needs only the basics (t0, traceId, spanId, spanName) right away."
|
||||
^io.opentelemetry.context.Context
|
||||
[id inst ?parent-context]
|
||||
(let [parent-context (or ?parent-context (otel-context))]
|
||||
(enc/if-not [tracer (force taoensso.telemere/*otel-tracer*)]
|
||||
parent-context ; Can't add Span without Tracer
|
||||
(let [sb (.spanBuilder ^io.opentelemetry.api.trace.Tracer tracer (otel-name id))]
|
||||
(.setStartTimestamp sb ^java.time.Instant inst)
|
||||
(.with ^io.opentelemetry.context.Context parent-context
|
||||
(.startSpan sb)))))))))
|
||||
|
||||
(comment
|
||||
[(enc/qb 1e6 (cond-binding true [*trace-parent* {:id :id1, :uid :uid1, :inst :inst1}] *trace-parent*)) ; 226.18
|
||||
(macroexpand '(cond-binding true [*trace-parent* {:id :id1, :uid :uid1, :inst :inst1}] *trace-parent*))])
|
||||
|
||||
#?(:clj
|
||||
(enc/compile-if io.opentelemetry.context.Context
|
||||
(defmacro otel-context [] `(io.opentelemetry.context.Context/current))
|
||||
(defmacro otel-context [] nil)))
|
||||
|
||||
(comment (enc/qb 1e6 (otel-context))) ; 20.43
|
||||
(enc/qb 1e6 (otel-context) (otel-context+span ::id1 (enc/now-inst) nil)) ; [46.42 186.89]
|
||||
(viable-tracer (force taoensso.telemere/*otel-tracer*))
|
||||
(otel-trace-id (otel-context)))
|
||||
|
||||
;;;; Main types
|
||||
|
||||
|
|
@ -481,6 +520,7 @@
|
|||
|
||||
(comment (enc/qb 1e6 (inst+nsecs (enc/now-inst) 1e9)))
|
||||
|
||||
#?(:clj (defn- auto-> [form auto-form] (if (= form :auto) auto-form form)))
|
||||
#?(:clj
|
||||
(defmacro ^:public signal!
|
||||
"Generic low-level signal call, also aliased in Encore."
|
||||
|
|
@ -488,9 +528,10 @@
|
|||
:arglists (signal-arglists :signal!)}
|
||||
[opts]
|
||||
(have? map? opts) ; We require const map keys, but vals may require eval
|
||||
(let [defaults (get opts :defaults)
|
||||
opts (merge defaults (dissoc opts :defaults))
|
||||
clj? (not (:ns &env))
|
||||
(let [defaults (get opts :defaults)
|
||||
opts (merge defaults (dissoc opts :defaults))
|
||||
cljs? (boolean (:ns &env))
|
||||
clj? (not cljs?)
|
||||
{run-form :run} opts
|
||||
|
||||
{:keys [#_expansion-id location elide? allow?]}
|
||||
|
|
@ -521,17 +562,15 @@
|
|||
{:msg "Expected constant (compile-time) `:trace?` boolean"
|
||||
:context `signal!}))
|
||||
|
||||
parent-form (get opts :parent (when trace? `taoensso.telemere.impl/*trace-parent*))
|
||||
root-form (get opts :root (when trace? `taoensso.telemere.impl/*trace-root*))
|
||||
thread-form (when clj? `(enc/thread-info))
|
||||
|
||||
inst-form (get opts :inst :auto)
|
||||
uid-form (get opts :uid (when trace? :auto))
|
||||
inst-form (get opts :inst :auto)
|
||||
inst-form (auto-> inst-form `(enc/now-inst*))
|
||||
|
||||
inst-form (if (not= inst-form :auto) inst-form `(enc/now-inst*))
|
||||
uid-form (if (not= uid-form :auto) uid-form `(taoensso.telemere/*uid-fn* (if ~'__root0 false true)))
|
||||
parent-form (get opts :parent `*trace-parent*)
|
||||
root-form0 (get opts :root `*trace-root*)
|
||||
|
||||
thread-form (if clj? `(enc/thread-info) nil)
|
||||
otel-ctx-form (if clj? `(otel-context) nil)
|
||||
uid-form (get opts :uid (when trace? :auto))
|
||||
|
||||
signal-delay-form
|
||||
(let [{do-form :do
|
||||
|
|
@ -553,7 +592,7 @@
|
|||
:elidable? :location :inst :uid :middleware,
|
||||
:sample-rate :ns :kind :id :level :filter :when #_:rate-limit,
|
||||
:ctx :parent #_:trace?, :do :let :data :msg :error :run
|
||||
:elide? :allow? #_:expansion-id))
|
||||
:elide? :allow? #_:expansion-id :otel/context))
|
||||
|
||||
_ ; Compile-time validation
|
||||
(do
|
||||
|
|
@ -572,12 +611,12 @@
|
|||
|
||||
signal-form
|
||||
(let [record-form
|
||||
(let [clause [(if run-form :run :no-run) (if clj? :clj :cljs)]]
|
||||
(let [clause [(if run-form :run :no-run) (if clj? :clj :cljs)]]
|
||||
(case clause
|
||||
[:run :clj ] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, (enc/host-info) ~'__thread ~'__otel-ctx, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root, ~data-form ~kvs-form ~'_msg_, ~'_run-err '~run-form ~'_run-val ~'_end-inst ~'_run-nsecs)
|
||||
[:run :cljs] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root, ~data-form ~kvs-form ~'_msg_, ~'_run-err '~run-form ~'_run-val ~'_end-inst ~'_run-nsecs)
|
||||
[:no-run :clj ] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, (enc/host-info) ~'__thread ~'__otel-ctx, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root, ~data-form ~kvs-form ~msg-form, ~error-form nil nil nil nil)
|
||||
[:no-run :cljs] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root, ~data-form ~kvs-form ~msg-form, ~error-form nil nil nil nil)
|
||||
[:run :clj ] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, (enc/host-info) ~'__thread ~'__otel-context1, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root1, ~data-form ~kvs-form ~'_msg_, ~'_run-err '~run-form ~'_run-val ~'_end-inst ~'_run-nsecs)
|
||||
[:run :cljs] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root1, ~data-form ~kvs-form ~'_msg_, ~'_run-err '~run-form ~'_run-val ~'_end-inst ~'_run-nsecs)
|
||||
[:no-run :clj ] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, (enc/host-info) ~'__thread ~'__otel-context1, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root1, ~data-form ~kvs-form ~msg-form, ~error-form nil nil nil nil)
|
||||
[:no-run :cljs] `(Signal. 1 ~'__inst ~'__uid, ~location ~'__ns ~line-form ~column-form ~file-form, ~sample-rate-form, ~'__kind ~'__id ~'__level, ~ctx-form ~parent-form ~'__root1, ~data-form ~kvs-form ~msg-form, ~error-form nil nil nil nil)
|
||||
(enc/unexpected-arg! clause {:context :signal-constructor-args})))
|
||||
|
||||
record-form
|
||||
|
|
@ -610,7 +649,56 @@
|
|||
;; Final unwrapped signal value visible to users/handler-fns, allow to throw
|
||||
(if-let [sig-middleware# ~middleware-form]
|
||||
(sig-middleware# signal#) ; Apply signal middleware, can throw
|
||||
(do signal#)))))]
|
||||
(do signal#)))))
|
||||
|
||||
into-let-form
|
||||
(enc/cond!
|
||||
(not trace?) ; Don't trace
|
||||
`[~'__otel-context1 nil
|
||||
~'__uid ~uid-form ; Given or nil
|
||||
~'__root1 ~'__root0 ; Retain, but don't establish
|
||||
~'__run-result
|
||||
~(when run-form
|
||||
`(let [t0# (enc/now-nano*)]
|
||||
(enc/try*
|
||||
(do (RunResult. ~run-form nil (- (enc/now-nano*) t0#)))
|
||||
(catch :all t# (RunResult. nil t# (- (enc/now-nano*) t0#))))))]
|
||||
|
||||
;; Trace without OpenTelemetry
|
||||
(or cljs? (not enabled:otel-tracing?))
|
||||
`[~'__otel-context1 nil
|
||||
~'__uid ~(auto-> uid-form `(taoensso.telemere/*uid-fn* (if ~'__root0 false true)))
|
||||
~'__root1 (or ~'__root0 ~(when trace? `{:id ~'__id, :uid ~'__uid}))
|
||||
~'__run-result
|
||||
~(when run-form
|
||||
`(binding [*trace-root* ~'__root1
|
||||
*trace-parent* {:id ~'__id, :uid ~'__uid}]
|
||||
(let [t0# (enc/now-nano*)]
|
||||
(enc/try*
|
||||
(do (RunResult. ~run-form nil (- (enc/now-nano*) t0#)))
|
||||
(catch :all t# (RunResult. nil t# (- (enc/now-nano*) t0#)))))))]
|
||||
|
||||
;; Trace with OpenTelemetry
|
||||
(and clj? enabled:otel-tracing?)
|
||||
`[~'__otel-context0 ~(get opts :otel/context `(otel-context)) ; Context
|
||||
~'__otel-context1 ~(if run-form `(otel-context+span ~'__id ~'__inst ~'__otel-context0) ~'__otel-context0)
|
||||
~'__uid ~(auto-> uid-form `(or (otel-span-id ~'__otel-context1) (com.taoensso.encore.Ids/genHexId16)))
|
||||
~'__root1
|
||||
(or ~'__root0
|
||||
~(when trace?
|
||||
`{:id ~'__id, :uid (or (otel-trace-id ~'__otel-context1) (com.taoensso.encore.Ids/genHexId32))}))
|
||||
|
||||
~'__run-result
|
||||
~(when run-form
|
||||
`(binding [*otel-context* ~'__otel-context1
|
||||
*trace-root* ~'__root1
|
||||
*trace-parent* {:id ~'__id, :uid ~'__uid}]
|
||||
(let [otel-scope# (.makeCurrent ~'__otel-context1)
|
||||
t0# (enc/now-nano*)]
|
||||
(enc/try*
|
||||
(do (RunResult. ~run-form nil (- (enc/now-nano*) t0#)))
|
||||
(catch :all t# (RunResult. nil t# (- (enc/now-nano*) t0#)))
|
||||
(finally (.close otel-scope#))))))])]
|
||||
|
||||
;; Could avoid double `run-form` expansion with a fn wrap (>0 cost)
|
||||
;; (let [run-fn-form (when run-form `(fn [] (~run-form)))]
|
||||
|
|
@ -622,39 +710,25 @@
|
|||
`(enc/if-not ~allow? ; Allow to throw at call
|
||||
~run-form
|
||||
(let [;;; Allow to throw at call
|
||||
~'__inst ~inst-form
|
||||
~'__root0 ~root-form
|
||||
~'__level ~level-form
|
||||
~'__kind ~kind-form
|
||||
~'__id ~id-form
|
||||
~'__uid ~uid-form
|
||||
~'__ns ~ns-form
|
||||
~'__root
|
||||
(or ~'__root0
|
||||
(when ~trace? {:id ~'__id, :uid ~'__uid, :inst ~'__inst}))
|
||||
~'__inst ~inst-form
|
||||
~'__level ~level-form
|
||||
~'__kind ~kind-form
|
||||
~'__id ~id-form
|
||||
~'__ns ~ns-form
|
||||
~'__thread ~thread-form
|
||||
~'__root0 ~root-form0 ; ?{:keys [id uid]}
|
||||
|
||||
~'__thread ~thread-form ; Necessarily eager to get callsite value
|
||||
~'__otel-ctx ~otel-ctx-form ; ''
|
||||
|
||||
~'__run-result ; Non-throwing (traps)
|
||||
~(when run-form
|
||||
`(let [t0# (enc/now-nano*)]
|
||||
(cond-binding ~trace?
|
||||
[*trace-root* ~'__root
|
||||
*trace-parent* {:id ~'__id, :uid ~'__uid, :inst ~'__inst}]
|
||||
(enc/try*
|
||||
(do (RunResult. ~run-form nil (- (enc/now-nano*) t0#)))
|
||||
(catch :all t# (RunResult. nil t# (- (enc/now-nano*) t0#)))))))
|
||||
|
||||
signal_# ~signal-delay-form]
|
||||
~@into-let-form ; Inject conditional bindings
|
||||
signal# ~signal-delay-form]
|
||||
|
||||
(dispatch-signal! ; Runner preserves dynamic bindings when async.
|
||||
;; Unconditionally send same wrapped signal to all handlers. Each handler will
|
||||
;; use wrapper for handler filtering, unwrapping (realizing) only allowed signals.
|
||||
(WrappedSignal. ~'__ns ~'__kind ~'__id ~'__level signal_#))
|
||||
;; Unconditionally send same wrapped signal to all handlers.
|
||||
;; Each handler will use wrapper for handler filtering,
|
||||
;; unwrapping (realizing) only allowed signals.
|
||||
(WrappedSignal. ~'__ns ~'__kind ~'__id ~'__level signal#))
|
||||
|
||||
(if ~'__run-result
|
||||
(do (~'__run-result signal_#))
|
||||
(if ~'__run-result
|
||||
( ~'__run-result signal#)
|
||||
true))))))))
|
||||
|
||||
(comment
|
||||
|
|
@ -691,7 +765,7 @@
|
|||
(atom
|
||||
{:tools-logging (fn [] {:present? present:tools-logging?, :enabled-by-env? enabled:tools-logging?})
|
||||
:slf4j (fn [] {:present? present:slf4j?, :telemere-provider-present? present:slf4j-telemere?})
|
||||
:open-telemetry (fn [] {:present? present:otel?})}))
|
||||
:open-telemetry (fn [] {:present? present:otel?, :use-tracer? enabled:otel-tracing?})}))
|
||||
|
||||
(defn add-interop-check! [source-id check-fn] (swap! interop-checks_ assoc source-id check-fn))
|
||||
|
||||
|
|
|
|||
|
|
@ -11,66 +11,22 @@
|
|||
[taoensso.telemere :as tel])
|
||||
|
||||
(:import
|
||||
[io.opentelemetry.context Context]
|
||||
[io.opentelemetry.api.common AttributesBuilder Attributes]
|
||||
[io.opentelemetry.api.logs LoggerProvider Severity]
|
||||
[io.opentelemetry.api.trace TracerProvider Tracer Span SpanContext]
|
||||
[java.util.concurrent CountDownLatch]))
|
||||
[io.opentelemetry.api.trace TracerProvider]))
|
||||
|
||||
(comment
|
||||
(remove-ns 'taoensso.telemere.open-telemetry)
|
||||
(:api (enc/interns-overview)))
|
||||
|
||||
(enc/declare-remote
|
||||
^:dynamic taoensso.telemere/*otel-tracer*
|
||||
taoensso.telemere/otel-default-providers_
|
||||
taoensso.telemere/add-handler!)
|
||||
|
||||
;;;; TODO
|
||||
;; - API for `remote-span-context`, trace state, span links?
|
||||
;; - Ability to actually set (compatible) traceId, spanId?
|
||||
;; - Consider actually establishing relevant OpenTelemetry Context when tracing?
|
||||
;; Would allow a simpler OpenTelemetry handler, and allow low-level
|
||||
;; manual/auto tracing *within* Telemere run forms.
|
||||
|
||||
;;;; Providers
|
||||
|
||||
(defn get-default-providers
|
||||
"Experimental, subject to change. Feedback welcome!
|
||||
|
||||
Returns map with keys:
|
||||
:logger-provider - default `io.opentelemetry.api.logs.LoggerProvider`
|
||||
:tracer-provider - default `io.opentelemetry.api.trace.TracerProvider`
|
||||
:via - ∈ #{:sdk-extension-autoconfigure :global}
|
||||
|
||||
Uses `AutoConfiguredOpenTelemetrySdk` when possible, or
|
||||
`GlobalOpenTelemetry` otherwise.
|
||||
|
||||
See the relevant `opentelemetry-java` docs for details."
|
||||
[]
|
||||
(or
|
||||
;; Via SDK autoconfiguration extension (when available)
|
||||
(enc/compile-when
|
||||
io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk
|
||||
(enc/catching :common
|
||||
(let [builder (io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk/builder)
|
||||
sdk (.getOpenTelemetrySdk (.build builder))]
|
||||
{:logger-provider (.getLogsBridge sdk)
|
||||
:tracer-provider (.getTracerProvider sdk)
|
||||
:via :sdk-extension-autoconfigure})))
|
||||
|
||||
;; Via Global (generally not recommended)
|
||||
(let [g (io.opentelemetry.api.GlobalOpenTelemetry/get)]
|
||||
{:logger-provider (.getLogsBridge g)
|
||||
:tracer-provider (.getTracerProvider g)
|
||||
:via :global})))
|
||||
|
||||
(def ^:no-doc default-providers_
|
||||
(delay (get-default-providers)))
|
||||
|
||||
(comment
|
||||
(get-default-providers)
|
||||
(let [{:keys [logger-provider tracer-provider]} (get-default-providers)]
|
||||
(def ^LoggerProvider my-lp logger-provider)
|
||||
(def ^Tracer my-tr (.get tracer-provider "Telemere")))
|
||||
|
||||
;; Confirm that we have a real (not noop) SpanBuilder
|
||||
(.spanBuilder my-tr "my-span"))
|
||||
;; - API for remote span context and trace state? (Ref. beta19)
|
||||
;; - API for span links?
|
||||
|
||||
;;;; Attributes
|
||||
|
||||
|
|
@ -137,209 +93,7 @@
|
|||
(enc/run-kv! (fn [k v] (put-attr! attr-builder (attr-name name-or-prefix k) v)) x)
|
||||
(do (put-attr! attr-builder name-or-prefix x))))
|
||||
|
||||
;;;; Spans
|
||||
|
||||
#_
|
||||
(defn- remote-span-context
|
||||
"Returns new remote `io.opentelemetry.api.trace.SpanContext`
|
||||
for use as `start-span` parent."
|
||||
^SpanContext [^String trace-id ^String span-id sampled? ?trace-state]
|
||||
(SpanContext/createFromRemoteParent trace-id span-id
|
||||
(if sampled?
|
||||
(io.opentelemetry.api.trace.TraceFlags/getSampled)
|
||||
(io.opentelemetry.api.trace.TraceFlags/getDefault))
|
||||
|
||||
(enc/if-not [trace-state ?trace-state]
|
||||
(io.opentelemetry.api.trace.TraceState/getDefault)
|
||||
(cond
|
||||
(map? trace-state)
|
||||
(let [tsb (io.opentelemetry.api.trace.TraceState/builder)]
|
||||
(enc/run-kv! (fn [k v] (.put tsb k v)) trace-state) ; NB only `a-zA-Z.-_` chars allowed
|
||||
(.build tsb))
|
||||
|
||||
(instance? io.opentelemetry.api.trace.TraceState trace-state) trace-state
|
||||
:else
|
||||
(enc/unexpected-arg! trace-state
|
||||
:context `remote-span-context
|
||||
:param 'trace-state
|
||||
:expected '#{nil {string string} io.opentelemetry.api.trace.TraceState})))))
|
||||
|
||||
(comment (enc/qb 1e6 (remote-span-context "c5b856d919f65e39a202bfb3034d65d8" "9740419096347616" false {"a" "A"}))) ; 111.13
|
||||
|
||||
(defn- start-span
|
||||
"Returns new `io.opentelemetry.api.trace.Span` with random `traceId` and `spanId`."
|
||||
^Span
|
||||
[^Tracer tracer ^Context context ^String span-name ^java.time.Instant inst ?parent]
|
||||
(let [sb (.spanBuilder tracer span-name)]
|
||||
(enc/if-not [parent ?parent]
|
||||
(.setParent sb context) ; Base (callsite) context
|
||||
(cond
|
||||
(instance? Span parent) (.setParent sb (.with context ^Span parent)) ; Local parent span, etc.
|
||||
(instance? SpanContext parent) (.setParent sb (.with context (Span/wrap ^SpanContext parent))) ; Remote parent context, etc.
|
||||
:else
|
||||
(enc/unexpected-arg! parent
|
||||
{:context `start-span
|
||||
:expected
|
||||
#{io.opentelemetry.api.trace.Span
|
||||
io.opentelemetry.api.trace.SpanContext}})))
|
||||
|
||||
(.setStartTimestamp sb inst)
|
||||
(.startSpan sb)))
|
||||
|
||||
(comment
|
||||
(let [inst (enc/now-inst)] (enc/qb 1e6 (start-span my-tr (Context/current) "id1" inst nil))) ; 158.09
|
||||
(start-span my-tr (Context/current) "id1" (enc/now-inst) (start-span my-tr (Context/current) "id2" (enc/now-inst) nil))
|
||||
(start-span my-tr (Context/current) "id1" (enc/now-inst)
|
||||
(remote-span-context "c5b856d919f65e39a202bfb3034d65d8" "1111111111111111" false nil)))
|
||||
|
||||
(let [ak-uid (io.opentelemetry.api.common.AttributeKey/stringKey "uid")
|
||||
ak-ns (io.opentelemetry.api.common.AttributeKey/stringKey "ns")
|
||||
ak-line (io.opentelemetry.api.common.AttributeKey/longKey "line")]
|
||||
|
||||
(defn- span-attrs
|
||||
"Returns `io.opentelemetry.api.common.Attributes` or nil."
|
||||
[uid signal]
|
||||
(if uid
|
||||
(if-let [ns (get signal :ns)]
|
||||
(if-let [line (get signal :line)]
|
||||
(Attributes/of ak-uid (str uid), ak-ns ns, ak-line (long line))
|
||||
(Attributes/of ak-uid (str uid), ak-ns ns))
|
||||
(Attributes/of ak-uid (str uid)))
|
||||
|
||||
(if-let [ns (get signal :ns)]
|
||||
(if-let [line (get signal :line)]
|
||||
(Attributes/of ak-ns ns, ak-line (long line))
|
||||
(Attributes/of ak-ns ns))
|
||||
nil))))
|
||||
|
||||
(comment (enc/qb 1e6 (span-attrs "uid1" {:ns "ns1" :line 495}))) ; 101.36
|
||||
|
||||
(def ^:private ^String span-name (enc/fmemoize (fn [id] (if id (enc/as-qname id) "telemere/nil-id"))))
|
||||
(comment (enc/qb 1e6 (span-name :foo/bar))) ; 46.09
|
||||
|
||||
(defn- handle-tracing!
|
||||
"Experimental! Takes care of relevant signal `Span` management.
|
||||
Returns nil or `io.opentelemetry.api.trace.Span` for possible use as
|
||||
`io.opentelemetry.api.logs.LogRecordBuilder` context.
|
||||
|
||||
Expect:
|
||||
- `spans_` - latom: {<uid> <Span_>}
|
||||
- `end-buffer_` - latom: #{[<uid> <end-inst>]}
|
||||
- `gc-buffer_` - latom: #{<uid>}"
|
||||
|
||||
[tracer context spans_ end-buffer_ gc-buffer_ gc-latch_ signal]
|
||||
|
||||
;; Notes:
|
||||
;; - Spans go to `SpanExporter` after `.end` call, ~random order okay
|
||||
;; - Span data: t1 of self, and name + id + t0 of #{self parent trace}
|
||||
;; - No API to directly create spans with needed data, so we ~simulate
|
||||
;; typical usage
|
||||
|
||||
(when-let [^java.util.concurrent.CountDownLatch gc-latch (gc-latch_)]
|
||||
(try (.await gc-latch) (catch InterruptedException _)))
|
||||
|
||||
(enc/when-let
|
||||
[root (get signal :root) ; Tracing iff root
|
||||
root-uid (get root :uid)
|
||||
:let [curr-spans (spans_)]
|
||||
root-span
|
||||
(force
|
||||
(or ; Fetch/ensure Span for root
|
||||
(get curr-spans root-uid)
|
||||
(when-let [root-inst (get root :inst)]
|
||||
(let [root-id (get root :id)]
|
||||
(spans_ root-uid
|
||||
(fn [old]
|
||||
(or old
|
||||
(delay
|
||||
;; TODO Support remote-span-context parent and/or span links?
|
||||
(start-span tracer context (span-name root-id)
|
||||
root-inst nil)))))))))]
|
||||
|
||||
(let [?parent-span ; May be identical to root-span
|
||||
(when-let [parent (get signal :parent)]
|
||||
(when-let [parent-uid (get parent :uid)]
|
||||
(if (= parent-uid root-uid)
|
||||
root-span
|
||||
(force
|
||||
(or ; Fetch/ensure Span for parent
|
||||
(get curr-spans parent-uid)
|
||||
(let [{parent-id :id, parent-inst :inst} parent]
|
||||
(spans_ parent-uid
|
||||
(fn [old]
|
||||
(or old
|
||||
(delay
|
||||
(start-span tracer context (span-name parent-id)
|
||||
parent-inst root-span)))))))))))
|
||||
|
||||
{this-uid :uid, this-end-inst :end-inst} signal]
|
||||
|
||||
(enc/cond
|
||||
;; No end-inst => no run-form =>
|
||||
;; add `Event` (rather than child `Span`) to parent
|
||||
:if-let [this-is-event? (not this-end-inst)]
|
||||
(when-let [^Span parent-span ?parent-span]
|
||||
(let [{this-id :id, this-inst :inst} signal]
|
||||
(if-let [^Attributes attrs (span-attrs this-uid signal)]
|
||||
(.addEvent parent-span (span-name this-id) attrs ^java.time.Instant this-inst)
|
||||
(.addEvent parent-span (span-name this-id) ^java.time.Instant this-inst)))
|
||||
(do parent-span))
|
||||
|
||||
:if-let
|
||||
[^Span this-span
|
||||
(if (= this-uid root-uid)
|
||||
root-span
|
||||
(force
|
||||
(or ; Fetch/ensure Span for this (child)
|
||||
(get curr-spans this-uid)
|
||||
(let [{this-id :id, this-inst :inst} signal]
|
||||
(spans_ this-uid
|
||||
(fn [old]
|
||||
(or old
|
||||
(delay
|
||||
(start-span tracer context (span-name this-id)
|
||||
this-inst (or ?parent-span root-span))))))))))]
|
||||
|
||||
(do
|
||||
(if (utils/error-signal? signal)
|
||||
(.setStatus this-span io.opentelemetry.api.trace.StatusCode/ERROR)
|
||||
(.setStatus this-span io.opentelemetry.api.trace.StatusCode/OK))
|
||||
|
||||
(when-let [^Attributes attrs (span-attrs this-uid signal)]
|
||||
(.setAllAttributes this-span attrs))
|
||||
|
||||
;; Error stuff
|
||||
(when-let [error (get signal :error)]
|
||||
(when (instance? Throwable error)
|
||||
(if-let [attrs
|
||||
(when-let [ex-data (ex-data error)]
|
||||
(when-not (empty? ex-data)
|
||||
(let [sb (Attributes/builder)]
|
||||
(enc/run-kv! (fn [k v] (put-attr! sb (attr-name k) v)) ex-data)
|
||||
(.build sb))))]
|
||||
(.recordException this-span error attrs)
|
||||
(.recordException this-span error))))
|
||||
|
||||
;; (.end this-span this-end-inst) ; Ready for `SpanExporter`
|
||||
(end-buffer_ (fn [old] (conj old [this-uid this-end-inst])))
|
||||
(gc-buffer_ (fn [old] (conj old this-uid)))
|
||||
|
||||
this-span)))))
|
||||
|
||||
(comment
|
||||
(do
|
||||
(require '[taoensso.telemere :as t])
|
||||
(def spans_ "{<uid> <Span_>}" (enc/latom {}))
|
||||
(def end-buffer_ "#{[<uid> <end-inst>]}" (enc/latom #{}))
|
||||
(def gc-buffer_ "#{<uid>}" (enc/latom #{}))
|
||||
(let [[_ [s1 s2]] (t/with-signals (t/trace! ::id1 (t/trace! ::id2 "form2")))]
|
||||
(def s1 s1)
|
||||
(def s2 s2)))
|
||||
|
||||
[@gc-buffer_ @end-buffer_ @spans_]
|
||||
(handle-tracing! my-tr spans_ end-buffer_ gc-buffer_ (enc/latom nil) s1))
|
||||
|
||||
;;;; Logging
|
||||
;;;; Handler
|
||||
|
||||
(defn- level->severity
|
||||
^Severity [level]
|
||||
|
|
@ -367,7 +121,7 @@
|
|||
(str level))))
|
||||
|
||||
(defn- signal->attrs
|
||||
"Returns `io.opentelemetry.api.common.Attributes` for given signal.
|
||||
"Returns `Attributes` for given signal.
|
||||
Ref. <https://opentelemetry.io/docs/specs/otel/logs/data-model/>."
|
||||
^Attributes [signal]
|
||||
(let [ab (Attributes/builder)]
|
||||
|
|
@ -431,11 +185,24 @@
|
|||
(.build ab)))
|
||||
|
||||
(comment
|
||||
(enc/qb 1e6 ; 850.93
|
||||
(enc/qb 1e6 ; 808.56
|
||||
(signal->attrs
|
||||
{:level :info :data {:ns/kw1 :v1 :ns/kw2 :v2}
|
||||
:otel/attrs {:longs [1 1 2 3] :strs ["a" "b" "c"]}})))
|
||||
|
||||
(let [ak-ns (io.opentelemetry.api.common.AttributeKey/stringKey "ns")
|
||||
ak-line (io.opentelemetry.api.common.AttributeKey/longKey "line")]
|
||||
|
||||
(defn- basic-span-attrs
|
||||
"Returns `?Attributes`."
|
||||
[signal]
|
||||
(when-let [ns (get signal :ns)]
|
||||
(if-let [line (get signal :line)]
|
||||
(Attributes/of ak-ns ns, ak-line (long line))
|
||||
(Attributes/of ak-ns ns)))))
|
||||
|
||||
(comment (enc/qb 1e6 (basic-span-attrs {:ns "ns1" :line 495}))) ; 52.4
|
||||
|
||||
(defn handler:open-telemetry
|
||||
"Highly experimental, possibly buggy, and subject to change!!
|
||||
Feedback and bug reports very welcome! Please ping me (Peter) at:
|
||||
|
|
@ -447,154 +214,102 @@
|
|||
|
||||
Returns a signal handler that:
|
||||
- Takes a Telemere signal (map).
|
||||
- Emits signal data to configured `io.opentelemetry.api.logs.Logger`
|
||||
- Emits tracing data to configured `io.opentelemetry.api.logs.Tracer`
|
||||
- Emits signal data to configured `LogExporter`
|
||||
- Emits tracing data to configured `SpanExporter`
|
||||
iff `telemere/otel-tracing?` is true.
|
||||
|
||||
Options:
|
||||
`:logger-provider` - ∈ #{nil :default <io.opentelemetry.api.logs.LoggerProvider>} [1]
|
||||
`:tracer-provider` - ∈ #{nil :default <io.opentelemetry.api.trace.TracerProvider>} [1]
|
||||
`:max-span-msecs` - (Advanced) Longest tracing span to support in milliseconds
|
||||
(default 120 mins). If recorded spans exceed this max, emitted
|
||||
data will be inaccurate. Larger values use more memory.
|
||||
|
||||
[1] See `get-default-providers` for more info"
|
||||
`:logger-provider` - nil or `io.opentelemetry.api.logs.LoggerProvider`,
|
||||
(see `telemere/get-default-providers` for default)."
|
||||
|
||||
;; Notes:
|
||||
;; - Multi-threaded handlers may see signals ~out of order
|
||||
;; - Sampling means that root/parent/child signals may never be handled
|
||||
;; - Sampling means that root/parent/child signals might not be handled
|
||||
;; - `:otel/attrs`, `:otel/context` currently undocumented
|
||||
|
||||
([] (handler:open-telemetry nil))
|
||||
([{:keys [logger-provider tracer-provider max-span-msecs]
|
||||
:or
|
||||
{logger-provider :default
|
||||
tracer-provider :default
|
||||
max-span-msecs (enc/msecs :mins 120)}}]
|
||||
([{:keys [emit-tracing? logger-provider]
|
||||
:or {emit-tracing? true}}]
|
||||
|
||||
(let [min-max-span-msecs (enc/msecs :mins 15)]
|
||||
(when (< (long max-span-msecs) min-max-span-msecs)
|
||||
(throw
|
||||
(ex-info "`max-span-msecs` too small"
|
||||
{:given max-span-msecs, :min min-max-span-msecs}))))
|
||||
(let [?logger-provider
|
||||
(if (not= logger-provider :default)
|
||||
logger-provider
|
||||
(:logger-provider (force taoensso.telemere/otel-default-providers_)))
|
||||
|
||||
(let [?logger-provider (if (= logger-provider :default) (:logger-provider (force default-providers_)) logger-provider)
|
||||
?tracer-provider (if (= tracer-provider :default) (:tracer-provider (force default-providers_)) tracer-provider)
|
||||
?tracer
|
||||
(when-let [^io.opentelemetry.api.trace.TracerProvider p ?tracer-provider]
|
||||
(.get p "Telemere"))
|
||||
;; Mechanism to end spans 3-6 secs *after* signal handling. The delay
|
||||
;; helps support out-of-order signals due to >1 handler threads, etc.
|
||||
span-buffer1_ (enc/latom #{}) ; #{[<Span> <end-inst>]}
|
||||
span-buffer2_ (enc/latom #{})
|
||||
timer_
|
||||
(delay
|
||||
(let [t3s (java.util.Timer. "autoTelemereOpenTelemetryHandlerTimer3s" (boolean :daemon))]
|
||||
(.schedule t3s
|
||||
(proxy [java.util.TimerTask] []
|
||||
(run []
|
||||
;; span2->end!
|
||||
(when-let [drained (enc/reset-in! span-buffer2_ #{})]
|
||||
(doseq [[span end-inst] drained]
|
||||
(.end
|
||||
^io.opentelemetry.api.trace.Span span
|
||||
^java.time.Instant end-inst)))
|
||||
|
||||
;;; Tracing state
|
||||
spans_ (when ?tracer (enc/latom {})) ; {<uid> <Span_>}
|
||||
end-buffer1_ (when ?tracer (enc/latom #{})) ; #{[<uid> <end-inst>]}
|
||||
sgc-buffer1_ (when ?tracer (enc/latom #{})) ; #{<uid>} ; Slow GC
|
||||
gc-latch_ (when ?tracer (enc/latom nil)) ; ?CountDownLatch
|
||||
;; span1->span2
|
||||
(when-let [drained (enc/reset-in! span-buffer1_ #{})]
|
||||
(when-not (empty? drained)
|
||||
(span-buffer2_ (fn [old] (set/union old drained)))))))
|
||||
|
||||
3000 3000)))
|
||||
|
||||
stop-tracing!
|
||||
(if-not ?tracer
|
||||
(fn stop-tracing! []) ; Noop
|
||||
(let [end-buffer2_ (enc/latom #{})
|
||||
sgc-buffer2_ (enc/latom #{})
|
||||
fgc-buffer1_ (enc/latom #{})
|
||||
fgc-buffer2_ (enc/latom #{})
|
||||
|
||||
tmax (java.util.Timer. "autoTelemereOpenTelemetryHandlerTimerMax" (boolean :daemon))
|
||||
t2m (java.util.Timer. "autoTelemereOpenTelemetryHandlerTimer2m" (boolean :daemon))
|
||||
t3s (java.util.Timer. "autoTelemereOpenTelemetryHandlerTimer3s" (boolean :daemon))
|
||||
schedule!
|
||||
(fn [^java.util.Timer timer ^long interval-msecs f]
|
||||
(.schedule timer (proxy [java.util.TimerTask] [] (run [] (f)))
|
||||
interval-msecs interval-msecs))
|
||||
|
||||
gc-spans!
|
||||
(fn [uids-to-gc]
|
||||
(when-not (empty? uids-to-gc)
|
||||
(let [uids-to-gc (set/intersection uids-to-gc (set (keys (spans_))))]
|
||||
(when-not (empty? uids-to-gc)
|
||||
;; ;; Update in small batches to minimize contention
|
||||
;; (doseq [batch (partition-all 16 uids-to-gc)]
|
||||
;; (spans_ (fn [old] (reduce dissoc old batch))))
|
||||
(let [gc-latch (java.util.concurrent.CountDownLatch. 1)]
|
||||
(when (compare-and-set! gc-latch_ nil gc-latch)
|
||||
(try
|
||||
(spans_ (fn [old] (reduce dissoc old uids-to-gc)))
|
||||
(finally
|
||||
(.countDown gc-latch)
|
||||
(reset! gc-latch_ nil)))))))))
|
||||
|
||||
move-uids!
|
||||
(fn [src_ dst_]
|
||||
(let [drained (enc/reset-in! src_ #{})]
|
||||
(when-not (empty? drained)
|
||||
(dst_ (fn [old] (set/union old drained))))))]
|
||||
|
||||
;; Notes:
|
||||
;; - Maintain local {<uid> <Span_>} state, creating spans as needed
|
||||
;; - A timer+buffer system is used to delay calling `.end` on
|
||||
;; spans, allowing parents to linger in case they're handled
|
||||
;; before children.
|
||||
;;
|
||||
;; Internal buffer flow:
|
||||
;; 1. handler->end1->end2->(end!)->fgc1->fgc2->(gc!) ; Fast GC path (span ended)
|
||||
;; 2. handler ->sgc1->sgc2->(gc!) ; Slow GC path (span not ended)
|
||||
;;
|
||||
;; Properties:
|
||||
;; - End spans 3-6 secs after trace handler ; Linger for possible out-of-order children
|
||||
;; - GC spans 2-4 mins after ending ; '', children will noop
|
||||
;; - GC spans 90-92 mins after span first created
|
||||
;; Final catch-all for spans that may have been created but
|
||||
;; never ended (e.g. due to sampling or filtering).
|
||||
;; => Max span runtime!
|
||||
|
||||
(schedule! tmax max-span-msecs ; sgc2->(gc!)
|
||||
(fn [] (gc-spans! (enc/reset-in! sgc-buffer2_ #{}))))
|
||||
|
||||
(schedule! t2m (enc/msecs :mins 2)
|
||||
(fn []
|
||||
(gc-spans! (enc/reset-in! fgc-buffer2_ #{})) ; fgc2->(gc!)
|
||||
(move-uids! fgc-buffer1_ fgc-buffer2_) ; fgc1->fgc2
|
||||
(move-uids! sgc-buffer1_ sgc-buffer2_) ; sgc1->sgc2
|
||||
))
|
||||
|
||||
(schedule! t3s (enc/msecs :secs 3)
|
||||
(fn []
|
||||
(let [drained (enc/reset-in! end-buffer2_ #{})]
|
||||
(when-not (empty? drained)
|
||||
|
||||
;; end2->(end!)
|
||||
(let [spans (spans_)]
|
||||
(doseq [[uid end-inst] drained]
|
||||
(when-let [span_ (get spans uid)]
|
||||
(.end ^Span (force span_) ^java.time.Instant end-inst))))
|
||||
|
||||
;; (end!)->fgc1
|
||||
(let [uids (into #{} (map (fn [[uid _]] uid)) drained)]
|
||||
(fgc-buffer1_ (fn [old] (set/union old uids))))))
|
||||
|
||||
;; end1->end2
|
||||
(move-uids! end-buffer1_ end-buffer2_)))
|
||||
|
||||
(fn stop-tracing! []
|
||||
(loop [] (when-not (empty? (end-buffer1_)) (recur))) ; Block to drain `end1`
|
||||
(loop [] (when-not (empty? (end-buffer2_)) (recur))) ; Block to drain `end2`
|
||||
(.cancel t3s) (.cancel t2m) (.cancel tmax))))]
|
||||
(fn stop-tracing! []
|
||||
(when (realized? timer_)
|
||||
(loop [] (when-not (empty? (span-buffer1_)) (recur))) ; Block to drain `span1`
|
||||
(loop [] (when-not (empty? (span-buffer2_)) (recur))) ; Block to drain `span2`
|
||||
(.cancel ^java.util.Timer @timer_)))]
|
||||
|
||||
(fn a-handler:open-telemetry
|
||||
([ ] (stop-tracing!))
|
||||
([signal]
|
||||
(let [?context
|
||||
(enc/when-let
|
||||
[^Tracer tracer ?tracer
|
||||
^Context context
|
||||
(enc/get* signal :otel/context ; Undocumented
|
||||
:_otel-context
|
||||
#_(io.opentelemetry.context.Context/root)
|
||||
(io.opentelemetry.context.Context/current))
|
||||
(let [?tracing-context
|
||||
(when emit-tracing?
|
||||
(when-let [context (enc/get* signal :otel/context :_otel-context nil)]
|
||||
(let [span (io.opentelemetry.api.trace.Span/fromContext context)]
|
||||
(when (.isRecording span)
|
||||
(enc/if-not [end-inst (get signal :end-inst)]
|
||||
;; No end-inst => no run-form => add `Event` to span (parent)
|
||||
(let [{:keys [id ^java.time.Instant inst]} signal]
|
||||
(if-let [^Attributes attrs (basic-span-attrs signal)]
|
||||
(.addEvent span (impl/otel-name id) attrs inst)
|
||||
(.addEvent span (impl/otel-name id) inst)))
|
||||
|
||||
^Span span
|
||||
(handle-tracing! tracer context
|
||||
spans_ end-buffer1_ sgc-buffer1_ gc-latch_ signal)]
|
||||
;; Real span
|
||||
(do
|
||||
(if (utils/error-signal? signal)
|
||||
(.setStatus span io.opentelemetry.api.trace.StatusCode/ERROR)
|
||||
(.setStatus span io.opentelemetry.api.trace.StatusCode/OK))
|
||||
|
||||
(.storeInContext span context))]
|
||||
(when-let [^Attributes attrs (basic-span-attrs signal)]
|
||||
(.setAllAttributes span attrs))
|
||||
|
||||
;; Error stuff
|
||||
(when-let [error (get signal :error)]
|
||||
(when (instance? Throwable error)
|
||||
(if-let [attrs
|
||||
(when-let [ex-data (ex-data error)]
|
||||
(when-not (empty? ex-data)
|
||||
(let [sb (Attributes/builder)]
|
||||
(enc/run-kv! (fn [k v] (put-attr! sb (attr-name k) v)) ex-data)
|
||||
(.build sb))))]
|
||||
(.recordException span error attrs)
|
||||
(.recordException span error))))
|
||||
|
||||
;; (.end span end-inst) ; Emit to `SpanExporter` now
|
||||
;; Emit to `SpanExporter` after delay:
|
||||
(span-buffer1_ (fn [old] (conj old [span end-inst])))
|
||||
(.deref timer_) ; Ensure timer is running
|
||||
))
|
||||
|
||||
context))))]
|
||||
|
||||
(when-let [^io.opentelemetry.api.logs.LoggerProvider logger-provider ?logger-provider]
|
||||
(let [{:keys [ns inst level msg_]} signal
|
||||
|
|
@ -605,8 +320,8 @@
|
|||
(.setSeverity lrb (level->severity level))
|
||||
(.setAllAttributes lrb (signal->attrs signal))
|
||||
|
||||
(when-let [^Context context ?context] ; Incl. traceId, SpanId, etc.
|
||||
(.setContext lrb context))
|
||||
(when-let [^io.opentelemetry.context.Context tracing-context ?tracing-context]
|
||||
(.setContext lrb tracing-context)) ; Incl. traceId, spanId, etc.
|
||||
|
||||
(when-let [body
|
||||
(or
|
||||
|
|
@ -616,7 +331,7 @@
|
|||
(str (enc/ex-type error) ": " (enc/ex-message error)))))]
|
||||
(.setBody lrb body))
|
||||
|
||||
;; Ready for `LogRecordExporter`
|
||||
;; Emit to `LogRecordExporter`
|
||||
(.emit lrb)))))))))
|
||||
|
||||
(enc/deprecated
|
||||
|
|
@ -633,3 +348,21 @@
|
|||
(def s2 s2)))
|
||||
|
||||
(h1 s1))
|
||||
|
||||
(defn check-interop
|
||||
"Returns interop debug info map."
|
||||
[]
|
||||
{:present? true
|
||||
:use-tracer? impl/enabled:otel-tracing?
|
||||
:viable-tracer? (boolean (impl/viable-tracer (force taoensso.telemere/*otel-tracer*)))})
|
||||
|
||||
(impl/add-interop-check! :open-telemetry check-interop)
|
||||
|
||||
(impl/on-init
|
||||
(when impl/enabled:otel-tracing?
|
||||
;; (taoensso.telemere/add-handler! :default/open-telemetry (handler:open-telemetry))
|
||||
(impl/signal!
|
||||
{:kind :event
|
||||
:level :debug ; < :info since runs on init
|
||||
:id :taoensso.telemere/open-telemetry-tracing!
|
||||
:msg "Enabling interop: OpenTelemetry tracing"})))
|
||||
|
|
|
|||
|
|
@ -264,9 +264,8 @@
|
|||
(let [sv1 (dissoc (with-sig (sig! {:level :info, :run (+ 1 2), :my-k1 :my-v1})) :_otel-context)
|
||||
sv1 ; Ensure instants are printable
|
||||
(-> sv1
|
||||
(update-in [:inst] enc/inst->udt)
|
||||
(update-in [:end-inst] enc/inst->udt)
|
||||
(update-in [:root :inst] enc/inst->udt))]
|
||||
(update-in [:inst] enc/inst->udt)
|
||||
(update-in [:end-inst] enc/inst->udt))]
|
||||
|
||||
[(is (= sv1 (read-string (pr-str sv1))))])))])
|
||||
|
||||
|
|
@ -422,80 +421,98 @@
|
|||
(let [sv (with-sig (sig! {:level :info, :root {:id :id1, :foo :bar}}))] (is (sm? sv {:root {:id :id1 :uid :submap/nx, :foo :bar}}) "Manual `:root/id`"))
|
||||
(let [sv (with-sig (sig! {:level :info, :root {:uid :uid1, :foo :bar}}))] (is (sm? sv {:root {:id :submap/nx :uid :uid1, :foo :bar}}) "Manual `:root/uid`"))])
|
||||
|
||||
(testing "Auto trace {:keys [id uid inst]}s for parent and root"
|
||||
[(testing "Tracing enabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :inst t1, :trace? true
|
||||
:run [impl/*trace-parent* impl/*trace-root*],
|
||||
:data [impl/*trace-parent* impl/*trace-root*]}))]
|
||||
(testing "Auto parent/root"
|
||||
[(testing "Tracing disabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :trace? false
|
||||
:run {:parent impl/*trace-parent*, :root impl/*trace-root*}
|
||||
:data {:parent impl/*trace-parent*, :root impl/*trace-root*}}))]
|
||||
|
||||
[(is (sm? sv {:parent nil, :root {:id :id1, :uid :uid1, :inst t1}}))
|
||||
(let [[rv1 rv2] (:run-val sv)]
|
||||
[(is (sm? rv1 {:id :id1, :uid :uid1, :inst t1}) "`*trace-parent*` visible to run-form")
|
||||
(is (sm? rv2 {:id :id1, :uid :uid1, :inst t1}) "`*trace-root*` visible to run-form")])
|
||||
[(is (sm? sv {:parent nil, :root nil}))
|
||||
(is (sm? sv {:run-val {:parent nil, :root nil}}) "`*trace-x*` visible to run-form")
|
||||
(is (sm? sv {:data {:parent nil, :root nil}}) "`*trace-x*` NOT visible to data-form")]))
|
||||
|
||||
(let [[dv1 dv2] (:data sv)]
|
||||
[(is (= dv1 nil) "`*trace-parent*` NOT visible to data-form")
|
||||
(is (= dv2 nil) "`*trace-root*` NOT visible to data-form")])]))
|
||||
(when #?(:cljs true :clj (not impl/enabled:otel-tracing?))
|
||||
(testing "Tracing enabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :trace? true
|
||||
:run {:parent impl/*trace-parent*, :root impl/*trace-root*}
|
||||
:data {:parent impl/*trace-parent*, :root impl/*trace-root*}}))]
|
||||
|
||||
(testing "Tracing disabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :inst t1, :trace? false
|
||||
:run [impl/*trace-parent* impl/*trace-root*],
|
||||
:data [impl/*trace-parent* impl/*trace-root*]}))]
|
||||
[(is (sm? sv {:parent nil, :root {:id :id1, :uid :uid1}}))
|
||||
(is (sm? sv {:run-val {:parent {:id :id1, :uid :uid1}, :root {:id :id1, :uid :uid1}}}) "`*trace-x*` visible to run-form")
|
||||
(is (sm? sv {:data {:parent nil, :root nil}}) "`*trace-x*` NOT visible to data-form")])))])
|
||||
|
||||
[(is (sm? sv {:parent nil, :root nil}))
|
||||
(let [[rv1 rv2] (:run-val sv)]
|
||||
[(is (= rv1 nil) "`*trace-parent*` visible to run-form")
|
||||
(is (= rv2 nil) "`*trace-root*` visible to run-form")])
|
||||
(testing "Manual parent/root"
|
||||
[(testing "Tracing disabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :trace? false,
|
||||
:parent {:id :id2, :uid :uid2},
|
||||
:root {:id :id3, :uid :uid3}
|
||||
:run {:parent impl/*trace-parent*, :root impl/*trace-root*}
|
||||
:data {:parent impl/*trace-parent*, :root impl/*trace-root*}}))]
|
||||
|
||||
(let [[dv1 dv2] (:data sv)]
|
||||
[(is (= dv1 nil) "`*trace-parent*` NOT visible to data-form")
|
||||
(is (= dv2 nil) "`*trace-root*` NOT visible to data-form")])]))])
|
||||
[(is (sm? sv {:parent {:id :id2, :uid :uid2}, :root {:id :id3, :uid :uid3}}))
|
||||
(is (sm? sv {:run-val {:parent nil, :root nil}}) "`*trace-x*` visible to run-form")
|
||||
(is (sm? sv {:data {:parent nil, :root nil}}) "`*trace-x*` NOT visible to data-form")]))
|
||||
|
||||
(testing "Manual trace {:keys [id uid inst]}s for parent and root"
|
||||
[(testing "Tracing enabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :inst t1, :trace? true,
|
||||
:parent {:id :id2, :uid :uid2, :inst t2},
|
||||
:root {:id :id3, :uid :uid3, :inst t3}
|
||||
:run [impl/*trace-parent* impl/*trace-root*],
|
||||
:data [impl/*trace-parent* impl/*trace-root*]}))]
|
||||
(when #?(:cljs true :clj (not impl/enabled:otel-tracing?))
|
||||
(testing "Tracing enabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :trace? true,
|
||||
:parent {:id :id2, :uid :uid2},
|
||||
:root {:id :id3, :uid :uid3}
|
||||
:run {:parent impl/*trace-parent*, :root impl/*trace-root*}
|
||||
:data {:parent impl/*trace-parent*, :root impl/*trace-root*}}))]
|
||||
|
||||
[(is (sm? sv {:parent {:id :id2, :uid :uid2, :inst t2}, :root {:id :id3, :uid :uid3, :inst t3}}))
|
||||
(let [[rv1 rv2] (:run-val sv)]
|
||||
[(is (sm? rv1 {:id :id1, :uid :uid1, :inst t1}) "`*trace-parent*` visible to run-form")
|
||||
(is (sm? rv2 {:id :id3, :uid :uid3, :inst t3}) "`*trace-root*` visible to run-form")])
|
||||
|
||||
(let [[dv1 dv2] (:data sv)]
|
||||
[(is (= dv1 nil) "`*trace-parent*` NOT visible to data-form")
|
||||
(is (= dv2 nil) "`*trace-root*` NOT visible to data-form")])]))
|
||||
|
||||
(testing "Tracing disabled"
|
||||
(let [sv (with-sig (sig! {:level :info, :id :id1, :uid :uid1, :inst t1, :trace? false,
|
||||
:parent {:id :id2, :uid :uid2, :inst t2},
|
||||
:root {:id :id3, :uid :uid3, :inst t3}
|
||||
:run [impl/*trace-parent* impl/*trace-root*],
|
||||
:data [impl/*trace-parent* impl/*trace-root*]}))]
|
||||
|
||||
[(is (sm? sv {:parent {:id :id2, :uid :uid2, :inst t2}, :root {:id :id3, :uid :uid3, :inst t3}}))
|
||||
(let [[rv1 rv2] (:run-val sv)]
|
||||
[(is (= rv1 nil) "`*trace-parent*` visible to run-form")
|
||||
(is (= rv2 nil) "`*trace-root*` visible to run-form")])
|
||||
|
||||
(let [[dv1 dv2] (:data sv)]
|
||||
[(is (= dv1 nil) "`*trace-parent*` NOT visible to data-form")
|
||||
(is (= dv2 nil) "`*trace-root*` NOT visible to data-form")])]))])
|
||||
[(is (sm? sv {:parent {:id :id2, :uid :uid2}, :root {:id :id3, :uid :uid3}}))
|
||||
(is (sm? sv {:run-val {:parent {:id :id1, :uid :uid1}, :root {:id :id3, :uid :uid3}}}) "`*trace-x*` visible to run-form")
|
||||
(is (sm? sv {:data {:parent nil, :root nil}}) "`*trace-x*` NOT visible to data-form")
|
||||
])))])
|
||||
|
||||
(testing "Signal nesting"
|
||||
(let [[[outer-rv _] [outer-sv]]
|
||||
(with-sigs
|
||||
(sig! { :level :info, :id :id1, :uid :uid1, :inst t1
|
||||
:run (with-sigs (sig! {:level :info, :id :id2, :uid :uid2, :inst t2, :run [impl/*trace-parent* impl/*trace-root*]}))}))
|
||||
(when #?(:cljs true :clj (not impl/enabled:otel-tracing?))
|
||||
(let [[[s1-rv _] [s1-sv]]
|
||||
(with-sigs
|
||||
(sig! { :level :info, :id :id1, :uid :uid1
|
||||
:run (with-sigs (sig! {:level :info, :id :id2, :uid :uid2
|
||||
:run
|
||||
{:parent impl/*trace-parent*
|
||||
:root impl/*trace-root*}}))}))
|
||||
[[s2-rv _] [s2-sv]] s1-rv]
|
||||
|
||||
[[inner-rv _] [inner-sv]] outer-rv]
|
||||
[(is (sm? s1-sv {:id :id1, :uid :uid1, :parent nil}))
|
||||
(is (sm? s2-sv {:parent {:id :id1, :uid :uid1}}))
|
||||
|
||||
[(is (sm? outer-sv {:id :id1, :uid :uid1, :parent nil}))
|
||||
(is (sm? inner-rv [{:id :id2, :uid :uid2, :inst t2} {:id :id1, :uid :uid1, :inst t1}]))
|
||||
(is (sm? inner-sv {:parent {:id :id1, :uid :uid1, :inst t1}}))
|
||||
(is (sm? inner-sv {:run-val [{:id :id2, :uid :uid2, :inst t2} {:id :id1, :uid :uid1, :inst t1}]}))]))]))
|
||||
(is (sm? s2-rv {:parent {:id :id2, :uid :uid2}, :root {:id :id1, :uid :uid1}}))
|
||||
(is (sm? s2-sv {:run-val {:parent {:id :id2, :uid :uid2}, :root {:id :id1, :uid :uid1}}}))]))
|
||||
|
||||
#?(:clj
|
||||
(enc/compile-when impl/enabled:otel-tracing?
|
||||
(let [[[s1-rv _] [s1-sv]]
|
||||
(with-sigs
|
||||
(sig! { :level :info, :id :id1
|
||||
:run (with-sigs (sig! {:level :info, :id :id2
|
||||
:run
|
||||
{:parent impl/*trace-parent*
|
||||
:root impl/*trace-root*}}))}))
|
||||
[[s2-rv _] [s2-sv]] s1-rv
|
||||
|
||||
c1 (:_otel-context s1-sv)
|
||||
c2 (:_otel-context s2-sv)
|
||||
|
||||
s1-trace-id (impl/otel-trace-id c1)
|
||||
s2-trace-id (impl/otel-trace-id c2)
|
||||
|
||||
s1-span-id (impl/otel-span-id c1)
|
||||
s2-span-id (impl/otel-span-id c2)]
|
||||
|
||||
[(is (impl/viable-tracer (force tel/*otel-tracer*)) "Viable tracer")
|
||||
(is (every? string? [s1-trace-id s2-trace-id s1-span-id s2-span-id]) "Viable tracer produces spans with ids")
|
||||
(is (= s1-trace-id s2-trace-id))
|
||||
(is (not= s1-span-id s2-span-id))
|
||||
|
||||
(is (sm? s1-sv {:id :id1, :uid s1-span-id, :parent nil, :root {:id :id1, :uid s1-trace-id}}))
|
||||
(is (sm? s2-sv {:parent {:id :id1, :uid s1-span-id}}))
|
||||
|
||||
(is (sm? s2-rv {:parent {:id :id2, :uid s2-span-id}, :root {:id :id1, :uid s1-trace-id}}))
|
||||
(is (sm? s2-sv {:run-val {:parent {:id :id2, :uid s2-span-id}, :root {:id :id1, :uid s1-trace-id}}}))]))))]))
|
||||
|
||||
(deftest _sampling
|
||||
;; Capture combined (call * handler) sample rate in Signal when possible
|
||||
|
|
|
|||
Loading…
Reference in a new issue