mirror of
https://github.com/taoensso/telemere.git
synced 2026-02-09 05:23:11 +00:00
405 lines
16 KiB
Clojure
405 lines
16 KiB
Clojure
(ns ^:no-doc taoensso.telemere.files
|
|
"Private ns, implementation detail.
|
|
Core file handler, aliased in main Telemere ns."
|
|
(:require
|
|
[taoensso.encore :as enc :refer [have have?]]
|
|
[taoensso.telemere.utils :as utils]))
|
|
|
|
(comment
|
|
(require '[taoensso.telemere :as tel])
|
|
(remove-ns 'taoensso.telemere.files)
|
|
(:api (enc/interns-overview)))
|
|
|
|
;;;; Implementation
|
|
|
|
(defn gzip-file
  "Compresses contents of `file-in` to `file-out` using gzip.

  Both args are coerced with `utils/as-file`. The input is streamed through
  a small fixed-size buffer, so memory use doesn't depend on the file size.
  Returns true on completion, I/O errors propagate as exceptions."
  [file-in file-out]
  (let [file-in  (utils/as-file file-in)
        file-out (utils/as-file file-out)]

    (with-open
      [stream-in  (java.io.FileInputStream.        file-in)
       stream-out (java.io.FileOutputStream.       file-out)
       gz-out     (java.util.zip.GZIPOutputStream. stream-out 2048 false)]

      ;; A single `.read` may return fewer bytes than the buffer can hold,
      ;; so keep copying until end-of-stream (-1) is reported.
      (let [read-buffer (byte-array 4096)]
        (loop []
          (let [bytes-read (.read stream-in read-buffer)]
            (when-not (== -1 bytes-read)
              (.write gz-out read-buffer 0 bytes-read)
              (recur))))))

    true))
|
|
|
|
(comment (gzip-file "foo.txt" "foo.txt.gz"))
|
|
|
|
(defn get-file-name
  "Builds a file name of form:
    (main-path)(-YYYY-MM-DD(d/w/m))(.part)?(.gz)?

  The \"-<timestamp>\" and \".<part>\" suffixes are added only when the
  relevant arg is truthy. \".gz\" is only ever added together with a part."
  ^String [main-path ?timestamp ?part gz?]
  (let [ts-suffix   (when ?timestamp (str "-" ?timestamp))
        part-suffix (when ?part
                      (if gz?
                        (str "." ?part ".gz")
                        (str "." ?part)))]
    (str main-path ts-suffix part-suffix)))
|
|
|
|
(comment (get-file-name "test/logs/app.log" nil nil true))
|
|
|
|
;; Timestamp handling, edy (long epoch day) as base type
|
|
(let [utc-zone java.time.ZoneOffset/UTC

      ^java.time.format.DateTimeFormatter date-fmt
      (.withZone java.time.format.DateTimeFormatter/ISO_LOCAL_DATE utc-zone)

      ms-per-day  (* 24 60 60 1000)
      week-start  (java.time.temporal.TemporalAdjusters/previousOrSame java.time.DayOfWeek/MONDAY)
      month-start (java.time.temporal.TemporalAdjusters/firstDayOfMonth)]

  ;; Epoch millis <-> epoch day
  (defn udt->edy ^long [^long udt] (quot udt ms-per-day))
  (defn edy->udt ^long [^long edy] (* edy ms-per-day))

  ;; Epoch day of the Monday on/before the given epoch day
  (defn edy-week ^long [^long edy]
    (-> (java.time.LocalDate/ofEpochDay edy)
        (.with week-start)
        (.toEpochDay)))

  ;; Epoch day of the first day of the given epoch day's month
  (defn edy-month ^long [^long edy]
    (-> (java.time.LocalDate/ofEpochDay edy)
        (.with month-start)
        (.toEpochDay)))

  ;; "2020-01-01d" -> epoch day, the trailing interval char is dropped before parsing
  (defn file-timestamp->edy ^long [^String timestamp]
    (let [date-part (subs timestamp 0 (dec (count timestamp)))]
      (-> (java.time.LocalDate/parse date-part date-fmt)
          (.toEpochDay))))

  ;; Epoch day (UTC) of the file's last-modified time
  (defn file-last-modified->edy ^long [^java.io.File file]
    (-> (.lastModified file)
        (java.time.Instant/ofEpochMilli)
        (.atZone utc-zone)
        (.toLocalDate)
        (.toEpochDay)))

  ;; Epoch day -> "YYYY-MM-DD" + interval char, with weekly/monthly stamps
  ;; snapped to the start of their week/month
  (defn format-file-timestamp
    ^String [interval ^long edy]
    (let [stamp
          (fn [^long start-edy suffix]
            (str (.format date-fmt (java.time.LocalDate/ofEpochDay start-edy)) suffix))]

      (case interval
        :daily   (stamp edy             "d")
        :weekly  (stamp (edy-week  edy) "w")
        :monthly (stamp (edy-month edy) "m")
        (enc/unexpected-arg! interval
          {:context  `file-timestamp
           :param    'interval
           :expected #{:daily :weekly :monthly}})))))
|
|
|
|
(comment (file-timestamp->edy (format-file-timestamp :weekly (udt->edy (enc/now-udt*)))))
|
|
|
|
(defn manage-test-files!
  "Describes/creates/deletes files used for tests/debugging, etc.

  `action` ∈ #{:return :println :create :delete} decides what is done with
  each sample file under `test/logs/`. Always returns the vector of file
  names that were considered."
  [action]
  (have? [:el #{:return :println :create :delete}] action)
  (let [fnames_ (volatile! [])

        ;; `mtime-ts` sets the last-modified time on `:create`, and is also
        ;; part of the file name unless `main?` (main file is never timestamped).
        action!
        (fn [app part gz? mtime-ts main?]
          (let [path  (str "test/logs/app" app ".log")
                fname (get-file-name path (if main? nil mtime-ts) part gz?)
                file  (utils/as-file fname)]

            (case action
              :return  nil
              :println (println fname)
              :delete  (.delete file)
              :create
              (do
                (utils/writeable-file! file)
                (spit file fname)
                (when mtime-ts
                  (.setLastModified file
                    (edy->udt (file-timestamp->edy mtime-ts))))))

            (vswap! fnames_ conj fname)))

        sample-parts [1 2 3 4 5]
        sample-timestamps
        ["2020-01-01d" "2020-01-02d" "2020-02-01d" "2020-02-02d" "2021-01-01d"
         "2020-01-01w" "2020-02-01m"]

        specs
        [{:app 1}
         {:app 2, :gz? true,  :parts sample-parts}
         {:app 3, :gz? false, :parts sample-parts}

         {:app 4, :gz? true,  :parts sample-parts}
         {:app 4, :gz? false, :parts sample-parts}

         {:app 5, :gz? true, :timestamps sample-timestamps}
         {:app 6, :gz? true, :parts sample-parts, :timestamps sample-timestamps}]]

    (doseq [{:keys [app gz? timestamps parts]} specs]

      ;; Main file first, last-modified taken from the final timestamp (if any)
      (action! app nil false (peek timestamps) :main)

      ;; Then every timestamp x part combination
      (doseq [timestamp (or timestamps [nil])
              part      (or parts      [nil])]
        (action! app part gz? timestamp false)))

    @fnames_))
|
|
|
|
(comment (manage-test-files! :create))
|
|
|
|
(defn scan-files
  "Returns ?[{:keys [file file-name timestamp edy part gz?]}] for files in same
  dir as `main-path` that:
    - Have the same `interval` type ∈ #{:daily :weekly :monthly nil} (=> ?timestamped).
    - Have the given timestamp (e.g. \"2020-01-01d\", or nil for NO timestamp).

  With a non-nil `interval` and nil `timestamp`, files with ANY timestamp of
  that interval type are included. Returns nil when nothing matches.
  Result is sorted by [edy part] iff `sort?` is truthy.
  Throws when a matched file's path doesn't end with its reconstructed name."
  [main-path interval timestamp sort?]
  (have? [:el #{:daily :weekly :monthly nil}] interval)
  (let [main-file (utils/as-file main-path) ; `logs/app.log`
        main-dir  (.getParentFile (.getAbsoluteFile main-file)) ; `.../logs`

        name-pattern ; Matches ?[_ timestamp part gz]
        (let [quoted-main (str "\\Q" (.getName main-file) "\\E")
              tail        "(\\.\\d+)?(\\.gz)?"
              ts-group
              (if interval
                (str "-(\\d{4}-\\d{2}-\\d{2}"
                  (case interval :daily "d" :weekly "w" :monthly "m")
                  ")")
                "(__no-timestamp__)?")]
          (re-pattern (str quoted-main ts-group tail)))

        wanted-ts timestamp
        any-ts?   (and interval (nil? wanted-ts))

        ->file-map ; Returns ?file-map for one directory entry
        (fn [^java.io.File candidate]
          (when-let [[_ ts part-str gz-str] (re-matches name-pattern (.getName candidate))]
            (when (or any-ts? (= ts wanted-ts))
              (let [edy      (when ts (file-timestamp->edy ts))
                    part     (when part-str (enc/as-pos-int (subs part-str 1)))
                    gz?      (boolean gz-str)
                    expected (get-file-name main-path ts part gz?)
                    actual   (.getAbsolutePath candidate)]

                ;; Verify that scanned file name matches our template
                (when-not (.endsWith actual expected)
                  (throw
                    (ex-info "Unexpected file name"
                      {:actual actual, :expected expected})))

                {:file      candidate
                 :file-name expected
                 :timestamp ts
                 :edy       edy
                 :part      part
                 :gz?       gz?}))))

        file-maps (into [] (keep ->file-map) (.listFiles main-dir))]

    (when (seq file-maps)
      (if sort? ; For unit tests, etc.
        (sort-by (juxt :edy :part) file-maps)
        file-maps))))
|
|
|
|
(comment (group-by :edy (scan-files "logs/app.log" nil nil false)))
|
|
(comment
  ;; `scan-files` maps carry `:file-name` (there is no `:full-name` key)
  (mapv #(select-keys % [:file-name :edy :part :gz?])
    (scan-files "test/logs/app6.log" :daily nil :sort)))
|
|
|
|
;; Debugger used to test/debug file ops
|
|
(defn debugger
  "Returns a stateful fn that records values for test/debug inspection:
    (f x) => appends `x` to an internal log, returns the updated log.
    (f)   => returns the current log (a vector)."
  []
  (let [entries_ (volatile! [])]
    (fn
      ([ ] (deref entries_))
      ([x] (vswap! entries_ conj x)))))
|
|
|
|
(defn archive-main-file!
  "Renames main -> <timestamp>.1.gz archive. Makes room by first rotating
  pre-existing parts (n->n+1) and maintaining `max-num-parts` limit.
  Expensive. Must manually reset any main file streams after!"
  [main-path interval timestamp max-num-parts gz? ?debugger]
  ;; When `?debugger` is non-nil, no file is touched: each intended op is
  ;; instead passed to it as `[:delete <name>]` or `[:rename <from> <to>]`.
  ;; Results of `.delete`, `.renameTo` and `.createNewFile` are not checked.

  ;; Rename n->n+1, deleting when n+1>max
  (when-let [file-maps (scan-files main-path interval timestamp false)] ; [<file-map> ...]
    (let [file-maps-by-edy (group-by :edy file-maps)] ; {<edy> [<file-map> ...]}
      (enc/run-kv!
        (fn [edy file-maps]
          ;; NOTE(review): `enc/rcompare` is presumably a reverse comparator, so
          ;; parts are visited highest-first and a rename n->n+1 never targets a
          ;; name that is still occupied. Confirm against encore.
          (doseq [{:keys [^java.io.File file file-name timestamp part gz?]}
                  (sort-by :part enc/rcompare file-maps)]

            (when part ; Entries without a part number are left alone
              (let [part  (long part)
                    part+ (inc part)]

                (if-let [drop? (and max-num-parts (> part+ (long max-num-parts)))]
                  ;; Shifting would exceed `max-num-parts` => delete instead
                  (if-let [df ?debugger]
                    (df [:delete file-name])
                    (.delete file))

                  ;; Otherwise shift this part up by one, keeping the file's own gz state
                  (let [file-name+ (get-file-name main-path timestamp part+ gz?)]
                    (if-let [df ?debugger]
                      (df [:rename file-name file-name+])
                      (.renameTo file (utils/as-file file-name+)))))))))
        file-maps-by-edy)))

  ;; Rename main -> <timestamp>.1.gz archive
  (let [arch-file-name-gz (get-file-name main-path timestamp 1 false) ; Name without `.gz`
        arch-file-name+gz (get-file-name main-path timestamp 1 gz?)]  ; Name with `.gz` iff `gz?`

    (if-let [df ?debugger]
      (df [:rename main-path arch-file-name+gz])
      (let [main-file    (utils/as-file main-path)         ; `logs/app.log`
            arch-file-gz (utils/as-file arch-file-name-gz) ; `logs/app.log.1` or `logs/app.log-2020-01-01d.1`
            arch-file+gz (utils/as-file arch-file-name+gz) ; `logs/app.log.1.gz` or `logs/app.log-2020-01-01d.1.gz`
            ]

        (have? false? (.exists arch-file+gz)) ; No pre-existing `.1.gz`
        (.renameTo main-file arch-file-gz)    ; Main content becomes the uncompressed part 1
        (.createNewFile main-file)            ; Recreate main file at the original path

        ;; Compress part 1, then remove the uncompressed copy
        (when gz?
          (gzip-file arch-file-gz arch-file+gz)
          (.delete arch-file-gz))))))
|
|
|
|
(defn prune-archive-files!
  "Scans files in same dir as `main-path`, and maintains `max-num-intervals` limit
  by deleting ALL parts for oldest intervals. Expensive.

  No-op unless both `interval` and `max-num-intervals` are non-nil.
  When `?debugger` is non-nil, nothing is deleted: each intended deletion
  is instead passed to it as `[:delete <file-name>]`."
  [main-path interval max-num-intervals ?debugger]
  (when (and interval max-num-intervals)
    (when-let [file-maps (scan-files main-path interval nil false)] ; [<file-map> ...]
      (let [by-edy   (group-by :edy file-maps) ; {<edy> [<file-map> ...]}
            n-excess (- (count by-edy) (long max-num-intervals))]

        (when (pos? n-excess) ; More intervals on disk than allowed
          (doseq [old-edy (take n-excess (sort (keys by-edy))) ; Oldest intervals first

                  ;; Every part of that interval
                  {:keys [^java.io.File file file-name]}
                  (sort-by :part enc/rcompare (get by-edy old-edy))]

            (if ?debugger
              (?debugger [:delete file-name])
              (.delete file))))))))
|
|
|
|
;;;; Handler
|
|
|
|
(defn ^:public handler:file
  "Experimental, subject to change.

  Returns a signal handler that:
    - Takes a Telemere signal (map).
    - Writes (appends) the signal as a string to file specified by `path`.

  Depending on options, archives may be maintained:
    - `logs/app.log.n.gz` (for nil `:interval`, non-nil `:max-file-size`)
    - `logs/app.log-YYYY-MM-DDd.n.gz` (for non-nil `:interval`) ; d=daily/w=weekly/m=monthly

  Can output signals as human or machine-readable (edn, JSON) strings.

  Example files with default options:
    `/logs/telemere.log` ; Current file
    `/logs/telemere.log-2020-01-01m.1.gz` ; Archive for Jan 2020, part 1 (newest entries)
    ...
    `/logs/telemere.log-2020-01-01m.8.gz` ; Archive for Jan 2020, part 8 (oldest entries)

  Options:
    `:output-fn` - (fn [signal]) => string, see `format-signal-fn` or `pr-signal-fn`
    `:path` - Path string of the target output file (default `logs/telemere.log`)

    `:interval` - ∈ #{nil :daily :weekly :monthly} (default `:monthly`)
      When non-nil, causes interval-based archives to be maintained.

    `:max-file-size` ∈ #{nil <pos-int>} (default 4MB)
      When `path` file size > ~this many bytes, rotates old content to numbered archives.

    `:max-num-parts` ∈ #{nil <pos-int>} (default 8)
      Maximum number of numbered archives to retain for any particular interval.

    `:max-num-intervals` ∈ #{nil <pos-int>} (default 6)
      Maximum number of intervals (days/weeks/months) to retain.

    `:gzip-archives?` (default true)
      When truthy, archives are gzip-compressed and named with a `.gz` suffix."

  ([] (handler:file nil))
  ([{:keys
     [output-fn
      path interval
      max-file-size
      max-num-parts
      max-num-intervals
      gzip-archives?]

     :or
     {output-fn (utils/format-signal-fn)
      path "logs/telemere.log" ; Main path, we'll ALWAYS write to this exact file
      interval :monthly
      max-file-size (* 1024 1024 4) ; 4MB
      max-num-parts 8
      max-num-intervals 6
      gzip-archives? true}}]

   (let [main-path path
         main-file (utils/as-file main-path)
         fw        (utils/file-writer {:file main-file, :append? true})

         ;; ?(fn []) => truthy when main file has outgrown `max-file-size`.
         ;; NOTE(review): `rl` presumably returns truthy while rate-limited, so
         ;; the file length is checked at most once per 250 msecs -- confirm.
         >max-file-size?
         (when max-file-size
           (let [max-file-size (long max-file-size)
                 rl (enc/rate-limiter-once-per 250)]
             (fn [] (and (not (rl)) (> (.length main-file) max-file-size)))))

         prev-timestamp_ (enc/latom nil) ; Initially nil
         curr-timestamp_ (enc/latom nil) ; Will be bootstrapped based on main file

         ;; Called on every write attempt,
         ;; maintains `timestamp_`s and returns true iff timestamp changed.
         new-interval!?
         (when interval
           (let [;; Epoch day of main file's last-modified time, nil when not positive
                 init-edy  (let [n (file-last-modified->edy main-file)] (when (pos? n) n))
                 curr-edy_ (enc/latom init-edy)
                 updated!? ; Returns ?[old new] on change
                 (fn [latom_ new]
                   (let [old (latom_)]
                     (when
                       (and
                         (not= old new)
                         (compare-and-set! latom_ old new)) ; Only one caller wins the swap
                       [old new])))]

             (when init-edy ; Don't bootstrap "1970-01-01d", etc.
               (reset! curr-timestamp_
                 (format-file-timestamp interval init-edy)))

             (fn new-interval!? []
               (let [curr-edy (udt->edy (System/currentTimeMillis))]
                 (when (updated!? curr-edy_ curr-edy) ; Day changed
                   (let [curr-timestamp (format-file-timestamp interval curr-edy)]
                     (when-let [[prev-timestamp _] (updated!? curr-timestamp_ curr-timestamp)]
                       ;; Timestamp changed (recall: interval may not be daily)
                       (reset! prev-timestamp_ prev-timestamp)
                       true)))))))

         lock (Object.)]

     (fn a-handler:file
       ([ ] (locking lock (fw))) ; Stop => close writer
       ([signal]
        (when-let [output (output-fn signal)] ; Nothing is written for nil output

          ;; Both checks run before the lock is taken
          (let [new-interval?   (when interval (new-interval!?))
                >max-file-size? (when max-file-size (>max-file-size?))
                reset-stream?   (or new-interval? >max-file-size?)]

            ;; Archiving, pruning, writer reset and the write itself share one lock
            (locking lock

              ;; An interval change takes precedence over a size-based rotation
              (if new-interval?
                (do
                  ;; Rename main -> <prev-timestamp>.1.gz, etc.
                  (when-let [prev-timestamp (prev-timestamp_)]
                    (archive-main-file! main-path interval prev-timestamp
                      max-num-parts gzip-archives? nil))

                  (when max-num-intervals
                    (prune-archive-files! main-path interval
                      max-num-intervals nil)))

                (when >max-file-size?
                  ;; Rename main -> <curr-timestamp>.1.gz, etc.
                  (archive-main-file! main-path interval (curr-timestamp_)
                    max-num-parts gzip-archives? nil)))

              ;; Main file was replaced above, so the writer must be reset before writing
              (when reset-stream? (fw :writer/reset!))
              (do (fw output))))))))))
|
|
|
|
(comment
  (manage-test-files! :create)
  (.setLastModified (utils/as-file "test/logs/app6.log")
    (enc/as-udt "1999-01-01T01:00:00.00Z"))

  (let [f (utils/as-file "test/logs/app6.log")] (enc/qb 1e5 (.length f)))
  (let [hfn
        (handler:file
          {:path "test/logs/app6.log"
           :max-num-intervals 2
           :max-num-parts 2})]

    ;; Sample signal: `:level` is the key, `:info` its value
    (hfn {:level :info :msg_ "hello"}) (hfn)))
|