mvxcvi/clj-cbor

0.7.2


Concise Binary Object Representation (RFC 7049)

dependencies

org.clojure/clojure
1.10.0



(this space intentionally left almost blank)
 

Functions for reading and writing CBOR headers.

(ns clj-cbor.header
  (:require
    [clj-cbor.error :as error]
    [clojure.string :as str])
  (:import
    clojure.lang.BigInt
    (java.io
      DataInputStream
      DataOutputStream)))

Vector of major type keywords, indexed by the three-bit values 0-7. (§2.1)

(def major-types
  "Major type keywords in code order. The position of each keyword in the
  vector is its three-bit major type value (0-7)."
  [:unsigned-integer :negative-integer
   :byte-string      :text-string
   :data-array       :data-map
   :tagged-value     :simple-value])

Map of major type keywords to code values.

(def ^:private major-type-codes
  "Reverse lookup from each major type keyword to its numeric code, which is
  the keyword's index in `major-types`."
  (into {}
        (map-indexed (fn [code mtype] [mtype code]))
        major-types))

Encoding Functions

Writes a header byte for the given major-type and additional info numbers.

(defn write-leader
  "Writes a single header byte to `out`. The top three bits hold the code of
  the major type keyword `mtype` and the bottom five bits hold `info`; both
  values are masked to their field width before being combined."
  [^DataOutputStream out mtype info]
  (let [type-bits (bit-shift-left (bit-and (major-type-codes mtype) 0x07) 5)
        info-bits (bit-and (long info) 0x1F)]
    (.writeByte out (bit-or type-bits info-bits))))

Write an unsigned byte (8-bit) value to the data output stream.

(defn write-byte
  "Writes `i` to the data output stream as a single byte."
  [^DataOutputStream out i]
  (. out (writeByte i)))

Write an unsigned short (16-bit) value to the data output stream.

(defn write-short
  "Writes `i` to the data output stream as two bytes."
  [^DataOutputStream out i]
  (. out (writeShort i)))

Write an unsigned int (32-bit) value to the data output stream. Coerces the value into a signed representation before writing if necessary.

(defn write-int
  "Writes `i` to the data output stream as four bytes. Values above
  `Integer/MAX_VALUE` are first shifted into the negative signed range so that
  the bytes written match the unsigned value."
  [^DataOutputStream out i]
  (let [signed (if-not (<= i Integer/MAX_VALUE)
                 ; Wrap the upper half of the unsigned range into negatives.
                 (+ Integer/MIN_VALUE (- (dec i) Integer/MAX_VALUE))
                 i)]
    (.writeInt out signed)))

Write an unsigned long (64-bit) value to the data output stream. Coerces the value into a signed representation before writing if necessary.

(defn write-long
  "Writes `i` to the data output stream as eight bytes. Values above
  `Long/MAX_VALUE` are first shifted into the negative signed range so that
  the bytes written match the unsigned value."
  [^DataOutputStream out i]
  (let [signed (if-not (<= i Long/MAX_VALUE)
                 ; Wrap the upper half of the unsigned range into negatives.
                 (+ Long/MIN_VALUE (- (dec i) Long/MAX_VALUE))
                 i)]
    (.writeLong out signed)))

Writes a header byte for the given major-type, plus extra bytes to encode the given integer code. Always writes the smallest possible representation. Returns the number of bytes written.

(defn write
  "Writes a header byte for the given major-type, plus extra bytes to encode
  the given integer code `i`. Always writes the smallest possible
  representation and returns the number of bytes written.

  Codes must lie in the unsigned 64-bit range [0, 2^64). A negative code is
  reported to the error handler as `::negative-info-code`; a code of 2^64 or
  more is reported as `::overflow-info-code`."
  ^long
  [^DataOutputStream out mtype i]
  (cond
    (neg? i)
    (error/*handler*
      ::negative-info-code
      (str "Cannot write negative integer code: " i)
      {:code i})

    ; Small codes are stored directly in the header byte.
    (<= i 23)
    (do (write-leader out mtype i)
        1)

    (<= i 0xFF)
    (do (write-leader out mtype 24)
        (write-byte out i)
        2)

    (<= i 0xFFFF)
    (do (write-leader out mtype 25)
        (write-short out i)
        3)

    (<= i 0xFFFFFFFF)
    (do (write-leader out mtype 26)
        (write-int out i)
        5)

    ; Strictly less than 2^64: the largest encodable code is 2^64 - 1. An
    ; inclusive bound would admit 2^64 itself, which `write-long` wraps
    ; around to zero, silently writing the wrong value.
    (< i (* -2N Long/MIN_VALUE))
    (do (write-leader out mtype 27)
        (write-long out i)
        9)

    :else
    (error/*handler*
      ::overflow-info-code
      (str "Cannot write integer code requiring 9 bytes of space: " i)
      {:code i})))

Decoding Functions

Determines the major type keyword and additional information encoded by the header byte. §2.1

(defn decode
  "Splits a header byte into a two-element vector: the major type keyword
  looked up from bits 5-7, and the additional information value held in the
  low five bits."
  [header]
  (let [type-code (bit-and (bit-shift-right header 5) 0x07)
        info (bit-and header 0x1F)]
    [(major-types type-code) info]))

Constant holding 2^64 for integer manipulation.

(def ^:private two-64
  "The value 2^64 as a `BigInteger`."
  (.pow (BigInteger/valueOf 2) 64))

Read an unsigned byte (8-bit) value from the data input stream. Promotes the value to a long for consistency.

(defn read-byte
  "Reads one byte from the data input stream as an unsigned value and returns
  it as a long."
  [^DataInputStream in]
  (-> in .readUnsignedByte long))

Read an unsigned short (16-bit) value from the data input stream. Promotes the value to a long for consistency.

(defn read-short
  "Reads two bytes from the data input stream as an unsigned value and
  returns it as a long."
  [^DataInputStream in]
  (-> in .readUnsignedShort long))

Read an unsigned int (32-bit) value from the data input stream. Promotes the value to a long for consistency.

(defn read-int
  "Reads four bytes from the data input stream and returns them as an
  unsigned value in a long."
  [^DataInputStream in]
  (let [signed (long (.readInt in))]
    ; Mask off the sign extension to recover the unsigned 32-bit value.
    (bit-and signed 0xFFFFFFFF)))

Read an unsigned long (64-bit) value from the data input stream. Handles overflowing values by promoting them to a bigint.

https://tools.ietf.org/html/rfc7049#section-1.2

(defn read-long
  "Reads eight bytes from the data input stream as an unsigned value. A
  result that reads as a non-negative long is returned as-is; a negative
  reading has 2^64 added to it and is returned as a `BigInt`."
  [^DataInputStream in]
  (let [raw (.readLong in)]
    (if-not (neg? raw)
      raw
      (BigInt/fromBigInteger
        (.add (BigInteger/valueOf raw) two-64)))))

Reads a size value from the initial bytes of the input stream. Returns either a number, the keyword :indefinite, or calls the error handler on reserved info codes.

(defn read-code
  "Resolves the additional information value `info` into an integer code,
  reading following bytes from `in` where required.

  Values below 24 are returned directly. Values 24-27 read a 1, 2, 4, or 8
  byte unsigned integer from the stream. Values 28-30 are reserved and are
  passed to the error handler as `::reserved-info-code`. Value 31 returns the
  keyword `:indefinite`."
  [^DataInputStream in ^long info]
  (cond
    ; Small codes stand for themselves.
    (< info 24)
    info

    (= info 31)
    :indefinite

    (<= 28 info 30)
    (error/*handler*
      ::reserved-info-code
      (format "Additional information int code %d is reserved."
              info)
      {:info info})

    ; Remaining codes give the width of the integer that follows.
    :else
    (case info
      24 (read-byte in)
      25 (read-short in)
      26 (read-int in)
      27 (read-long in))))
 

Dynamic error handling support.

(ns clj-cbor.error)

Default behavior for codec errors.

(defn codec-exception!
  "Throws an `ex-info` carrying `message`. The exception data is `data` with
  `error-type` added under the `:cbor/error` key."
  [error-type message data]
  (let [details (assoc data :cbor/error error-type)]
    (throw (ex-info message details))))

Dynamic error handler which can be bound to a function which will be called with a type keyword, a message, and a map of extra data.

(def ^:dynamic *handler*
  "Dynamic error handler. It is called with an error type keyword, a message,
  and a map of extra data. The root binding is `codec-exception!`."
  codec-exception!)

Error Hierarchy

Encoding errors.

; Register every encoding failure keyword under the shared parent.
(doseq [error-type [:clj-cbor.header/negative-info-code
                    :clj-cbor.header/overflow-info-code
                    :clj-cbor.codec/illegal-simple-type
                    :clj-cbor.codec/unsupported-type]]
  (derive error-type ::encoding-error))

Decoding errors.

; Register every decoding failure keyword under the shared parent, so that
; handlers can match the whole family with `isa?`.
(derive :clj-cbor.header/reserved-info-code  ::decoding-error)
(derive :clj-cbor.codec/illegal-chunk-type   ::decoding-error)
(derive :clj-cbor.codec/illegal-stream       ::decoding-error)
(derive :clj-cbor.codec/missing-map-value    ::decoding-error)
(derive :clj-cbor.codec/duplicate-map-key    ::decoding-error)
(derive :clj-cbor.codec/duplicate-set-entry  ::decoding-error)
(derive :clj-cbor.codec/tag-handling-error   ::decoding-error)
(derive :clj-cbor.codec/unknown-tag          ::decoding-error)
(derive :clj-cbor.codec/unknown-simple-value ::decoding-error)
(derive :clj-cbor.codec/unexpected-break     ::decoding-error)
(derive :clj-cbor.codec/end-of-input         ::decoding-error)
 

Core CBOR library API.

(ns clj-cbor.core
  (:refer-clojure :exclude [spit slurp])
  (:require
    [clj-cbor.codec :as codec]
    [clj-cbor.error :as error]
    [clj-cbor.tags.clojure :as tags.clj]
    [clj-cbor.tags.content :as tags.content]
    [clj-cbor.tags.numbers :as tags.num]
    [clj-cbor.tags.text :as tags.text]
    [clj-cbor.tags.time :as tags.time]
    [clojure.java.io :as io])
  (:import
    (java.io
      ByteArrayOutputStream
      DataInputStream
      DataOutputStream
      EOFException
      InputStream
      OutputStream)))

Codec Construction

Construct a new CBOR codec with no configuration. Note that this does not include any read and write handlers. See the default-codec and the default-read-handlers and default-write-handlers vars.

Arguments may be a map or a sequence of key/value pairs. Valid options are:

  • :write-dispatch function which is called to provide a dispatch value based on the data to be rendered. (default: class)
  • :write-handlers lookup function from dispatch values to handlers which take some data to be encoded and return a transformed version of it (typically a tagged value).
  • :read-handlers lookup function from integer tags to handlers which take the embedded item and return the parsed data value.
(defn cbor-codec
  "Builds a codec by merging the given options over `codec/blank-codec`.
  Options may be passed either as a single map or as inline key/value pairs."
  [& opts]
  (let [base (codec/blank-codec)
        single-map? (and (= 1 (count opts))
                         (map? (first opts)))
        config (if single-map?
                 (first opts)
                 (apply hash-map opts))]
    (merge base config)))

Map of default write handlers to use, keyed by class.

The default choice of encoding for instants in time is the numeric epoch representation (tag 1).

(def default-write-handlers
  "Default write handlers, formed by merging the handler maps from the tag
  namespaces. Later maps in the list win on conflicting keys."
  (reduce merge
          [tags.clj/clojure-write-handlers
           tags.content/content-write-handlers
           tags.num/number-write-handlers
           tags.time/epoch-time-write-handlers
           tags.text/text-write-handlers]))

Map of default tag handlers to use, keyed by tag.

The default choice of representation for instants in time is java.time.Instant.

(def default-read-handlers
  "Default read handlers, formed by merging the handler maps from the tag
  namespaces. Later maps in the list win on conflicting keys."
  (reduce merge
          [tags.clj/clojure-read-handlers
           tags.content/content-read-handlers
           tags.num/number-read-handlers
           tags.time/instant-read-handlers
           tags.text/text-read-handlers]))

Default CBOR codec to use when none is specified.

(def default-codec
  "Codec configured with `default-write-handlers` and
  `default-read-handlers`."
  (cbor-codec
    {:write-handlers default-write-handlers
     :read-handlers default-read-handlers}))

Encoding Functions

Coerce the argument to a DataOutputStream.

(defn- data-output-stream
  "Returns `output` as a `DataOutputStream`. An existing `DataOutputStream`
  is returned unchanged and any other `OutputStream` is wrapped. Anything
  else throws an `IllegalArgumentException`."
  ^DataOutputStream
  [output]
  (cond
    (instance? DataOutputStream output)
    output

    (instance? OutputStream output)
    (DataOutputStream. output)

    :else
    (throw (IllegalArgumentException.
             (str "Cannot coerce argument to an OutputStream: "
                  (pr-str output))))))

Encode a single value as CBOR data.

Writes the value bytes to the provided output stream, or returns the value as a byte array if no output is given. The default-codec is used to encode the value if none is provided.

(defn encode
  "Encodes a single value as CBOR.

  With one argument, encodes with `default-codec` and returns a byte array.
  With an encoder and a value, returns a byte array. With an encoder, an
  output stream, and a value, writes to the stream and returns the result of
  `codec/write-value`."
  ([value]
   (encode default-codec value))
  ([encoder value]
   (let [sink (ByteArrayOutputStream.)]
     ; Close the wrapping stream before reading the bytes back out.
     (with-open [data-out (data-output-stream sink)]
       (encode encoder data-out value))
     (.toByteArray sink)))
  ([encoder output value]
   (codec/write-value encoder (data-output-stream output) value)))

Encode a sequence of values as CBOR data. This eagerly consumes the input sequence.

Writes the value bytes to the provided output stream, or returns the value as a byte array if no output is given. The default-codec is used to encode the value if none is provided.

(defn encode-seq
  "Encodes every value in `values` as CBOR, consuming the sequence eagerly.

  With one argument, encodes with `default-codec` and returns a byte array.
  With an encoder and values, returns a byte array. With an encoder, an
  output stream, and values, writes to the stream and returns the sum of the
  results of the individual `encode` calls."
  ([values]
   (encode-seq default-codec values))
  ([encoder values]
   (let [sink (ByteArrayOutputStream.)]
     ; Close the wrapping stream before reading the bytes back out.
     (with-open [data-out (data-output-stream sink)]
       (encode-seq encoder data-out values))
     (.toByteArray sink)))
  ([encoder output values]
   (let [data-out (data-output-stream output)]
     (reduce
       (fn add-encoded
         [total value]
         (+ total (encode encoder data-out value)))
       0
       values))))

Decoding Functions

Coerce the argument to a DataInputStream.

(defn- data-input-stream
  "Returns `input` as a `DataInputStream`. An existing `DataInputStream` is
  returned unchanged and any other `InputStream` is wrapped. Anything else is
  first passed through `io/input-stream` and then wrapped."
  [input]
  (cond
    (instance? DataInputStream input)
    input

    (instance? InputStream input)
    (DataInputStream. input)

    :else
    (DataInputStream. (io/input-stream input))))

Attempts to read a header byte from the input stream. If there is no more input, the guard value is returned.

(defn- maybe-read-header
  "Reads one unsigned byte from `input`. Returns `guard` instead if the
  stream raises `EOFException`."
  [^DataInputStream input guard]
  (try
    (.readUnsignedByte input)
    (catch EOFException _eof
      guard)))

Attempts to read the rest of a CBOR value from the input stream. If the input ends during the read, the error handler is called with an end-of-input error.

(defn- try-read-value
  "Reads the remainder of a value via `codec/read-value*`. If the read raises
  `EOFException`, the error handler is called with
  `:clj-cbor.codec/end-of-input` and the header that was being parsed."
  [decoder input header]
  (try
    (codec/read-value* decoder input header)
    (catch EOFException _eof
      (error/*handler*
        :clj-cbor.codec/end-of-input
        "Input data ended while parsing a CBOR value."
        {:header header}))))

Decode a single CBOR value from the input.

This uses the given codec or the default-codec if none is provided. If at the end of the input, this returns eof-guard or nil.

The input must be an input stream or something coercible to one like a file or byte array. Note that coercion will produce a BufferedInputStream if the argument is not already a stream, so repeated reads will probably not behave as expected! If you need incremental parsing, make sure you pass in something that is already an InputStream.

(defn decode
  "Decodes a single CBOR value from `input`.

  Uses `default-codec` when no decoder is given. When no header byte can be
  read because the input has ended, returns `eof-guard`, which defaults to
  nil."
  ([input]
   (decode default-codec input))
  ([decoder input]
   (decode decoder input nil))
  ([decoder input eof-guard]
   (let [data-in (data-input-stream input)
         header (maybe-read-header data-in eof-guard)]
     (if-not (identical? header eof-guard)
       (try-read-value decoder data-in header)
       eof-guard))))

Decode a sequence of CBOR values from the input.

This uses the given codec or the default-codec if none is provided. The returned sequence is lazy, so take care that the input stream is not closed before the entries are realized.

The input must be an input stream or something coercible to one - see decode for usage notes.

(defn decode-seq
  "Returns a lazy sequence of the CBOR values decoded from `input`, ending
  when the input is exhausted. Uses `default-codec` when no decoder is given."
  ([input]
   (decode-seq default-codec input))
  ([decoder input]
   (let [data-in (data-input-stream input)
         ; A fresh object cannot collide with any decoded value.
         eof-guard (Object.)
         more? (fn more?
                 [value]
                 (not (identical? eof-guard value)))
         read-next! (fn read-next!
                      []
                      (decode decoder data-in eof-guard))]
     (->> (repeatedly read-next!)
          (take-while more?)))))

Utility Functions

Opens an output stream to f, writes value to it, then closes the stream.

Options may include :append to write to the end of the file instead of truncating.

(defn spit
  "Opens an output stream to `f` with `io/output-stream`, passing along
  `opts`, then encodes `value` to it with `default-codec`. The stream is
  closed afterwards."
  [f value & opts]
  (let [stream-args (cons f opts)]
    (with-open [stream ^OutputStream (apply io/output-stream stream-args)]
      (encode default-codec stream value))))

Opens an output stream to f, writes each element in values to it, then closes the stream.

Options may include :append to write to the end of the file instead of truncating.

(defn spit-all
  "Opens an output stream to `f` with `io/output-stream`, passing along
  `opts`, then encodes each element of `values` to it with `default-codec`.
  The stream is closed afterwards."
  [f values & opts]
  (let [stream-args (cons f opts)]
    (with-open [stream ^OutputStream (apply io/output-stream stream-args)]
      (encode-seq default-codec stream values))))

Opens an input stream from f, reads the first value from it, then closes the stream.

(defn slurp
  "Opens an input stream from `f` with `io/input-stream`, passing along
  `opts`, then decodes the first value from it with `default-codec`. The
  stream is closed afterwards."
  [f & opts]
  (let [stream-args (cons f opts)]
    (with-open [stream ^InputStream (apply io/input-stream stream-args)]
      (decode default-codec stream))))

Opens an input stream from f, reads all values from it, then closes the stream.

(defn slurp-all
  "Opens an input stream from `f` with `io/input-stream`, passing along
  `opts`, then decodes every value from it with `default-codec`. The sequence
  is fully realized before the stream is closed."
  [f & opts]
  (let [stream-args (cons f opts)]
    (with-open [stream ^InputStream (apply io/input-stream stream-args)]
      (doall (decode-seq default-codec stream)))))

Wraps a value with a self-describing CBOR tag. This will cause the first few bytes of the data to be D9D9F7, which serves as a distinguishing header for format detection.

(defn self-describe
  "Wraps `value` using `tags.content/format-self-described`."
  [value]
  (-> value
      (tags.content/format-self-described)))
 

Main CBOR codec implementation.

(ns clj-cbor.codec
  (:require
    [clj-cbor.error :as error]
    [clj-cbor.header :as header]
    [clj-cbor.data.core :as data]
    [clj-cbor.data.float16 :as float16]
    [clojure.string :as str])
  (:import
    clj_cbor.data.simple.SimpleValue
    clj_cbor.data.tagged.TaggedValue
    (java.io
      ByteArrayOutputStream
      DataInputStream
      DataOutputStream)))

Codec Protocols

An encoder is a process that generates the representation format of a CBOR data item from application information.

(defprotocol Encoder
  "An encoder is a process that generates the representation format of a CBOR
  data item from application information."
  (write-value
    [encoder out x]
    "Writes the given value `x` to the `DataOutputStream` `out`."))

A decoder is a process that reads a CBOR data item and makes it available to an application.

Formally speaking, a decoder contains a parser to break up the input using the syntax rules of CBOR, as well as a semantic processor to prepare the data in a form suitable to the application.

(defprotocol Decoder
  "A decoder is a process that reads a CBOR data item and makes it available
  to an application."
  (read-value*
    [decoder input header]
    "Reads a single value from the `DataInputStream`, given the just-read
    initial byte."))

Reads a single value from the DataInputStream.

(defn read-value
  "Reads the next header byte from `input` and passes it to `read-value*` to
  decode a single value."
  [decoder ^DataInputStream input]
  (let [initial-byte (.readUnsignedByte input)]
    (read-value* decoder input initial-byte)))

Byte Utilities

Read length bytes from the input stream. Returns a byte array.

(defn- read-bytes
  "Allocates a byte array of `length` bytes, fills it completely from the
  input stream, and returns it."
  ^bytes
  [^DataInputStream input length]
  (doto (byte-array length)
    (->> (.readFully input))))

Writes the given value x to a byte array.

(defn- write-bytes
  "Encodes `x` with `write-value` into an in-memory buffer and returns the
  resulting byte array."
  [encoder x]
  (let [buffer (ByteArrayOutputStream.)]
    ; Close the data stream before extracting the bytes.
    (with-open [data-out (DataOutputStream. buffer)]
      (write-value encoder data-out x))
    (.toByteArray buffer)))

Returns a negative number, zero, or a positive number when x is 'less than', 'equal to', or 'greater than' y.

Sorting is performed on the bytes of the representation of the key data items without paying attention to the 3/5 bit splitting for major types. The sorting rules are:

  • If two keys have different lengths, the shorter one sorts earlier;
  • If two keys have the same length, the one with the lower value in (byte-wise) lexical order sorts earlier.
(defn- compare-bytes
  "Comparator over byte arrays. Arrays of different lengths are ordered by
  length. Arrays of equal length are ordered by the first differing byte, with
  each byte treated as an unsigned value. Returns zero when the arrays have
  identical contents."
  [^bytes x ^bytes y]
  (let [xlen (alength x)
        ylen (alength y)]
    (if (not= xlen ylen)
      ; Different lengths - the shorter array sorts first.
      (compare xlen ylen)
      ; Same length - scan for the first differing byte.
      (loop [i 0]
        (if (>= i xlen)
          0
          (let [xi (bit-and (aget x i) 0xFF)
                yi (bit-and (aget y i) 0xFF)]
            (if (not= xi yi)
              (compare xi yi)
              (recur (inc i)))))))))

Reader Functions

These functions provide some data-reading capabilities which later major-type readers are built on. In particular, these help deal with the four data types which can be streamed with indefinite lengths.

Reads chunks from the input in a streaming fashion, combining them with the given reducing function. All chunks must have the given major type and definite length.

(defn- read-chunks
  "Reads chunk values from the input until the break byte 0xFF, folding them
  together with `reducer`. The zero-arity call of `reducer` supplies the
  initial state and the one-arity call finishes it.

  A chunk whose major type differs from `stream-type` is reported as
  `::illegal-chunk-type`, and a chunk with additional information 31 is
  reported as `::illegal-stream`."
  [decoder ^DataInputStream input stream-type reducer]
  (loop [acc (reducer)]
    (let [lead (.readUnsignedByte input)]
      (if-not (== lead 0xFF)
        ; Inspect the chunk header before reading its value.
        (let [[chunk-type info] (header/decode lead)]
          (cond
            ; Illegal element type.
            (not= stream-type chunk-type)
            (error/*handler*
              ::illegal-chunk-type
              (str stream-type " stream may not contain chunks of type "
                   chunk-type)
              {:stream-type stream-type
               :chunk-type chunk-type})

            ; Illegal indefinite-length chunk.
            (= info 31)
            (error/*handler*
              ::illegal-stream
              (str stream-type " stream chunks must have a definite length")
              {:stream-type stream-type})

            ; Fold the next chunk into the accumulated state.
            :else
            (recur (reducer acc (read-value* decoder input lead)))))
        ; Break code, finish up result.
        (reducer acc)))))

Reads values from the input in a streaming fashion, combining them with the given reducing function.

(defn- read-value-stream
  "Reads values from the input until the break byte 0xFF, folding them
  together with `reducer`. The zero-arity call of `reducer` supplies the
  initial state and the one-arity call finishes it."
  [decoder ^DataInputStream input reducer]
  (loop [acc (reducer)]
    (let [lead (.readUnsignedByte input)]
      (if-not (== lead 0xFF)
        ; Fold the next value into the accumulated state.
        (recur (reducer acc (read-value* decoder input lead)))
        ; Break code, finish up result.
        (reducer acc)))))

Major Types

The header byte of each CBOR encoded data value uses the high-order three bits to encode the major type of the value. The remaining five bits contain an additional information code, which often gives the size of the resulting value.

Integers

Integers are represented by major types 0 and 1. Positive integers use type 0, and the 5-bit additional information is either the integer itself (for additional information values 0 through 23) or the length of additional data.

The encoding for negative integers follows the rules for unsigned integers, except that the type is 1 and the value is negative one minus the encoded unsigned integer.

Additional information 24 means the value is represented in an additional uint8, 25 means a uint16, 26 means a uint32, and 27 means a uint64.

The minimum integer value representable as a native type.

(def ^:private min-integer
  "The value -(2^64) as a `BigInteger`; the lower bound used by
  `representable-integer?`."
  (.negate (.pow (BigInteger/valueOf 2) 64)))

The maximum integer value representable as a native type.

(def ^:private max-integer
  "The value 2^64 - 1 as a `BigInteger`; the upper bound used by
  `representable-integer?`."
  (.subtract (.pow (BigInteger/valueOf 2) 64) BigInteger/ONE))

True if the value is small enough to represent using the normal integer major-type.

This is made slightly trickier at the high end of the representable range by the JVM's lack of unsigned types, so some values that are represented in CBOR as 8-byte integers must be represented by BigInt in memory.

(defn- representable-integer?
  "True if `value` is an integer lying between `min-integer` and
  `max-integer`, inclusive."
  [value]
  (if (integer? value)
    (<= min-integer value max-integer)
    false))

Writes an integer value.

(defn- write-integer
  "Writes the integer `n` with `header/write`. A negative value is written as
  `:negative-integer` with code -1 - n; any other value is written as
  `:unsigned-integer` with code n."
  [encoder ^DataOutputStream out n]
  (let [[mtype code] (if (neg? n)
                       [:negative-integer (-' -1 n)]
                       [:unsigned-integer n])]
    (header/write out mtype code)))

Byte Strings

Byte strings are represented by major type 2. The string's length in bytes is represented following the rules for positive integers (major type 0).

If the additional info indicates an indefinite length, the header must be followed by a sequence of definite-length byte strings, terminated with a break stop code. The chunks will be concatenated together into the final byte string.

Writes an array of bytes to the output stream as a CBOR byte string.

(defn- write-byte-string
  "Writes a `:byte-string` header carrying the length of `bs`, followed by the
  bytes themselves. Returns the header size plus the number of bytes in `bs`."
  [encoder ^DataOutputStream out bs]
  (let [size (count bs)
        hlen (header/write out :byte-string size)]
    (.write out ^bytes bs)
    (+ hlen size)))

Reducing function which builds a contiguous byte-array from a sequence of byte-array chunks.

(defn- concat-bytes
  "Reducing function over byte-array chunks. With no arguments, creates a
  fresh `ByteArrayOutputStream`. With a buffer and a chunk, appends the chunk
  and returns the buffer. With just the buffer, returns its contents as a byte
  array."
  ([]
   (ByteArrayOutputStream.))
  ([buffer]
   (.toByteArray ^ByteArrayOutputStream buffer))
  ([buffer v]
   (doto ^ByteArrayOutputStream buffer
     (.write ^bytes v))))

Text Strings

Major type 3 encodes a text string, specifically a string of Unicode characters that is encoded as UTF-8 [RFC3629].

The format of this type is identical to that of byte strings (major type 2), that is, as with major type 2, the length gives the number of bytes. This type is provided for systems that need to interpret or display human-readable text, and allows the differentiation between unstructured bytes and text that has a specified repertoire and encoding.

If the additional info indicates an indefinite length, the header must be followed by a sequence of definite-length text strings, terminated with a break stop code. The chunks will be concatenated together into the final text string.

Write a string of characters to the output as a CBOR text string.

(defn- write-text-string
  "Encodes the string `ts` as UTF-8, then writes a `:text-string` header
  carrying the byte count followed by the encoded bytes. Returns the header
  size plus the number of encoded bytes."
  [encoder ^DataOutputStream out ts]
  (let [utf8 (.getBytes ^String ts "UTF-8")
        size (count utf8)
        hlen (header/write out :text-string size)]
    (.write out utf8)
    (+ hlen size)))

Reads a fixed-length text string from the input.

(defn- read-text
  "Reads `n` bytes from the input with `read-bytes` and decodes them as a
  UTF-8 string."
  [^DataInputStream input n]
  (let [raw (read-bytes input n)]
    (String. raw "UTF-8")))

Reducing function which builds a contiguous string from a sequence of string chunks.

(defn- concat-text
  "Reducing function over string chunks. With no arguments, creates a fresh
  `StringBuilder`. With a buffer and a chunk, appends the chunk and returns
  the buffer. With just the buffer, returns its contents as a string."
  ([]
   (StringBuilder.))
  ([buffer]
   (str buffer))
  ([buffer v]
   (doto ^StringBuilder buffer
     (.append ^String v))))

Data Arrays

Arrays of data items are encoded using major type 4. Arrays are used to represent both lists and vectors in Clojure. Items in an array do not need to all be of the same type.

The array's length follows the rules for byte strings (major type 2), except that the length denotes the number of data items, not the length in bytes that the array takes up.

If the additional info indicates an indefinite length, the header must be followed by a sequence of element data values, terminated with a break stop code.

Writes an array of data items to the output. The array will be encoded with a definite length, so xs will be fully realized.

(defn- write-array
  "Writes a `:data-array` header carrying the element count of `xs`, then
  writes each element in order with `write-value`. Returns the header size
  plus the sum of the values returned by `write-value`."
  [encoder ^DataOutputStream out xs]
  (let [hlen (header/write out :data-array (count xs))
        write-element (fn write-element
                        [x]
                        (write-value encoder out x))]
    (transduce (map write-element) + hlen xs)))

Reducing function which builds a vector to represent a data array.

(defn- build-array
  "Reducing function which collects elements into a vector. The zero-arity
  call returns an empty vector, the two-arity call appends an element, and the
  one-arity call returns the vector unchanged."
  ([]
   [])
  ([xs]
   xs)
  ([xs v]
   (conj xs v)))

Read a fixed length array from the input as a vector of elements.

(defn- read-array
  "Reads `n` values from the input with `read-value` and returns them as a
  vector, in the order read.

  `n` must not be negative. A count of zero reads nothing and returns an
  empty vector, so an empty array whose length was given in a longer-than-
  necessary encoding still decodes instead of tripping the precondition."
  [decoder input ^long n]
  {:pre [(not (neg? n))]}
  (let [objs (object-array n)]
    (loop [idx 0]
      (if (< idx n)
        (do (aset objs idx (read-value decoder input))
            (recur (unchecked-inc idx)))
        (vec objs)))))

Data Maps

Maps of key-value entries are encoded using major type 5. A map is comprised of pairs of data items, each pair consisting of a key that is immediately followed by a value.

The map's length follows the rules for byte strings (major type 2), except that the length denotes the number of pairs, not the length in bytes that the map takes up.

If the additional info indicates an indefinite length, the header must be followed by a sequence of data value pairs, terminated with a break stop code. An odd number of values before the break means the map is not well-formed.

A map that has duplicate keys may be well-formed, but it is not valid, and thus it causes indeterminate decoding.

Writes a sequence of key/value pairs to the output in the order given. The map will be encoded with a definite length, so xm will be fully realized.

(defn- write-map-seq
  "Writes a `:data-map` header carrying the entry count of `xm`, then writes
  each key followed by its value, in the iteration order of `xm`. Returns the
  header size plus the sum of the values returned by `write-value`."
  [encoder ^DataOutputStream out xm]
  (let [hlen (header/write out :data-map (count xm))
        write-entry (fn write-entry
                      [^long total [k v]]
                      (let [^long klen (write-value encoder out k)
                            ^long vlen (write-value encoder out v)]
                        (+ total klen vlen)))]
    (reduce write-entry hlen xm)))

Writes a sequence of key/value pairs to the output in canonical order. This requires serializing the keys in order to compare bytes.

(defn- write-map-canonical
  "Writes a `:data-map` header carrying the entry count of `xm`, then writes
  the entries sorted by their encoded keys. Each key is first serialized to a
  byte array with `write-bytes`, and the entries are ordered with
  `compare-bytes`. Returns the header size plus the key byte lengths plus the
  values returned by `write-value` for each entry value."
  [encoder ^DataOutputStream out xm]
  (let [hlen (header/write out :data-map (count xm))
        ; Pair each value with the serialized bytes of its key.
        encoded (map (fn encode-key
                       [[k v]]
                       [(write-bytes encoder k) v])
                     xm)
        ordered (sort (fn by-key-bytes
                        [a b]
                        (compare-bytes (first a) (first b)))
                      encoded)]
    (reduce
      (fn write-entry
        [^long total [^bytes kbytes v]]
        ; The key is already encoded, so copy its bytes straight through.
        (.write out kbytes)
        (let [klen (alength kbytes)
              ^long vlen (write-value encoder out v)]
          (+ total klen vlen)))
      hlen
      ordered)))

Writes a map of key/value pairs to the output. The map will be encoded with a definite length, so xm will be fully realized.

(defn- write-map
  "Writes the map `xm` with `write-map-canonical` when the encoder has a
  truthy `:canonical` value, and with `write-map-seq` otherwise."
  [encoder ^DataOutputStream out xm]
  (let [writer (if (:canonical encoder)
                 write-map-canonical
                 write-map-seq)]
    (writer encoder out xm)))

Reducing function which builds a map from a sequence of alternating key and value elements.

(defn- build-map
  "Reducing function which builds a map from alternating key and value
  elements. The state is a vector holding the map so far, plus a pending key
  when a value is still awaited.

  Finishing with a pending key is reported as `::missing-map-value`. A key
  already present in the map is reported as `::duplicate-map-key`."
  ([]
   [{}])
  ([[m k :as state]]
   (if-not (= 1 (count state))
     (error/*handler*
       ::missing-map-value
       (str "Encoded map did not contain a value for key: "
            (pr-str k))
       {:map m, :key k})
     m))
  ([[m k :as state] e]
   (cond
     ; A key is pending, so this element completes the entry.
     (not= 1 (count state))
     [(assoc m k e)]

     ; Duplicate key error.
     (contains? m e)
     (error/*handler*
       ::duplicate-map-key
       (str "Encoded map contains duplicate key: "
            (pr-str e))
       {:map m, :key e})

     ; Save key and wait for value.
     :else
     [m e])))

Read a fixed-length map from the input, returning a map of its entries.

(defn- read-map
  "Reads `n` key/value pairs from the input with `read-value` and returns
  them as a map. A key that was already read is reported to the error handler
  as `::duplicate-map-key`.

  `n` must not be negative. A count of zero reads nothing and returns an
  empty map, so an empty map whose length was given in a longer-than-necessary
  encoding still decodes instead of tripping the precondition."
  [decoder input ^long n]
  {:pre [(not (neg? n))]}
  (let [m (java.util.HashMap.)]
    (loop [idx 0]
      (if (< idx n)
        (let [k (read-value decoder input)]
          (if (.containsKey m k)
            (error/*handler*
              ::duplicate-map-key
              (str "Encoded map contains duplicate key: " (pr-str k))
              {:map (into {} m)
               :key k})
            (do (.put m k (read-value decoder input))
                (recur (unchecked-inc idx)))))
        (into {} m)))))

Sets

Sets are represented as arrays of elements tagged with code 258.

This support is implemented here rather than as a normal read/write handler pair for two reasons. First, unlike the normal write-handlers which operate on concrete types, there are many types which represent the 'set' semantic in Clojure, and we don't want to maintain a brittle list of such types. That approach would also prevent easy extension to new set types outside the core library. Instead, we use the set? predicate to trigger this handler.

Second, when the codec is in canonical mode, we want to sort the entries in the set before writing them out. A write handler wouldn't have a way to know whether the codec had this behavior enabled, requiring coordination between the codec setting and the selection of a canonical writer vs a regular one.

Writes a sequence of set entries to the output in the order given. The set will be encoded with a definite length, so xs will be fully realized.

(defn- write-set-seq
  "Copies `xs` into a vector, wraps it with `data/tagged-value` under `tag`,
  and writes the result with `write-value`."
  [encoder ^DataOutputStream out tag xs]
  (let [entries (vec xs)
        tagged (data/tagged-value tag entries)]
    (write-value encoder out tagged)))

Writes a set of entries to the output in canonical order. This requires serializing the entries in order to compare bytes.

(defn- write-set-canonical
  "Writes a `:tagged-value` header for `tag` and a `:data-array` header
  carrying the entry count of `xs`, then writes the entries sorted by their
  encoded bytes. Each entry is first serialized with `write-bytes`, and the
  results are ordered with `compare-bytes`. Returns the total of both header
  sizes and all entry byte lengths."
  [encoder ^DataOutputStream out tag xs]
  (let [tag-hlen (header/write out :tagged-value tag)
        array-hlen (header/write out :data-array (count xs))
        encoded (map (fn encode-entry
                       [x]
                       (write-bytes encoder x))
                     xs)
        ordered (sort compare-bytes encoded)]
    (reduce
      (fn write-entry
        [^long total ^bytes entry]
        ; Entries are already encoded, so copy their bytes straight through.
        (.write out entry)
        (+ total (alength entry)))
      (+ tag-hlen array-hlen)
      ordered)))

Writes a set of values to the output as a tagged array.

(defn- write-set
  "Writes the set `xs` with `write-set-canonical` when the encoder has a
  truthy `:canonical` value, and with `write-set-seq` otherwise."
  [encoder ^DataOutputStream out tag xs]
  (let [writer (if (:canonical encoder)
                 write-set-canonical
                 write-set-seq)]
    (writer encoder out tag xs)))

Parse a set from the value contained in the tagged representation.

(defn- read-set
  "Converts the sequential `value` into a set. A non-sequential value is
  reported as `::tag-handling-error`. When the decoder has a truthy `:strict`
  value and the set is smaller than the input, the duplicates are reported as
  `::duplicate-set-entry`."
  [decoder value]
  (if-not (sequential? value)
    (error/*handler*
      ::tag-handling-error
      (str "Sets must be tagged arrays, got: " (class value))
      {:value value})
    (let [entries (set value)]
      ; A smaller set means some elements collapsed together.
      (if (and (:strict decoder)
               (< (count entries) (count value)))
        (error/*handler*
          ::duplicate-set-entry
          "Encoded set contains duplicate entries"
          {:value value})
        entries))))

Tagged Values

Major type 6 is used for optional semantic tagging of other CBOR values.

Writes out a tagged value.

(defn- write-tagged
  "Writes a `:tagged-value` header for the tag followed by the tagged value
  itself. Accepts either a `TaggedValue` or a separate tag and value. Returns
  the header size plus the value returned by `write-value`."
  ([encoder ^DataOutputStream out ^TaggedValue tv]
   (let [tag (.tag tv)
         value (.value tv)]
     (write-tagged encoder out tag value)))
  ([encoder ^DataOutputStream out tag value]
   (let [header-size (header/write out :tagged-value tag)
         value-size (write-value encoder out value)]
     (+ header-size ^long value-size))))

Read a tagged value from the input stream.

(defn- read-tagged
  "Reads a tag code and the value that follows it, then interprets the pair.

  A tag equal to `data/set-tag` is handled by `read-set`. Otherwise the tag is
  looked up in the decoder's `:read-handlers` and the handler, if any, is
  applied to the value. With no handler, a decoder with a truthy `:strict`
  value reports `::unknown-tag`, and any other decoder returns a
  `data/tagged-value`. Any `Exception` raised during that handling is reported
  as `::tag-handling-error`."
  [decoder ^DataInputStream input info]
  (let [tag (header/read-code input info)
        value (read-value decoder input)]
    (if (= tag data/set-tag)
      (read-set decoder value)
      ; NOTE(review): the `::unknown-tag` report below sits inside this try,
      ; so when the error handler throws, the exception is caught here and
      ; reported again as `::tag-handling-error`. Confirm this is intended.
      (try
        (let [handler ((:read-handlers decoder) tag)]
          (cond
            ; TODO: better error reporting
            handler
            (handler value)

            (:strict decoder)
            (error/*handler*
              ::unknown-tag
              (str "Unknown tag code " tag)
              {:tag tag, :value value})

            :else
            (data/tagged-value tag value)))
        (catch Exception ex
          (error/*handler*
            ::tag-handling-error
            (.getMessage ex)
            (assoc (ex-data ex) ::error ex)))))))

Simple Values

Major type 7 is for two types of data: floating-point numbers and "simple values" that do not need any content, as well as the "break" stop code. Each value of the 5-bit additional information in the initial byte has its own separate meaning.

Like the major types for integers, items of this major type do not carry content data; all the information is in the initial bytes.

Writes a boolean simple value to the output.

(defn- write-boolean
  "Writes the byte 0xF5 when `x` is truthy and 0xF4 otherwise. Returns 1."
  [encoder ^DataOutputStream out x]
  (let [code (if x 0xF5 0xF4)]
    (.writeByte out code)
    1))

Writes a 'null' simple value to the output.

(defn- write-null
  "Writes the byte 0xF6 to the output. Returns 1."
  [encoder ^DataOutputStream out]
  (doto out
    (.writeByte 0xF6))
  1)

Writes an 'undefined' simple value to the output.

(defn- write-undefined
  "Writes the byte 0xF7 to the output. Returns 1."
  [encoder ^DataOutputStream out]
  (doto out
    (.writeByte 0xF7))
  1)

Writes a floating-point value to the output. Special values zero, NaN, and +/- Infinity are represented as 16-bit numbers, otherwise the encoding is determined by class.

(defn- write-float
  "Writes the floating-point number `n` and returns the number of bytes
  written. Zero, NaN, and the infinities are written as 3 bytes using the
  constants from `float16`. Any other `Float` is written as 5 bytes, and
  everything else is written as a 9-byte double."
  [encoder ^DataOutputStream out n]
  (let [write-half (fn write-half
                     [bits]
                     (header/write-leader out :simple-value 25)
                     (.writeShort out bits)
                     3)]
    (cond
      ; NOTE(review): negative zero also satisfies `zero?`, so it is written
      ; with `float16/zero` too - confirm that losing the sign is acceptable.
      (zero? (double n))
      (write-half float16/zero)

      (Double/isNaN n)
      (write-half float16/not-a-number)

      (Double/isInfinite n)
      (write-half (if (pos? (double n))
                    float16/positive-infinity
                    float16/negative-infinity))

      (instance? Float n)
      (do (header/write-leader out :simple-value 26)
          (.writeFloat out (float n))
          5)

      :else
      (do (header/write-leader out :simple-value 27)
          (.writeDouble out (double n))
          9))))

Writes a generic simple value for the given code and returns the number of bytes written. Does not handle floating-point or reserved values.

(defn- write-simple
  "Writes the generic simple value `x` and returns the number of bytes
  written: 1 for codes 0-23 (carried in the leader) or 2 for codes 32-255
  (leader with info 24 plus one byte). Any other code is reported through the
  error handler as `::illegal-simple-type`."
  [encoder ^DataOutputStream out ^SimpleValue x]
  (let [code (.n x)
        inline? (<= 0 code 23)
        extended? (<= 32 code 255)]
    (if inline?
      (do (header/write-leader out :simple-value code)
          1)
      (if extended?
        (do (header/write-leader out :simple-value 24)
            (.writeByte out code)
            2)
        (error/*handler*
          ::illegal-simple-type
          (str "Illegal or reserved simple value: " code)
          {:code code})))))

Helper function to construct an unknown simple value from the given code.

(defn- unknown-simple
  "Handles a simple value code with no built-in meaning. A non-strict decoder
  gets a generic simple value for the code; a strict decoder reports
  `::unknown-simple-value` through the error handler."
  [decoder value]
  (if-not (:strict decoder)
    (data/simple-value value)
    (error/*handler*
      ::unknown-simple-value
      (str "Unknown simple value " value)
      {:code value})))

Codec Implementation

Encoding Functions

Writes the value x as one of the native CBOR values and returns the number of bytes written. Returns nil if x is not a native type.

(defn- write-native
  "Writes `x` with the matching native CBOR writer and returns that writer's
  result. Returns nil when `x` matches none of the native types checked here."
  [codec out x]
  (cond
    ; Special and simple values
    (nil? x)
    (write-null codec out)

    (boolean? x)
    (write-boolean codec out x)

    (= data/undefined x)
    (write-undefined codec out)

    (data/simple-value? x)
    (write-simple codec out x)

    ; Numbers
    (representable-integer? x)
    (write-integer codec out x)

    (float? x)
    (write-float codec out x)

    ; Characters are written as one-character text strings.
    (or (char? x) (string? x))
    (write-text-string codec out (str x))

    (bytes? x)
    (write-byte-string codec out x)

    ; Tag extensions
    (data/tagged-value? x)
    (write-tagged codec out x)))

Writes the value x using a write-handler, if one is returned by the write-handlers lookup function. Returns the number of bytes written, or nil if no handler was found.

(defn- write-handled
  "Looks up a write handler for `x` by applying the codec's `:dispatch`
  function and passing the result to `:write-handlers`. When a handler is
  found, writes the handler's output via `write-value` and returns its result;
  otherwise returns nil."
  [codec out x]
  (let [{:keys [dispatch write-handlers]} codec]
    (when-let [formatter (write-handlers (dispatch x))]
      ; TODO: better error reporting
      (write-value codec out (formatter x)))))

Writes the value x as a collection type. Returns the number of bytes written, or nil if x is not a collection.

(defn- write-collection
  "Writes `x` as an array (seqs and vectors), a map, or a tagged set, returning
  the writer's result. Returns nil when `x` is none of those collection types."
  [codec out x]
  (cond
    (or (seq? x) (vector? x))
    (write-array codec out x)

    (map? x)
    (write-map codec out x)

    (set? x)
    (write-set codec out data/set-tag x)))

Decoding Functions

Use a jump-table to decode the next value from the input.

For decoding efficiency, we can directly represent decoding operations based on the first full byte of an encoded value. This can short circuit conditional logic in many cases.

See https://tools.ietf.org/html/rfc7049#appendix-B for details.

; Decodes one value by dispatching directly on the full initial byte `header`.
;
; `info` is the low five bits of the header (the additional information). It
; is passed on wherever a short count or code is carried in the header itself,
; and is included in the error data for reserved or illegal headers.
;
; Header bytes with no explicit clause fall through to the final default
; branch, which reports `::header/reserved-info-code`.
(defn- jump-decode
  [decoder ^DataInputStream input ^long header]
  (let [info (bit-and 0x1F header)]
    (case (int header)
      ; Positive Integers
      ; Headers 0x00-0x17 carry the value itself.
      0x00  0
      0x01  1
      0x02  2
      0x03  3
      0x04  4
      0x05  5
      0x06  6
      0x07  7
      0x08  8
      0x09  9
      0x0A 10
      0x0B 11
      0x0C 12
      0x0D 13
      0x0E 14
      0x0F 15
      0x10 16
      0x11 17
      0x12 18
      0x13 19
      0x14 20
      0x15 21
      0x16 22
      0x17 23
      ; Headers 0x18-0x1B are followed by the value in extra bytes.
      0x18 (header/read-byte input)
      0x19 (header/read-short input)
      0x1A (header/read-int input)
      0x1B (header/read-long input)
      0x1F (error/*handler*
             ::illegal-stream
             "Encoded integers cannot have indefinite length."
             {:code info})
      ; Negative Integers
      ; The decoded value is -1 - n, where n is the encoded number.
      0x20  -1
      0x21  -2
      0x22  -3
      0x23  -4
      0x24  -5
      0x25  -6
      0x26  -7
      0x27  -8
      0x28  -9
      0x29 -10
      0x2A -11
      0x2B -12
      0x2C -13
      0x2D -14
      0x2E -15
      0x2F -16
      0x30 -17
      0x31 -18
      0x32 -19
      0x33 -20
      0x34 -21
      0x35 -22
      0x36 -23
      0x37 -24
      0x38 (unchecked-dec (unchecked-negate (long (header/read-byte input))))
      0x39 (unchecked-dec (unchecked-negate (long (header/read-short input))))
      0x3A (unchecked-dec (unchecked-negate (long (header/read-int input))))
      ; NOTE(review): this branch uses the checked `dec`/`-` rather than the
      ; unchecked variants above, presumably because `header/read-long` can
      ; return values outside the signed 64-bit range - confirm.
      0x3B (dec (- (header/read-long input)))
      0x3F (error/*handler*
             ::illegal-stream
             "Encoded integers cannot have indefinite length."
             {:code info})
      ; Byte Strings
      ; Lengths 1-23 come from `info`; longer lengths are read from the input.
      0x40 (byte-array 0)
      (0x41 0x42 0x43 0x44 0x45 0x46 0x47
       0x48 0x49 0x4A 0x4B 0x4C 0x4D 0x4E 0x4F
       0x50 0x51 0x52 0x53 0x54 0x55 0x56 0x57)
      (read-bytes input info)
      0x58 (read-bytes input (header/read-byte input))
      0x59 (read-bytes input (header/read-short input))
      0x5A (read-bytes input (header/read-int input))
      0x5B (read-bytes input (header/read-long input))
      ; Indefinite length: chunks are read and joined with `concat-bytes`.
      0x5F (read-chunks decoder input :byte-string concat-bytes)
      ; Text Strings
      0x60 ""
      (0x61 0x62 0x63 0x64 0x65 0x66 0x67
       0x68 0x69 0x6A 0x6B 0x6C 0x6D 0x6E 0x6F
       0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77)
      (read-text input info)
      0x78 (read-text input (header/read-byte input))
      0x79 (read-text input (header/read-short input))
      0x7A (read-text input (header/read-int input))
      0x7B (read-text input (header/read-long input))
      ; Indefinite length: chunks are read and joined with `concat-text`.
      0x7F (read-chunks decoder input :text-string concat-text)
      ; Arrays
      ; Arrays of up to four elements are built inline as vector literals.
      0x80 []
      0x81 [(read-value decoder input)]
      0x82 [(read-value decoder input)
            (read-value decoder input)]
      0x83 [(read-value decoder input)
            (read-value decoder input)
            (read-value decoder input)]
      0x84 [(read-value decoder input)
            (read-value decoder input)
            (read-value decoder input)
            (read-value decoder input)]
      (0x85 0x86 0x87
       0x88 0x89 0x8A 0x8B 0x8C 0x8D 0x8E 0x8F
       0x90 0x91 0x92 0x93 0x94 0x95 0x96 0x97)
      (read-array decoder input info)
      0x98 (read-array decoder input (header/read-byte input))
      0x99 (read-array decoder input (header/read-short input))
      0x9A (read-array decoder input (header/read-int input))
      0x9B (read-array decoder input (header/read-long input))
      ; Indefinite length: the result is marked with `:cbor/streaming`
      ; metadata.
      0x9F (-> (read-value-stream decoder input build-array)
               (vary-meta assoc :cbor/streaming true))
      ; Maps
      ; The empty and single-entry maps are built inline.
      0xA0 {}
      0xA1 {(read-value decoder input)
            (read-value decoder input)}
      (0xA2 0xA3 0xA4 0xA5 0xA6 0xA7
       0xA8 0xA9 0xAA 0xAB 0xAC 0xAD 0xAE 0xAF
       0xB0 0xB1 0xB2 0xB3 0xB4 0xB5 0xB6 0xB7)
      (read-map decoder input info)
      0xB8 (read-map decoder input (header/read-byte input))
      0xB9 (read-map decoder input (header/read-short input))
      0xBA (read-map decoder input (header/read-int input))
      0xBB (read-map decoder input (header/read-long input))
      ; Indefinite length: the result is marked with `:cbor/streaming`
      ; metadata.
      0xBF (-> (read-value-stream decoder input build-map)
               (vary-meta assoc :cbor/streaming true))
      ; Tagged Values
      ; `read-tagged` receives `info` and reads the tag code itself.
      (0xC0 0xC1 0xC2 0xC3 0xC4 0xC5 0xC6 0xC7
       0xC8 0xC9 0xCA 0xCB 0xCC 0xCD 0xCE 0xCF
       0xD0 0xD1 0xD2 0xD3 0xD4 0xD5 0xD6 0xD7
       0xD8 0xD9 0xDA 0xDB)
      (read-tagged decoder input info)
      0xDF
      (error/*handler*
        ::illegal-stream
        "Encoded tags cannot have indefinite length."
        {:code info})
      ; Simple Values
      ; Codes 0-19 have no built-in meaning and go through `unknown-simple`.
      (0xE0 0xE1 0xE2 0xE3 0xE4 0xE5 0xE6 0xE7
       0xE8 0xE9 0xEA 0xEB 0xEC 0xED 0xEE 0xEF
       0xF0 0xF1 0xF2 0xF3)
      (unknown-simple decoder info)
      0xF4 false
      0xF5 true
      0xF6 nil
      0xF7 data/undefined
      ; One-byte extended simple value code.
      0xF8 (unknown-simple decoder (.readUnsignedByte input))
      ; Half-, single-, and double-precision floating-point numbers.
      0xF9 (float16/decode (.readUnsignedShort input))
      0xFA (.readFloat input)
      0xFB (.readDouble input)
      (0xFC 0xFD 0xFE)
      (error/*handler*
        ::illegal-simple-type
        (format "Additional information simple-value code %d is reserved."
                info)
        {:code info})
      0xFF
      (error/*handler*
        ::unexpected-break
        "Break encountered outside streaming context."
        {})
      ; Otherwise, must be some reserved info code.
      (error/*handler*
        ::header/reserved-info-code
        (format "Additional information int code %d is reserved."
                info)
        {:header header
         :info info}))))

Codec Record

; Codec record implementing both the Encoder and Decoder protocols.
;
; Writing tries the native writers first, then the registered write handlers,
; then the collection writers; if none of them produces a result the value is
; reported as `::unsupported-type`. Reading delegates to `jump-decode`.
(defrecord CBORCodec
  [dispatch write-handlers read-handlers]

  Encoder

  (write-value
    [codec out x]
    (or (write-native codec out x)
        (write-handled codec out x)
        (write-collection codec out x)
        (error/*handler*
          ::unsupported-type
          (str "No known encoding for object: " (pr-str x))
          {:value x})))

  Decoder

  (read-value*
    [codec input header]
    (jump-decode codec input header)))

Constructs a new CBORCodec record with default empty field values.

(defn blank-codec
  "Constructs a new `CBORCodec` that dispatches on `class`, has empty read and
  write handler maps, and has both `:canonical` and `:strict` set to false."
  []
  (-> (->CBORCodec class {} {})
      (assoc :canonical false
             :strict false)))
 

Read and write handler support for Clojure types.

(ns clj-cbor.tags.clojure
  (:require
    [clj-cbor.data.core :as data])
  (:import
    (clojure.lang
      Keyword
      Symbol
      TaggedLiteral)))

Symbols & Keywords

Keywords and symbols are represented using tag 39 ('identifier') applied to the string version of the value. This adds three bytes to the size of the identifier itself for the header, tag code, and string header. Keywords are symbols whose first character is a colon (:).

See: https://github.com/lucas-clemente/cbor-specs/blob/master/id.md

(def ^:const identifier-tag
  "Tag code applied to the string form of symbols and keywords."
  39)
(defn format-symbol
  "Formats a symbol or keyword as its string form wrapped in a tagged value
  with `identifier-tag`."
  [value]
  (let [text (str value)]
    (data/tagged-value identifier-tag text)))
(defn parse-symbol
  "Parses the string form of an identifier. Strings starting with a colon
  become keywords (without the colon); everything else becomes a symbol.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a string."
  [value]
  (when-not (string? value)
    (throw (ex-info (str "Symbols must be tagged strings, got: "
                         (class value))
                    {:value value})))
  ; `startsWith` is safe on the empty string, whereas `(.charAt value 0)`
  ; throws StringIndexOutOfBoundsException. This lets the empty symbol written
  ; by `(str (symbol \"\"))` be read back.
  (if (.startsWith ^String value ":")
    (keyword (subs value 1))
    (symbol value)))

Tagged Literals

Tagged literals are represented using tag 27 ('generic object') applied to an array containing two elements. The first element is the string version of the EDN tag symbol and the second is the tagged literal form.

See: http://cbor.schmorp.de/generic-object

(def ^:const generic-object-tag
  "Tag code applied to the two-element array form of tagged literals."
  27)
(defn format-tagged-literal
  "Formats a tagged literal as a `generic-object-tag` tagged value holding a
  two-element vector: the tag rendered as a string, then the literal's form."
  [value]
  (let [tag-name (str (:tag value))
        form (:form value)]
    (data/tagged-value generic-object-tag [tag-name form])))
(defn parse-tagged-literal
  "Parses a two-element sequence of tag string and form into a tagged literal.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a
  sequential collection of exactly two elements."
  [value]
  (when-not (and (sequential? value) (= 2 (count value)))
    ; The message previously said \"Sets\", copied from another handler.
    (throw (ex-info (str "Tagged literals must be tagged two-element arrays, got: "
                         (class value))
                    {:value value})))
  (tagged-literal (symbol (first value)) (second value)))

Codec Formatter/Handler Maps

Map of Clojure types to write handler functions.

(def clojure-write-handlers
  "Write handlers for Clojure keywords, symbols, and tagged literals, keyed by
  class."
  {Keyword format-symbol
   Symbol format-symbol
   TaggedLiteral format-tagged-literal})

Map of tag codes to read handlers to parse Clojure values.

(def clojure-read-handlers
  "Read handlers for Clojure identifiers and tagged literals, keyed by tag
  code."
  {generic-object-tag parse-tagged-literal
   identifier-tag parse-symbol})
 

Built-in tag support for the text extensions in RFC 7049. See section 2.4.4.

(ns clj-cbor.tags.text
  (:require
    [clj-cbor.data.core :as data])
  (:import
    java.net.URI
    java.nio.ByteBuffer
    java.util.UUID
    java.util.regex.Pattern))

URIs

Tag 32 indicates that the tagged string represents a Uniform Resource Identifier.

(def ^:const uri-tag
  "Tag code marking a string as a Uniform Resource Identifier."
  32)
(defn format-uri
  "Formats a URI as its string form wrapped in a tagged value with `uri-tag`."
  [^URI value]
  (let [text (str value)]
    (data/tagged-value uri-tag text)))
(defn parse-uri
  "Parses a string into a `java.net.URI`. Throws an `ex-info` carrying
  `{:value value}` when `value` is not a string."
  [value]
  (if (string? value)
    (URI. value)
    (throw (ex-info (str "URIs must be tagged strings, got: "
                         (class value))
                    {:value value}))))

Patterns

Tag 35 is used to represent regular expressions, expressed as a Perl-compatible pattern.

(def ^:const pattern-tag
  "Tag code marking a string as a regular expression."
  35)
(defn format-pattern
  "Formats a regular expression as its string form wrapped in a tagged value
  with `pattern-tag`."
  [^Pattern value]
  (let [source (str value)]
    (data/tagged-value pattern-tag source)))
(defn parse-pattern
  "Compiles a string into a `java.util.regex.Pattern`. Throws an `ex-info`
  carrying `{:value value}` when `value` is not a string."
  [value]
  (if (string? value)
    (Pattern/compile value)
    (throw (ex-info (str "Regular expressions must be tagged strings, got: "
                         (class value))
                    {:value value}))))

UUIDs

UUIDs are represented in binary form as a byte string tagged with code 37.

See: https://github.com/lucas-clemente/cbor-specs/blob/master/uuid.md

(def ^:const uuid-tag
  "Tag code marking a byte string as a binary UUID."
  37)
(defn format-uuid
  "Formats a UUID as a 16-byte array (most significant bits first, then least
  significant bits) wrapped in a tagged value with `uuid-tag`."
  [^UUID value]
  (let [buffer (doto (ByteBuffer/allocate 16)
                 (.putLong (.getMostSignificantBits value))
                 (.putLong (.getLeastSignificantBits value)))]
    (data/tagged-value uuid-tag (.array buffer))))
(defn parse-uuid
  "Parses a byte array into a `java.util.UUID`, reading the most significant
  eight bytes first and then the least significant eight.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a byte
  array or holds fewer than 16 bytes."
  [value]
  (when-not (bytes? value)
    (throw (ex-info (str "UUIDs must be tagged byte strings, got: "
                         (class value))
                    {:value value})))
  ; Short input would otherwise surface as a BufferUnderflowException with no
  ; message. Longer input is still accepted, as before.
  (when (< (alength ^bytes value) 16)
    (throw (ex-info (str "UUIDs must be encoded in 16 bytes, got: "
                         (alength ^bytes value))
                    {:value value})))
  (let [buffer (ByteBuffer/wrap ^bytes value)]
    (UUID. (.getLong buffer) (.getLong buffer))))

Codec Formatter/Handler Maps

Map of text types to formatting functions.

(def text-write-handlers
  "Write handlers for URIs, UUIDs, and regular expressions, keyed by class."
  {URI format-uri
   UUID format-uuid
   Pattern format-pattern})

Map of tag handlers to parse text values.

(def text-read-handlers
  "Read handlers for URIs, regular expressions, and UUIDs, keyed by tag code."
  {uri-tag parse-uri
   pattern-tag parse-pattern
   uuid-tag parse-uuid})
 

Built-in tag support for the time extensions in RFC 7049. See section 2.4.1.

This namespace offers interop with both the older java.util.Date class as well as the newer java.time.Instant. Support for both timestamp-based tagged values and the more efficient epoch-based values is included.

(ns clj-cbor.tags.time
  (:require
    [clj-cbor.data.core :as data])
  (:import
    java.time.Instant
    java.time.format.DateTimeFormatter
    java.util.Date))

Tag value 0 is for date/time strings that follow the standard format described in RFC3339, as refined by Section 3.3 of RFC4287.

(def ^:const string-time-tag
  "Tag code for date/time values represented as strings."
  0)

Tag value 1 is for numerical representation of seconds relative to 1970-01-01T00:00Z in UTC time.

The tagged item can be a positive or negative integer (major types 0 and 1), or a floating-point number (major type 7 with additional information 25, 26, or 27). Note that the number can be negative (time before 1970-01-01T00:00Z) and, if a floating-point number, indicate fractional seconds.

(def ^:const epoch-time-tag
  "Tag code for date/time values represented as a number of seconds relative
  to the epoch."
  1)
(defn- tagged-epoch-time
  "Converts a count of epoch milliseconds into an `epoch-time-tag` tagged
  value in seconds: an integer when the count is a whole number of seconds,
  otherwise a floating-point number."
  [epoch-millis]
  (let [whole-seconds? (zero? (mod epoch-millis 1000))
        seconds (if whole-seconds?
                  (long (/ epoch-millis 1000))
                  (/ epoch-millis 1000.0))]
    (data/tagged-value epoch-time-tag seconds)))
(defn- check-epoch-form!
  "Returns nil when `value` is a number; otherwise throws an `ex-info`
  carrying `{:value value}`."
  [value]
  (if (number? value)
    nil
    (throw (ex-info (str "Tag 1 values must be tagged numbers, got: "
                         (class value))
                    {:value value}))))
(defn- check-timestamp-form!
  "Returns nil when `value` is a string; otherwise throws an `ex-info`
  carrying `{:value value}`."
  [value]
  (if (string? value)
    nil
    (throw (ex-info (str "Tag 0 values must be tagged strings, got: "
                         (class value))
                    {:value value}))))

Instants

These functions interoperate with the java.time.Instant class.

(defn format-instant-epoch
  "Formats an `Instant` as an epoch-time tagged value built from its
  millisecond timestamp."
  [^Instant value]
  (let [millis (.toEpochMilli value)]
    (tagged-epoch-time millis)))
(defn parse-epoch-instant
  "Parses a number of epoch seconds into a `java.time.Instant` with
  millisecond precision. `check-epoch-form!` rejects non-numeric values."
  [value]
  (check-epoch-form! value)
  (Instant/ofEpochMilli
    (if (float? value)
      ; Round to the nearest millisecond instead of truncating: 1.001 * 1000
      ; evaluates to 1000.9999999999999, which truncation turns into 1000.
      ; `long` keeps the range check for infinite values.
      (long (Math/rint (* (double value) 1000.0)))
      (long (* value 1000)))))
(defn format-instant-string
  "Formats an `Instant` as a string-time tagged value, rendering the instant
  with `DateTimeFormatter/ISO_INSTANT`."
  [^Instant value]
  (let [timestamp (.format DateTimeFormatter/ISO_INSTANT value)]
    (data/tagged-value string-time-tag timestamp)))
(defn parse-string-instant
  "Parses a timestamp string into a `java.time.Instant` using `Instant/parse`.
  `check-timestamp-form!` rejects non-string values first."
  [value]
  (-> value
      (doto check-timestamp-form!)
      (Instant/parse)))

Dates

These functions interoperate with the java.util.Date class.

(defn format-date-epoch
  "Formats a `Date` as an epoch-time tagged value built from its millisecond
  timestamp."
  [^Date value]
  (let [millis (.getTime value)]
    (tagged-epoch-time millis)))
(defn parse-epoch-date
  "Parses a number of epoch seconds into a `java.util.Date` with millisecond
  precision. `check-epoch-form!` rejects non-numeric values."
  [value]
  (check-epoch-form! value)
  (Date.
    (long
      (if (float? value)
        ; Round to the nearest millisecond instead of truncating: 1.001 * 1000
        ; evaluates to 1000.9999999999999, which truncation turns into 1000.
        (Math/rint (* (double value) 1000.0))
        (* value 1000)))))
(defn format-date-string
  "Formats a `Date` as a string-time tagged value by converting it to an
  `Instant` and delegating to `format-instant-string`."
  [^Date value]
  (-> value
      (.toInstant)
      (format-instant-string)))
(defn parse-string-date
  "Parses a timestamp string into a `java.util.Date` by parsing it as an
  `Instant` and converting the result. `check-timestamp-form!` rejects
  non-string values first."
  [value]
  (check-timestamp-form! value)
  (let [instant (parse-string-instant value)]
    (Date/from instant)))

Codec Maps

Map of date-time types to render as tag 1 epoch offsets.

(def epoch-time-write-handlers
  "Write handlers that render `Date` and `Instant` values as epoch offsets,
  keyed by class."
  {Date format-date-epoch
   Instant format-instant-epoch})

Map of date-time types to render as tag 0 time strings.

(def string-time-write-handlers
  "Write handlers that render `Date` and `Instant` values as time strings,
  keyed by class."
  {Date format-date-string
   Instant format-instant-string})

Map of tag handlers to parse date-times as java.time.Instant values.

(def instant-read-handlers
  "Read handlers that parse tagged date-times into `java.time.Instant` values,
  keyed by tag code."
  {string-time-tag parse-string-instant
   epoch-time-tag parse-epoch-instant})

Map of tag handlers to parse date-times as java.util.Date values.

(def date-read-handlers
  "Read handlers that parse tagged date-times into `java.util.Date` values,
  keyed by tag code."
  {string-time-tag parse-string-date
   epoch-time-tag parse-epoch-date})
 

Read and write handler support for content sharing and encoding hints.

(ns clj-cbor.tags.content
  (:require
    [clj-cbor.data.core :as data]))

Self-Describe CBOR

In many applications, it will be clear from the context that CBOR is being employed for encoding a data item. For instance, a specific protocol might specify the use of CBOR, or a media type is indicated that specifies its use. However, there may be applications where such context information is not available, such as when CBOR data is stored in a file and disambiguating metadata is not in use. Here, it may help to have some distinguishing characteristics for the data itself.

Tag 55799 is defined for self-described CBOR values. It does not impart any special semantics on the data item that follows; that is, the semantics of a data item tagged with tag 55799 is exactly identical to the semantics of the data item itself.

(def ^:const self-describe-cbor-tag
  "Tag code marking a value as self-described CBOR."
  55799)
(defn format-self-described
  "Wraps `value` in a tagged value with `self-describe-cbor-tag`."
  [value]
  (-> self-describe-cbor-tag
      (data/tagged-value value)))

Codec Formatter/Handler Maps

Map of misc types to write handler functions.

(def content-write-handlers
  "Write handlers for content types, keyed by class. Currently empty."
  {})

Map of tag codes to read handlers to parse misc values.

(def content-read-handlers
  "Read handlers for content tags, keyed by tag code. Self-described values
  are returned unchanged."
  {self-describe-cbor-tag identity})
 

Built-in tag support for the number extensions in RFC 7049. See section 2.4.2.

(ns clj-cbor.tags.numbers
  (:require
    [clj-cbor.data.core :as data])
  (:import
    (clojure.lang
      BigInt
      Ratio)
    (java.math
      BigDecimal
      BigInteger)))

Bignums

Bignums are integers that do not fit into the basic integer representations provided by major types 0 and 1.

Tag 2 is for positive bignums, which are encoded as a byte string data item. This is interpreted as an unsigned integer n in network byte order.

(def ^:const positive-bignum-tag
  "Tag code for non-negative bignums encoded as byte strings."
  2)

Tag 3 is for negative bignums. These are encoded the same as for positive bignums (tag 2), but the value of the bignum is -1 - n.

(def ^:const negative-bignum-tag
  "Tag code for negative bignums, whose byte string encodes n for the value
  -1 - n."
  3)
(defn format-bignum
  "Formats an integer as a tagged byte array. Non-negative values use
  `positive-bignum-tag` with the bytes of the value itself; negative values
  use `negative-bignum-tag` with the bytes of `-(value + 1)`."
  [value]
  (let [^BigInteger n (biginteger value)
        negative? (neg? n)
        tag (if negative? negative-bignum-tag positive-bignum-tag)
        ^BigInteger magnitude (if negative?
                                (.negate (.add n BigInteger/ONE))
                                n)]
    (data/tagged-value tag (.toByteArray magnitude))))
(defn parse-positive-bignum
  "Parses a byte array as an unsigned big-endian integer and returns it as a
  `BigInt`. An empty byte array yields zero.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a byte
  array."
  [value]
  (when-not (bytes? value)
    (throw (ex-info (str "Bignums must be represented as a tagged byte string, got: "
                         (class value))
                    {:value value})))
  ; Signum 1 makes the bytes an unsigned magnitude. The single-argument
  ; constructor reads two's-complement, which turns any input with the high
  ; bit set into a negative number.
  (bigint (BigInteger. 1 ^bytes value)))
(defn parse-negative-bignum
  "Parses a byte array as an unsigned big-endian integer n and returns
  `-1 - n` as a `BigInt`. An empty byte array yields -1.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a byte
  array."
  [value]
  (when-not (bytes? value)
    (throw (ex-info (str "Bignums must be represented as a tagged byte string, got: "
                         (class value))
                    {:value value})))
  ; Signum 1 makes the bytes an unsigned magnitude. The single-argument
  ; constructor reads two's-complement, which gives the wrong result for any
  ; input with the high bit set.
  (-> (BigInteger. 1 ^bytes value)
      (.add BigInteger/ONE)
      (.negate)
      (bigint)))

Decimal Fractions

Decimal fractions combine an integer mantissa with a base-10 scaling factor. They are most useful if an application needs the exact representation of a decimal fraction such as 1.1 because there is no exact representation for many decimal fractions in binary floating point.

Tag 4 indicates a decimal fraction represented by a tagged array with two items, an integer exponent and an integer or bignum mantissa. The value of a decimal fraction is m*(10**e).

(def ^:const big-decimal-tag
  "Tag code for decimal fractions encoded as an exponent and mantissa pair."
  4)
(defn format-big-decimal
  "Formats a `BigDecimal` as a `big-decimal-tag` tagged vector holding the
  negated scale (the base-10 exponent) followed by the unscaled value."
  [^BigDecimal value]
  (let [scale (.scale value)
        unscaled (.unscaledValue value)]
    (data/tagged-value big-decimal-tag [(- scale) unscaled])))
(defn parse-big-decimal
  "Parses a two-element sequence of exponent and mantissa into a `BigDecimal`
  whose scale is the negated exponent.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a
  sequential collection of exactly two elements."
  [value]
  (if (and (sequential? value) (= 2 (count value)))
    (let [exponent (first value)
          mantissa (second value)]
      (BigDecimal. (biginteger mantissa) (int (- exponent))))
    (throw (ex-info (str "Decimal fractions must be represented with a two-element array, got: "
                         (pr-str value))
                    {:value value}))))

Ratios

Tag 30 is used to represent a rational number composed of two integers, a numerator and a denominator.

See: http://peteroupc.github.io/CBOR/rational.html

(def ^:const ratio-tag
  "Tag code for rational numbers encoded as a numerator and denominator pair."
  30)
(defn format-ratio
  "Formats a ratio as a `ratio-tag` tagged vector holding its numerator
  followed by its denominator."
  [value]
  (let [parts [(numerator value) (denominator value)]]
    (data/tagged-value ratio-tag parts)))
(defn parse-ratio
  "Parses a two-element sequence of numerator and denominator into a `Ratio`.

  Throws an `ex-info` carrying `{:value value}` when `value` is not a
  sequential collection of exactly two elements."
  [value]
  (if (and (sequential? value) (= 2 (count value)))
    ; Local names avoid shadowing the core `numerator`/`denominator` functions.
    (let [top (first value)
          bottom (second value)]
      (Ratio. (biginteger top) (biginteger bottom)))
    (throw (ex-info (str "Rational numbers must be represented with a two-element array, got: "
                         (pr-str value))
                    {:value value}))))

Codec Formatter/Handler Maps

Map of number types to write handler functions.

(def number-write-handlers
  "Write handlers for bignums, decimal fractions, and ratios, keyed by class."
  {BigInt format-bignum
   BigInteger format-bignum
   BigDecimal format-big-decimal
   Ratio format-ratio})

Map of tag codes to read handlers to parse number values.

(def number-read-handlers
  "Read handlers for bignums, decimal fractions, and ratios, keyed by tag
  code."
  {positive-bignum-tag parse-positive-bignum
   negative-bignum-tag parse-negative-bignum
   big-decimal-tag parse-big-decimal
   ratio-tag parse-ratio})
 

Type definition for CBOR simple values.

(ns clj-cbor.data.simple)

Undefined Value

; Type for the CBOR 'undefined' value. Every instance is equal to every other
; instance and hashes the same; instances differ only in their metadata.
(deftype Undefined
  [_meta]

  Object

  (toString [_] "undefined")

  (equals
    [_ other]
    ; An identical object is necessarily an instance, so one check suffices.
    (boolean (instance? Undefined other)))

  (hashCode
    [_]
    (hash Undefined))

  clojure.lang.IObj

  (meta [_] _meta)

  (withMeta
    [_ new-meta]
    (Undefined. new-meta)))

Generic Simple Value

; Type for a generic CBOR simple value identified by the numeric code `n`.
; Equality and hashing depend only on the code, not on metadata.
(deftype SimpleValue
  [^long n _meta]

  Object

  (toString [_] (str "simple(" n ")"))

  (equals
    [_ other]
    ; An identical object is an instance with the same code, so the general
    ; comparison covers that case as well.
    (boolean (and (instance? SimpleValue other)
                  (= n (.n ^SimpleValue other)))))

  (hashCode
    [_]
    (hash [SimpleValue n]))

  clojure.lang.IObj

  (meta [_] _meta)

  (withMeta
    [_ new-meta]
    (SimpleValue. n new-meta)))
 

Implementation of IEEE 754 half-precision floating point.

(ns clj-cbor.data.float16)
; Bit patterns of the special half-precision values.
(def zero              0x0000)
(def positive-infinity 0x7C00)
(def negative-infinity 0xFC00)
(def not-a-number      0x7E00)

Combine values for different fields in the float into a composite binary value.

(defn- combine-bits
  "Builds a float from its fields: the sign bit is set when `sign` is
  non-zero, and `exp` and `mant` are or-ed together and shifted left 13 bits."
  [sign exp mant]
  (let [sign-bit (if (zero? sign) 0 Integer/MIN_VALUE)
        magnitude (-> (bit-or exp mant)
                      (bit-shift-left 13))]
    (Float/intBitsToFloat (bit-or sign-bit magnitude))))

Returns a float value read as a half-precision IEEE floating-point number from the lower two bytes of x.

; Decodes the half-precision bit pattern in `x` into a float.
;
; The input is split into its sign (0x8000), exponent (0x7c00), and mantissa
; (0x03ff) fields, which are then rebuilt as single-precision fields by
; `combine-bits`.
(defn decode
  [x]
  (let [sign (bit-and x 0x8000)
        exp  (bit-and x 0x7c00)
        mant (bit-and x 0x03ff)]
    (cond
      ; NaN and Infinite values.
      ; 0x3fc00 is 255 << 10, the all-ones single-precision exponent.
      (= exp 0x7c00)
        (combine-bits sign 0x3fc00 mant)
      ; Normalized value.
      ; 0x1c000 is (127 - 15) << 10, the difference between the two exponent
      ; biases.
      (not (zero? exp))
        (combine-bits sign (+ exp 0x1c000) mant)
      ; Subnormal value.
      ; Shift the mantissa left until bit 0x400 is set, lowering the exponent
      ; one step per shift, then drop that bit from the mantissa.
      (not (zero? mant))
        (loop [exp 0x1c400
               mant mant]
          (if (zero? (bit-and mant 0x400))
            (recur (- exp 0x400) (bit-shift-left mant 1))
            (combine-bits sign exp (bit-and mant 0x3ff))))
      ; +/- 0
      :else
        (combine-bits sign exp mant))))

Returns an integer whose lower two bytes encode the given number in the half-precision IEEE floating point format.

; Encodes `x` as a half-precision bit pattern, returned as an integer.
;
; `x` is first coerced to a float. `sign` is the float's sign bit moved to
; bit 15, and `value` is the float's magnitude bits plus 0x1000, which is half
; of the lowest bit kept after the 13-bit shift.
(defn encode
  [x]
  (let [fbits (Float/floatToIntBits (float x))
        sign (bit-and (unsigned-bit-shift-right fbits 16)
                      0x8000)
        value (+ (bit-and fbits 0x7fffffff) 0x1000)]     ; rounded value
    (cond
      ; Value might be or become NaN/Inf.
      ; 0x47800000 is the bit pattern of 65536.0 and 0x7f800000 that of
      ; float infinity.
      (>= value 0x47800000)
        (if (< value 0x7f800000)
          ; Value was too large, promote to infinity.
          (bit-or sign 0x7c00)
          ; Value remains NaN or +/-Inf.
          (bit-or sign 0x7c00 (unsigned-bit-shift-right
                                (bit-and fbits 0x007fffff)
                                13)))
      ; Retain normalized value.
      ; 0x38800000 is the bit pattern of 2^-14; 0x38000000 is (127 - 15) << 23,
      ; the exponent bias difference.
      (>= value 0x38800000)
        (bit-or sign (unsigned-bit-shift-right (- value 0x38000000) 13))
      ; Value is too small, becomes +/-0
      ; 0x33000000 is the bit pattern of 2^-25.
      (< value 0x33000000)
        sign
      ; Encode subnormal value.
      ; The implicit leading one (0x800000) is restored before shifting the
      ; mantissa right by an amount that depends on the exponent.
      :else
        (let [exp (unsigned-bit-shift-right (bit-and fbits 0x7fffffff) 23)]
          (bit-or sign
                  (unsigned-bit-shift-right
                    (+ (bit-or (bit-and fbits 0x7fffff)
                               0x800000)
                       (unsigned-bit-shift-right 0x800000 (- exp 102)))
                    (- 126 exp)))))))
 

Type definition for CBOR tagged values.

(ns clj-cbor.data.tagged)
; Type for a CBOR tagged value pairing a `tag` with a `value`. Equality and
; hashing depend on the tag and value, not on metadata.
(deftype TaggedValue
  [tag value _meta]

  Object

  (toString [_] (str tag "(" value ")"))

  (equals
    [this other]
    (cond
      (identical? this other)
      true

      (instance? TaggedValue other)
      (let [^TaggedValue other other]
        (boolean (and (= tag (.tag other))
                      (= value (.value other)))))

      :else
      false))

  (hashCode
    [_]
    (hash [TaggedValue tag value]))

  clojure.lang.IObj

  (meta [_] _meta)

  (withMeta
    [_ new-meta]
    (TaggedValue. tag value new-meta)))
 

Type definitions and keyword identifiers for CBOR data types.

(ns clj-cbor.data.core
  (:require
    [clj-cbor.data.float16 :as float16]
    [clj-cbor.data.simple :as simple]
    [clj-cbor.data.tagged :as tagged])
  (:import
    (clj_cbor.data.simple
      SimpleValue
      Undefined)
    clj_cbor.data.tagged.TaggedValue))

Simple Values

Base singleton undefined value.

(def undefined
  "Base undefined value, constructed with no metadata."
  (simple/->Undefined nil))

Constructs a simple type for the given number.

(defn simple-value
  "Constructs a simple value for the code `n`. Throws
  `IllegalArgumentException` when `n` is negative or greater than 255."
  [n]
  (if (or (neg? n) (> n 255))
    (throw (IllegalArgumentException.
             "Simple value codes must be between 0 and 255"))
    (simple/->SimpleValue n nil)))

Predicate which tests whether x is a simple CBOR value.

(defn simple-value?
  "Returns true when `x` is an instance of `SimpleValue`."
  [x]
  (let [simple-type SimpleValue]
    (instance? simple-type x)))

Tagged Values

Constructs a tagged value.

(defn tagged-value
  "Constructs a tagged value from `tag` and `value`, with no metadata."
  [tag value]
  (let [no-meta nil]
    (tagged/->TaggedValue tag value no-meta)))

Predicate which tests whether x is a CBOR tagged value.

(defn tagged-value?
  "Returns true when `x` is an instance of `TaggedValue`."
  [x]
  (let [tagged-type TaggedValue]
    (instance? tagged-type x)))

Tag code used to identify sets of unique values. Hard-coded here to support canonical encoding.

(def set-tag
  "Tag code used to identify sets of unique values."
  258)