// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT OR Apache-2.0
use std::{
collections::BTreeMap,
io::{Error, ErrorKind, Result, Write},
};
use serde::Serialize;
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
use unicode_normalization::UnicodeNormalization;
/// A [`serde_json::ser::Formatter`] which outputs [Canonical JSON].
///
/// Object members are buffered and emitted sorted by key, string fragments are
/// normalised to Unicode Normalization Form C, and floating point numbers are
/// rejected with an error.
///
/// The Radicle specification disagrees with the [Canonical JSON] spec, in that
/// ASCII plane control characters (`U+0000` - `U+007F`) in string values are
/// escaped according to the normal JSON escaping rules as per [RFC 8259],
/// Section 7. The reason is that conformant JSON parsers (such as `serde_json`)
/// will typically refuse to accept strings containing these characters as
/// unescaped bytes. As we standardise on an exact set, and mandate that hex
/// escape sequences shall be lower-case, canonicity is preserved (unicode
/// normalisation still applies).
///
/// This implementation is based on the [`olpc-cjson`] crate, and inlined here
/// for distribution convenience. We expressly license the original code under
/// the terms of the MIT licence.
///
/// [Canonical JSON]: https://web.archive.org/web/20250207154955/https://wiki.laptop.org/go/Canonical_JSON
/// [RFC 8259]: https://www.rfc-editor.org/rfc/rfc8259.txt
/// [`olpc-cjson`]: https://crates.io/crates/olpc-cjson
#[derive(Debug, Default)]
pub struct CanonicalFormatter {
// Objects currently being serialized, innermost last. `begin_object` pushes
// an entry and `end_object` pops it; the stack is empty whenever output is
// not nested inside any object.
object_stack: Vec<Object>,
}
/// Internal struct to keep track of an object in progress of being built.
///
/// As keys and values are received by `CanonicalFormatter`, they are written to
/// `next_key` and `next_value` by using the `CanonicalFormatter::writer`
/// convenience method.
///
/// How this struct behaves when `Formatter` methods are called:
///
/// ```plain
///     [other methods]     // values written to the writer received by method
///     begin_object        // create this object
/// /-> begin_object_key    // object.key_done = false;
/// |   [other methods]     // values written to object.next_key, writer received by method ignored
/// |   end_object_key      // object.key_done = true;
/// |   begin_object_value  // [nothing]
/// |   [other methods]     // values written to object.next_value
/// |   end_object_value    // object.next_key and object.next_value are inserted into object.obj
/// \----                   // jump back if more values are present
///     end_object          // write the object (sorted by its keys) to the writer received by the method
/// ```
///
/// NOTE(review): `obj` is ordered by the bytes accumulated in `next_key`, i.e.
/// the key as already serialized (quotes and escape sequences included), not
/// by the raw key string. For keys containing escaped characters, or
/// characters that sort below `"`, this can presumably differ from ordering
/// the raw keys -- confirm this is the ordering the specification intends.
#[derive(Debug, Default)]
struct Object {
// Completed members: serialized key bytes mapped to serialized value bytes.
// The `BTreeMap` keeps them ordered by key bytes; inserting an existing key
// replaces the earlier value.
obj: BTreeMap<Vec<u8>, Vec<u8>>,
// Bytes of the key currently being written (used while `key_done` is false).
next_key: Vec<u8>,
// Bytes of the value currently being written (used while `key_done` is true).
next_value: Vec<u8>,
// Selects the buffer `CanonicalFormatter::writer` hands out: `next_key`
// while false, `next_value` while true.
key_done: bool,
}
impl CanonicalFormatter {
    /// Build a formatter with no objects in progress.
    pub fn new() -> Self {
        Self::default()
    }

    /// Pick the sink that the next piece of output belongs to.
    ///
    /// While an object is being built (the object stack is non-empty), output
    /// is captured in the innermost object's key buffer or value buffer,
    /// depending on whether its key has been completed; see `Object` for the
    /// state machine. Outside of any object, `writer` itself is returned.
    fn writer<'a, W: Write + ?Sized>(&'a mut self, writer: &'a mut W) -> Box<dyn Write + 'a> {
        let Some(top) = self.object_stack.last_mut() else {
            // Not inside an object: write straight through.
            return Box::new(writer);
        };

        let buffer = if top.key_done {
            &mut top.next_value
        } else {
            &mut top.next_key
        };
        Box::new(buffer)
    }

    /// Borrow the innermost object under construction.
    ///
    /// Fails with an I/O error when no object has been started.
    fn obj_mut(&mut self) -> Result<&mut Object> {
        match self.object_stack.last_mut() {
            Some(top) => Ok(top),
            None => Err(Error::other(
                "serde_json called an object method without calling begin_object first",
            )),
        }
    }
}
/// Generates a `Formatter` method that forwards to the same-named
/// `serde_json::CompactFormatter` method, replacing the writer supplied by
/// serde_json with the one chosen by `CanonicalFormatter::writer`.
macro_rules! wrapper {
    // Method that takes one extra argument of the given type.
    ($method:ident, $arg_ty:ty) => {
        fn $method<W: Write + ?Sized>(&mut self, writer: &mut W, value: $arg_ty) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$method(&mut target, value)
        }
    };
    // Method that takes only the writer.
    ($method:ident) => {
        fn $method<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$method(&mut target)
        }
    };
}
/// Expands to the `Err` returned whenever a floating point number is
/// encountered. Shared by the three number-writing methods that reject floats.
macro_rules! float_err {
    () => {{
        let reason = "floating point numbers are not allowed in canonical JSON";
        Err(Error::new(ErrorKind::InvalidInput, reason))
    }};
}
impl Formatter for CanonicalFormatter {
    // Nulls, booleans and integers are delegated to `CompactFormatter`, but
    // through `CanonicalFormatter::writer` so that output produced inside an
    // object lands in that object's key/value buffer.
    //
    // The 128-bit variants must be wrapped as well: the trait's default
    // implementations write directly to the writer they are handed, which
    // would bypass the buffers of an object in progress.
    wrapper!(write_null);
    wrapper!(write_bool, bool);
    wrapper!(write_i8, i8);
    wrapper!(write_i16, i16);
    wrapper!(write_i32, i32);
    wrapper!(write_i64, i64);
    wrapper!(write_i128, i128);
    wrapper!(write_u8, u8);
    wrapper!(write_u16, u16);
    wrapper!(write_u32, u32);
    wrapper!(write_u64, u64);
    wrapper!(write_u128, u128);

    // Floating point numbers are rejected unconditionally.
    fn write_f32<W: Write + ?Sized>(&mut self, _writer: &mut W, _value: f32) -> Result<()> {
        float_err!()
    }

    fn write_f64<W: Write + ?Sized>(&mut self, _writer: &mut W, _value: f64) -> Result<()> {
        float_err!()
    }

    // If serde_json's `arbitrary_precision` feature is enabled, all numbers are
    // internally stored as strings, and this method is always used (even for
    // floating point values). Older serde_json releases also routed u128/i128
    // through here.
    fn write_number_str<W: Write + ?Sized>(&mut self, writer: &mut W, value: &str) -> Result<()> {
        // A decimal point or an exponent marker means the number is treated as
        // floating point and rejected.
        if value.contains(['.', 'e', 'E']) {
            float_err!()
        } else {
            CompactFormatter.write_number_str(&mut self.writer(writer), value)
        }
    }

    wrapper!(begin_string);
    wrapper!(end_string);

    // Strings are normalized as Normalization Form C (NFC). `str::nfc` is provided
    // by the `UnicodeNormalization` trait and returns an iterator of `char`s.
    fn write_string_fragment<W: Write + ?Sized>(
        &mut self,
        writer: &mut W,
        fragment: &str,
    ) -> Result<()> {
        // Normalize the whole fragment first and emit it with a single write,
        // instead of resolving (and boxing) the target writer per character.
        let normalized: String = fragment.nfc().collect();
        self.writer(writer).write_all(normalized.as_bytes())
    }

    // Unlike Canonical JSON proper, we **do** escape control characters
    wrapper!(write_char_escape, CharEscape);

    wrapper!(begin_array);
    wrapper!(end_array);
    wrapper!(begin_array_value, bool); // hack: this passes through the `first` argument
    wrapper!(end_array_value);

    // Here are the object methods. Because keys must be sorted, we serialize the
    // object's keys and values in memory as a `BTreeMap`, then write it all out
    // when `end_object` is called.

    fn begin_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
        // The opening brace belongs to the enclosing context, so it is written
        // before the new object is pushed onto the stack.
        CompactFormatter.begin_object(&mut self.writer(writer))?;
        self.object_stack.push(Object::default());
        Ok(())
    }

    fn end_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
        let object = self.object_stack.pop().ok_or_else(|| {
            Error::other(
                "serde_json called Formatter::end_object object method without calling begin_object first",
            )
        })?;

        // With the finished object popped, `self.writer` resolves to the
        // enclosing object's buffer, or to `writer` itself at the top level.
        let mut out = self.writer(writer);

        // `BTreeMap` iteration yields the members in ascending key order.
        for (index, (key, value)) in object.obj.into_iter().enumerate() {
            CompactFormatter.begin_object_key(&mut out, index == 0)?;
            out.write_all(&key)?;
            CompactFormatter.end_object_key(&mut out)?;
            CompactFormatter.begin_object_value(&mut out)?;
            out.write_all(&value)?;
            CompactFormatter.end_object_value(&mut out)?;
        }

        CompactFormatter.end_object(&mut out)
    }

    // Separators are emitted by `end_object`, so the key/value hooks below only
    // drive the `Object` state machine and never touch the writer they receive.
    fn begin_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W, _first: bool) -> Result<()> {
        let object = self.obj_mut()?;
        object.key_done = false;
        Ok(())
    }

    fn end_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
        let object = self.obj_mut()?;
        object.key_done = true;
        Ok(())
    }

    fn begin_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
        Ok(())
    }

    fn end_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
        let object = self.obj_mut()?;
        // Move the finished pair into the map, leaving empty buffers behind for
        // the next member.
        let key = std::mem::take(&mut object.next_key);
        let value = std::mem::take(&mut object.next_value);
        object.obj.insert(key, value);
        Ok(())
    }

    // This is for serde_json's `raw_value` feature, which provides a RawValue type
    // that is passed through as-is. That's not good enough for canonical JSON,
    // so we parse it and immediately write it back out... as canonical JSON.
    fn write_raw_fragment<W: Write + ?Sized>(
        &mut self,
        writer: &mut W,
        fragment: &str,
    ) -> Result<()> {
        // A fresh formatter canonicalizes the fragment on its own; its output
        // goes to whichever sink is current for `self`.
        let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
        serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
        Ok(())
    }
}
#[cfg(test)]
mod test {
    use std::io::Result;

    use super::CanonicalFormatter;
    use serde::Serialize;
    use serde_json::Serializer;

    /// Serializes the given `json!` input with `CanonicalFormatter` and yields
    /// the produced bytes as `std::io::Result<Vec<u8>>`.
    macro_rules! encode {
        ($($tt:tt)+) => {
            (|v: serde_json::Value| -> Result<Vec<u8>> {
                let mut buf = Vec::new();
                let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
                v.serialize(&mut ser)?;
                Ok(buf)
            })(serde_json::json!($($tt)+))
        };
    }

    /// Like `encode!`, but yields the output as a `String`. The bytes are
    /// validated as UTF-8; invalid output surfaces as an `InvalidData` error
    /// instead of being trusted blindly.
    macro_rules! encode_string {
        ($($tt:tt)+) => {
            (|v: serde_json::Value| -> Result<String> {
                let bytes = encode!(v)?;
                String::from_utf8(bytes)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
            })(serde_json::json!($($tt)+))
        };
    }

    #[test]
    fn securesystemslib_asserts() -> Result<()> {
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([])?, b"[]");
        assert_eq!(encode!({})?, b"{}");
        assert_eq!(encode!({"A": [99]})?, br#"{"A":[99]}"#);
        assert_eq!(encode!({"A": true})?, br#"{"A":true}"#);
        assert_eq!(encode!({"B": false})?, br#"{"B":false}"#);
        assert_eq!(encode!({"x": 3, "y": 2})?, br#"{"x":3,"y":2}"#);
        assert_eq!(encode!({"x": 3, "y": null})?, br#"{"x":3,"y":null}"#);

        // Test conditions for invalid arguments.
        assert!(encode!(8.0).is_err());
        assert!(encode!({"x": 8.0}).is_err());

        Ok(())
    }

    #[test]
    fn ascii_control_characters() -> Result<()> {
        assert_eq!(encode_string!("\x00")?, r#""\u0000""#);
        assert_eq!(encode_string!("\x01")?, r#""\u0001""#);
        assert_eq!(encode_string!("\x02")?, r#""\u0002""#);
        assert_eq!(encode_string!("\x03")?, r#""\u0003""#);
        assert_eq!(encode_string!("\x04")?, r#""\u0004""#);
        assert_eq!(encode_string!("\x05")?, r#""\u0005""#);
        assert_eq!(encode_string!("\x06")?, r#""\u0006""#);
        assert_eq!(encode_string!("\x07")?, r#""\u0007""#);
        assert_eq!(encode_string!("\x08")?, r#""\b""#);
        assert_eq!(encode_string!("\x09")?, r#""\t""#);
        assert_eq!(encode_string!("\x0a")?, r#""\n""#);
        assert_eq!(encode_string!("\x0b")?, r#""\u000b""#);
        assert_eq!(encode_string!("\x0c")?, r#""\f""#);
        assert_eq!(encode_string!("\x0d")?, r#""\r""#);
        assert_eq!(encode_string!("\x0e")?, r#""\u000e""#);
        assert_eq!(encode_string!("\x0f")?, r#""\u000f""#);
        assert_eq!(encode_string!("\x10")?, r#""\u0010""#);
        assert_eq!(encode_string!("\x11")?, r#""\u0011""#);
        assert_eq!(encode_string!("\x12")?, r#""\u0012""#);
        assert_eq!(encode_string!("\x13")?, r#""\u0013""#);
        assert_eq!(encode_string!("\x14")?, r#""\u0014""#);
        assert_eq!(encode_string!("\x15")?, r#""\u0015""#);
        assert_eq!(encode_string!("\x16")?, r#""\u0016""#);
        assert_eq!(encode_string!("\x17")?, r#""\u0017""#);
        assert_eq!(encode_string!("\x18")?, r#""\u0018""#);
        assert_eq!(encode_string!("\x19")?, r#""\u0019""#);
        assert_eq!(encode_string!("\x1a")?, r#""\u001a""#);
        assert_eq!(encode_string!("\x1b")?, r#""\u001b""#);
        assert_eq!(encode_string!("\x1c")?, r#""\u001c""#);
        assert_eq!(encode_string!("\x1d")?, r#""\u001d""#);
        assert_eq!(encode_string!("\x1e")?, r#""\u001e""#);
        assert_eq!(encode_string!("\x1f")?, r#""\u001f""#);

        // Escapes apply to object keys as well as values.
        pretty_assertions::assert_eq!(encode_string!({"\t": "\n"})?, r#"{"\t":"\n"}"#);

        assert_eq!(encode_string!("\\")?, r#""\\""#);
        assert_eq!(encode_string!("\"")?, r#""\"""#);

        Ok(())
    }

    #[test]
    fn strings_are_nfc_normalized() -> Result<()> {
        // "e" followed by U+0301 COMBINING ACUTE ACCENT composes to U+00E9.
        assert_eq!(encode_string!("e\u{301}")?, "\"\u{e9}\"");
        // Keys are normalized the same way as values.
        assert_eq!(
            encode_string!({"e\u{301}": "e\u{301}"})?,
            "{\"\u{e9}\":\"\u{e9}\"}"
        );

        Ok(())
    }

    #[test]
    fn keys_are_sorted_bytewise() -> Result<()> {
        // Upper-case ASCII sorts before lower-case ASCII.
        assert_eq!(encode!({"b": 1, "B": 2, "a": 3})?, br#"{"B":2,"a":3,"b":1}"#);

        Ok(())
    }

    #[test]
    fn ordered_nested_object() -> Result<()> {
        assert_eq!(
            encode!({
                "nested": {
                    "good": false,
                    "bad": true
                },
                "b": 2,
                "a": 1,
                "c": {
                    "h": {
                        "h": -5,
                        "i": 3
                    },
                    "a": null,
                    "x": {}
                },
                "zzz": "I have a newline\n"
            })?,
            br#"{"a":1,"b":2,"c":{"a":null,"h":{"h":-5,"i":3},"x":{}},"nested":{"bad":true,"good":false},"zzz":"I have a newline\n"}"#.to_vec(),
        );

        Ok(())
    }
}