Radish alpha
h
Radicle Heartwood Protocol & Stack
Radicle
Git (anonymous pull)
Log in to clone via SSH
radicle: custom CanonicalFormatter
Fintan Halpenny committed 3 years ago
commit 5ea8505000dea690699258e78ad16e2009af3fa6
parent f9dbd590a40aeceab30198327f4d846aed5400b9
8 files changed +424 -15
modified Cargo.lock
@@ -1564,17 +1564,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"

[[package]]
-
name = "olpc-cjson"
-
version = "0.1.2"
-
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "87dc75cf72208cd853671c1abccc5d5d1e43b1e378dde67340ef933219a8c13c"
-
dependencies = [
-
 "serde",
-
 "serde_json",
-
 "unicode-normalization",
-
]
-

-
[[package]]
name = "once_cell"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1820,7 +1809,6 @@ dependencies = [
 "log",
 "multibase",
 "nonempty 0.8.1",
-
 "olpc-cjson",
 "once_cell",
 "pretty_assertions",
 "qcheck",
@@ -1836,6 +1824,7 @@ dependencies = [
 "sqlite",
 "tempfile",
 "thiserror",
+
 "unicode-normalization",
]

[[package]]
modified radicle/Cargo.toml
@@ -20,7 +20,6 @@ multibase = { version = "0.9.1" }
log = { version = "0.4.17", features = ["std"] }
nonempty = { version = "0.8.1", features = ["serialize"] }
once_cell = { version = "1.13" }
-
olpc-cjson = { version = "0.1.1" }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1", features = ["preserve_order"] }
siphasher = { version = "0.3.10" }
@@ -28,6 +27,7 @@ radicle-git-ext = { version = "0", features = ["serde"] }
sqlite = { version = "0.30.3", optional = true }
tempfile = { version = "3.3.0" }
thiserror = { version = "1" }
+
unicode-normalization = { version = "0.1" }

[dependencies.git2]
version = "0.16.1"
added radicle/src/canonical.rs
@@ -0,0 +1,16 @@
+
//! A [Canonical JSON] formatter that escapes control characters. This
+
//! differs from the olpc-cjson standard.
+
//!
+
//! The [`olpc-cjson`] crate itself states:
+
//!
+
//! > OLPC’s canonical JSON specification is subtly different from
+
//! > other “canonical JSON” specifications, and is also not a strict
+
//! > subset of JSON (specifically, ASCII control characters 0x00–0x1f
+
//! > are printed literally, which is not valid JSON). Therefore,
+
//! > serde_json cannot necessarily deserialize JSON produced by this
+
//! > formatter.
+
//!
+
//! [Canonical JSON]: http://wiki.laptop.org/go/Canonical_JSON
+
//! [`olpc-cjson`]: https://docs.rs/olpc-cjson/0.1.2/olpc_cjson
+

+
pub mod formatter;
added radicle/src/canonical/formatter.rs
@@ -0,0 +1,372 @@
+
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
// SPDX-License-Identifier: MIT OR Apache-2.0
+

+
use std::{
+
    collections::BTreeMap,
+
    io::{Error, ErrorKind, Result, Write},
+
};
+

+
use serde::Serialize;
+
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
+
use unicode_normalization::UnicodeNormalization;
+

+
/// A [`serde_json::ser::Formatter`] which outputs [Canonical JSON].
+
///
+
/// The Radicle specification disagrees with the [Canonical JSON] spec, in that
+
/// ASCII control characters (`U+0000` - `U+001F`) in string values are
+
/// escaped according to the normal JSON escaping rules as per [RFC 8259],
+
/// Section 7. The reason is that conformant JSON parsers (such as `serde_json`)
+
/// will typically refuse to accept strings containing these characters as
+
/// unescaped bytes. As we standardise on an exact set, and mandate that hex
+
/// escape sequences shall be lower-case, canonicity is preserved (unicode
+
/// normalisation still applies).
+
///
+
/// This implementation is based on the [`olpc-cjson`] crate, and inlined here
+
/// for distribution convenience. We expressly license the original code under
+
/// the terms of the MIT licence.
+
///
+
/// [Canonical JSON]: http://wiki.laptop.org/go/Canonical_JSON
+
/// [RFC 8259]: https://www.rfc-editor.org/rfc/rfc8259.txt
+
#[derive(Debug, Default)]
+
pub struct CanonicalFormatter {
+
    object_stack: Vec<Object>, // objects currently being buffered; the innermost one is last
+
}
+

+
/// Internal bookkeeping for a JSON object that is in the process of being built.
+
///
+
/// As keys and values are received by `CanonicalFormatter`, they are written to
+
/// `next_key` and `next_value` by using the `CanonicalFormatter::writer`
+
/// convenience method.
+
///
+
/// How this struct behaves when `Formatter` methods are called:
+
///
+
/// ```plain
+
/// [other methods]  // values written to the writer received by method
+
/// begin_object     // create this object
+
/// /-> begin_object_key    // object.key_done = false;
+
/// |   [other methods]     // values written to object.next_key, writer received by method ignored
+
/// |   end_object_key      // object.key_done = true;
+
/// |   begin_object_value  // [nothing]
+
/// |   [other methods]     // values written to object.next_value
+
/// |   end_object_value    // object.next_key and object.next_value are inserted into object.obj
+
/// \---- // jump back if more values are present
+
/// end_object       // write the object (sorted by its keys) to the parent object's buffer, or to the method's writer at top level
+
/// ```
+
#[derive(Debug, Default)]
+
struct Object {
+
    obj: BTreeMap<Vec<u8>, Vec<u8>>, // finished entries: serialized key bytes -> serialized value bytes
+
    next_key: Vec<u8>, // bytes of the key currently being written
+
    next_value: Vec<u8>, // bytes of the value currently being written
+
    key_done: bool, // false while writing a key, true once `end_object_key` has been called
+
}
+

+
impl CanonicalFormatter {
+
    /// Build a formatter that has no objects in progress.
+
    pub fn new() -> Self {
+
        Self {
+
            object_stack: Vec::new(),
+
        }
+
    }
+

+
    /// Pick the destination for the next piece of output.
+
    ///
+
    /// While an object is being built (the object stack is non-empty),
+
    /// output goes into the innermost object's `next_value` buffer once
+
    /// its key is complete, and into its `next_key` buffer otherwise.
+
    /// See the docstrings for `Object` for the state machine.
+
    ///
+
    /// With no object in progress, the caller's `writer` is handed back.
+
    fn writer<'a, W: Write + ?Sized>(&'a mut self, writer: &'a mut W) -> Box<dyn Write + 'a> {
+
        match self.object_stack.last_mut() {
+
            None => Box::new(writer),
+
            Some(Object {
+
                key_done: true,
+
                next_value,
+
                ..
+
            }) => Box::new(next_value),
+
            Some(Object { next_key, .. }) => Box::new(next_key),
+
        }
+
    }
+

+
    /// Borrow the innermost object under construction, or fail if there
+
    /// is none.
+
    fn obj_mut(&mut self) -> Result<&mut Object> {
+
        match self.object_stack.last_mut() {
+
            Some(top) => Ok(top),
+
            None => Err(Error::new(
+
                ErrorKind::Other,
+
                "serde_json called an object method without calling begin_object first",
+
            )),
+
        }
+
    }
+
}
+

+
/// Generates a `Formatter` method that calls the `CompactFormatter` method of the same
+
/// name, writing to the destination chosen by `CanonicalFormatter::writer`.
+
macro_rules! wrapper {
+
    ($f:ident) => { // form for methods that take only the writer
+
        fn $f<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
+
            CompactFormatter.$f(&mut self.writer(writer))
+
        }
+
    };
+

+
    ($f:ident, $t:ty) => { // form for methods that take the writer plus one argument of type `$t`
+
        fn $f<W: Write + ?Sized>(&mut self, writer: &mut W, arg: $t) -> Result<()> {
+
            CompactFormatter.$f(&mut self.writer(writer), arg)
+
        }
+
    };
+
}
+

+
/// Expands to the `Err` returned whenever a floating point number is encountered.
+
macro_rules! float_err {
+
    () => {
+
        Err(Error::new(
+
            ErrorKind::InvalidInput,
+
            "floating point numbers are not allowed in canonical JSON",
+
        ))
+
    };
+
}
+

+
impl Formatter for CanonicalFormatter {
+
    wrapper!(write_null);
+
    wrapper!(write_bool, bool);
+
    wrapper!(write_i8, i8);
+
    wrapper!(write_i16, i16);
+
    wrapper!(write_i32, i32);
+
    wrapper!(write_i64, i64);
+
    wrapper!(write_u8, u8);
+
    wrapper!(write_u16, u16);
+
    wrapper!(write_u32, u32);
+
    wrapper!(write_u64, u64);
+

+
    fn write_f32<W: Write + ?Sized>(&mut self, _writer: &mut W, _value: f32) -> Result<()> {
+
        float_err!()
+
    }
+

+
    fn write_f64<W: Write + ?Sized>(&mut self, _writer: &mut W, _value: f64) -> Result<()> {
+
        float_err!()
+
    }
+

+
    // By default this is only used for u128/i128. If serde_json's
+
    // `arbitrary_precision` feature is enabled, all numbers are internally
+
    // stored as strings, and this method is always used (even for floating
+
    // point values).
+
    fn write_number_str<W: Write + ?Sized>(&mut self, writer: &mut W, value: &str) -> Result<()> {
+
        if value.chars().any(|c| c == '.' || c == 'e' || c == 'E') {
+
            float_err!()
+
        } else {
+
            CompactFormatter.write_number_str(&mut self.writer(writer), value)
+
        }
+
    }
+

+
    wrapper!(begin_string);
+
    wrapper!(end_string);
+

+
    // Strings are normalized as Normalization Form C (NFC). `str::nfc` is provided
+
    // by the `UnicodeNormalization` trait and returns an iterator of `char`s.
+
    fn write_string_fragment<W: Write + ?Sized>(
+
        &mut self,
+
        writer: &mut W,
+
        fragment: &str,
+
    ) -> Result<()> {
+
        fragment.nfc().try_for_each(|ch| {
+
            self.writer(writer)
+
                .write_all(ch.encode_utf8(&mut [0; 4]).as_bytes())
+
        })
+
    }
+

+
    // Unlike Canonical JSON proper, we **do** escape control characters
+
    wrapper!(write_char_escape, CharEscape);
+

+
    wrapper!(begin_array);
+
    wrapper!(end_array);
+
    wrapper!(begin_array_value, bool); // hack: this passes through the `first` argument
+
    wrapper!(end_array_value);
+

+
    // Here are the object methods. Because keys must be sorted, we serialize the
+
    // object's keys and values in memory as a `BTreeMap`, then write it all out
+
    // when `end_object_value` is called.
+

+
    fn begin_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
+
        CompactFormatter.begin_object(&mut self.writer(writer))?;
+
        self.object_stack.push(Object::default());
+
        Ok(())
+
    }
+

+
    fn end_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
+
        let object = self.object_stack.pop().ok_or_else(|| {
+
            Error::new(
+
                ErrorKind::Other,
+
                "serde_json called Formatter::end_object object method
+
                 without calling begin_object first",
+
            )
+
        })?;
+
        let mut writer = self.writer(writer);
+
        let mut first = true;
+

+
        for (key, value) in object.obj {
+
            CompactFormatter.begin_object_key(&mut writer, first)?;
+
            writer.write_all(&key)?;
+
            CompactFormatter.end_object_key(&mut writer)?;
+

+
            CompactFormatter.begin_object_value(&mut writer)?;
+
            writer.write_all(&value)?;
+
            CompactFormatter.end_object_value(&mut writer)?;
+

+
            first = false;
+
        }
+

+
        CompactFormatter.end_object(&mut writer)
+
    }
+

+
    fn begin_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W, _first: bool) -> Result<()> {
+
        let mut object = self.obj_mut()?;
+
        object.key_done = false;
+
        Ok(())
+
    }
+

+
    fn end_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
+
        let mut object = self.obj_mut()?;
+
        object.key_done = true;
+
        Ok(())
+
    }
+

+
    fn begin_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
+
        Ok(())
+
    }
+

+
    fn end_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
+
        let object = self.obj_mut()?;
+
        let key = std::mem::take(&mut object.next_key);
+
        let value = std::mem::take(&mut object.next_value);
+
        object.obj.insert(key, value);
+
        Ok(())
+
    }
+

+
    // This is for serde_json's `raw_value` feature, which provides a RawValue type
+
    // that is passed through as-is. That's not good enough for canonical JSON,
+
    // so we parse it and immediately write it back out... as canonical JSON.
+
    fn write_raw_fragment<W: Write + ?Sized>(
+
        &mut self,
+
        writer: &mut W,
+
        fragment: &str,
+
    ) -> Result<()> {
+
        let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
+
        serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
+
        Ok(())
+
    }
+
}
+

+
#[cfg(test)]
+
mod test {
+
    use std::io::Result;
+

+
    use super::CanonicalFormatter;
+
    use serde::Serialize;
+
    use serde_json::Serializer;
+

+
    // Serializes a `serde_json::json!` literal with `CanonicalFormatter` and
+
    // evaluates to the output bytes as `Result<Vec<u8>>`.
+
    macro_rules! encode {
+
        ($($tt:tt)+) => {
+
            (|v: serde_json::Value| -> Result<Vec<u8>> {
+
                let mut buf = Vec::new();
+
                let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
+
                v.serialize(&mut ser)?;
+
                Ok(buf)
+
            })(serde_json::json!($($tt)+))
+
        };
+
    }
+

+
    // Like `encode!`, but evaluates to the output as `Result<String>`.
+
    macro_rules! encode_string {
+
        ($($tt:tt)+) => {
+
            (|v: serde_json::Value| -> Result<String> {
+
                let bytes = encode!(v)?;
+
                // Validate rather than assume UTF-8, so that a formatter bug
+
                // shows up as a test failure instead of undefined behaviour.
+
                String::from_utf8(bytes)
+
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
+
            })(serde_json::json!($($tt)+))
+
        };
+
    }
+

+
    #[test]
+
    fn securesystemslib_asserts() -> Result<()> {
+
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
+
        assert_eq!(encode!([])?, b"[]");
+
        assert_eq!(encode!({})?, b"{}");
+
        assert_eq!(encode!({"A": [99]})?, br#"{"A":[99]}"#);
+
        assert_eq!(encode!({"A": true})?, br#"{"A":true}"#);
+
        assert_eq!(encode!({"B": false})?, br#"{"B":false}"#);
+
        assert_eq!(encode!({"x": 3, "y": 2})?, br#"{"x":3,"y":2}"#);
+
        assert_eq!(encode!({"x": 3, "y": null})?, br#"{"x":3,"y":null}"#);
+

+
        // Test conditions for invalid arguments.
+
        assert!(encode!(8.0).is_err());
+
        assert!(encode!({"x": 8.0}).is_err());
+

+
        Ok(())
+
    }
+

+
    #[test]
+
    fn ascii_control_characters() -> Result<()> {
+
        assert_eq!(encode_string!("\x00")?, r#""\u0000""#);
+
        assert_eq!(encode_string!("\x01")?, r#""\u0001""#);
+
        assert_eq!(encode_string!("\x02")?, r#""\u0002""#);
+
        assert_eq!(encode_string!("\x03")?, r#""\u0003""#);
+
        assert_eq!(encode_string!("\x04")?, r#""\u0004""#);
+
        assert_eq!(encode_string!("\x05")?, r#""\u0005""#);
+
        assert_eq!(encode_string!("\x06")?, r#""\u0006""#);
+
        assert_eq!(encode_string!("\x07")?, r#""\u0007""#);
+
        assert_eq!(encode_string!("\x08")?, r#""\b""#);
+
        assert_eq!(encode_string!("\x09")?, r#""\t""#);
+
        assert_eq!(encode_string!("\x0a")?, r#""\n""#);
+
        assert_eq!(encode_string!("\x0b")?, r#""\u000b""#);
+
        assert_eq!(encode_string!("\x0c")?, r#""\f""#);
+
        assert_eq!(encode_string!("\x0d")?, r#""\r""#);
+
        assert_eq!(encode_string!("\x0e")?, r#""\u000e""#);
+
        assert_eq!(encode_string!("\x0f")?, r#""\u000f""#);
+
        assert_eq!(encode_string!("\x10")?, r#""\u0010""#);
+
        assert_eq!(encode_string!("\x11")?, r#""\u0011""#);
+
        assert_eq!(encode_string!("\x12")?, r#""\u0012""#);
+
        assert_eq!(encode_string!("\x13")?, r#""\u0013""#);
+
        assert_eq!(encode_string!("\x14")?, r#""\u0014""#);
+
        assert_eq!(encode_string!("\x15")?, r#""\u0015""#);
+
        assert_eq!(encode_string!("\x16")?, r#""\u0016""#);
+
        assert_eq!(encode_string!("\x17")?, r#""\u0017""#);
+
        assert_eq!(encode_string!("\x18")?, r#""\u0018""#);
+
        assert_eq!(encode_string!("\x19")?, r#""\u0019""#);
+
        assert_eq!(encode_string!("\x1a")?, r#""\u001a""#);
+
        assert_eq!(encode_string!("\x1b")?, r#""\u001b""#);
+
        assert_eq!(encode_string!("\x1c")?, r#""\u001c""#);
+
        assert_eq!(encode_string!("\x1d")?, r#""\u001d""#);
+
        assert_eq!(encode_string!("\x1e")?, r#""\u001e""#);
+
        assert_eq!(encode_string!("\x1f")?, r#""\u001f""#);
+

+
        // Escaping applies to object keys as well as values.
+
        pretty_assertions::assert_eq!(encode_string!({"\t": "\n"})?, r#"{"\t":"\n"}"#);
+
        assert_eq!(encode_string!("\\")?, r#""\\""#);
+
        assert_eq!(encode_string!("\"")?, r#""\"""#);
+

+
        Ok(())
+
    }
+

+
    #[test]
+
    fn ordered_nested_object() -> Result<()> {
+
        // Keys must come out sorted at every nesting level.
+
        assert_eq!(
+
            encode!({
+
                "nested": {
+
                    "good": false,
+
                    "bad": true
+
                },
+
                "b": 2,
+
                "a": 1,
+
                "c": {
+
                    "h": {
+
                        "h": -5,
+
                        "i": 3
+
                    },
+
                    "a": null,
+
                    "x": {}
+
                },
+
                "zzz": "I have a newline\n"
+
            })?,
+
            br#"{"a":1,"b":2,"c":{"a":null,"h":{"h":-5,"i":3},"x":{}},"nested":{"bad":true,"good":false},"zzz":"I have a newline\n"}"#.to_vec(),
+
        );
+

+
        Ok(())
+
    }
+
}
modified radicle/src/cob/issue.rs
@@ -826,4 +826,32 @@ mod test {
        issues.iter().find(|i| i.title() == "Second").unwrap();
        issues.iter().find(|i| i.title() == "Third").unwrap();
    }
+

+
    #[test]
+
    fn test_issue_multilines() { // an issue whose description contains a newline must read back unchanged
+
        let tmp = tempfile::tempdir().unwrap();
+
        let (_, signer, project) = test::setup::context(&tmp);
+
        let mut issues = Issues::open(*signer.public_key(), &project).unwrap();
+
        let created = issues
+
            .create(
+
                "My first issue",
+
                "Blah blah blah.\nYah yah yah", // description with an embedded newline
+
                &[],
+
                &[],
+
                &signer,
+
            )
+
            .unwrap();
+

+
        assert_eq!(created.clock().get(), 4); // NOTE(review): presumably one tick per operation performed by `create` -- confirm
+

+
        let (id, created) = (created.id, created.issue);
+
        let issue = issues.get(&id).unwrap().unwrap(); // load the issue again by id
+

+
        assert_eq!(created, issue);
+
        assert_eq!(issue.title(), "My first issue");
+
        assert_eq!(issue.author(), issues.author());
+
        assert_eq!(issue.description(), Some("Blah blah blah.\nYah yah yah"));
+
        assert_eq!(issue.comments().count(), 1);
+
        assert_eq!(issue.state(), &State::Open);
+
    }
}
modified radicle/src/cob/store.rs
@@ -320,11 +320,13 @@ impl<T: FromHistory> Transaction<T> {
pub mod encoding {
    use serde::Serialize;

+
    use crate::canonical::formatter::CanonicalFormatter;
+

    /// Serialize the change into a byte string.
    pub fn encode<T: Serialize>(obj: &T) -> Result<Vec<u8>, serde_json::Error> {
        let mut buf = Vec::new();
        let mut serializer =
-
            serde_json::Serializer::with_formatter(&mut buf, olpc_cjson::CanonicalFormatter::new());
+
            serde_json::Serializer::with_formatter(&mut buf, CanonicalFormatter::new());

        obj.serialize(&mut serializer)?;

modified radicle/src/identity/doc.rs
@@ -13,6 +13,7 @@ use radicle_git_ext::Oid;
use serde::{Deserialize, Serialize};
use thiserror::Error;

+
use crate::canonical::formatter::CanonicalFormatter;
use crate::crypto;
use crate::crypto::{Signature, Unverified, Verified};
use crate::git;
@@ -160,7 +161,7 @@ impl Doc<Verified> {
    pub fn encode(&self) -> Result<(git::Oid, Vec<u8>), DocError> {
        let mut buf = Vec::new();
        let mut serializer =
-
            serde_json::Serializer::with_formatter(&mut buf, olpc_cjson::CanonicalFormatter::new());
+
            serde_json::Serializer::with_formatter(&mut buf, CanonicalFormatter::new());

        self.serialize(&mut serializer)?;
        let oid = git2::Oid::hash_object(git2::ObjectType::Blob, &buf)?;
modified radicle/src/lib.rs
@@ -8,6 +8,7 @@ pub extern crate radicle_crypto as crypto;
#[macro_use]
extern crate amplify;

+
mod canonical;
pub mod cob;
pub mod collections;
pub mod git;