Radish alpha
r
Radicle Improvement Proposals (RIPs)
Radicle
Git (anonymous pull)
Log in to clone via SSH
R
Richard Levitte
RIP 4: URI Scheme 16 days ago 43301b47e5643de823fc14c51e4826dde54db3fb History
rips 0004-general-uri-scheme data rad-uri.ne
@{%
    // Collapse a parse result into a single string.
    //
    // `d` is the (possibly deeply nested) array nearley passes to a
    // postprocessor.  Every level of nesting is flattened and the pieces
    // are concatenated in order.  Array.prototype.join renders null and
    // undefined as '', so absent optional parts contribute nothing.
    //
    // The flattening depth must be unbounded: a partially flattened
    // result leaves inner arrays behind, and join() stringifies those
    // with "," separators (this happened for the 'ls32' part of an IPv6
    // host, which sits more than five levels below host_and_port).
    const lstr = function(d) { return d.flat(Infinity).join(''); }
%}

# In the ABNF grammar, 'rad-resource-type-and-identity' defaults to empty,
# which effectively renders it optional.  Nearley doesn't allow empty rules,
# and multiple rules with the same name can be added anyway, so we make
# resource_type_and_identity optional here to get the same essential semantics.
# Result: an object holding 'scheme', the keys produced by
# auth_and_resource (or legacy), and - only when present in the input -
# 'resource', 'query' and 'fragment'.
rad                     -> scheme ":" auth_and_resource resource_type_and_identity:?
                           ( "?" rad_query ):? ( "#" fragment ):?
                           {% ([s, , aar, rtai, q, f]) =>
                               ({ scheme: s,
                                  ...aar,
                                  ...(rtai ? { resource: rtai} : {}),
                                  // q is [ "?", <rad_query result> ]
                                  ...(q ? { query: q[1]} : {}),
                                  // f is [ "#", <fragment match> ].  The
                                  // 'fragment' rule has no postprocessor,
                                  // so its match is a nested array of
                                  // characters; join it into a string.
                                  ...(f ? { fragment: lstr(f[1]) } : {}) }) %}
                         | scheme ":" legacy
                           {% ([s, , l]) => ({ scheme: s, ...l }) %}

# The only scheme accepted is the literal "rad"; it is passed on as a string.
scheme                  -> "rad" {% ([name]) => name %}

# Merges the keys produced by 'auth' (if any) with those produced by
# 'resource' into one object.  An absent 'auth' arrives as null, which
# Object.assign skips.
auth_and_resource       -> "//" auth:? "/" resource             # absolute resource "path"
                           {% (d) => Object.assign({}, d[1], d[3]) %}
                         | resource                             # relative resource "path"
                           {% id %}

# 'legacy' are URLs defined by RIP #3.  That URL doesn't quite hold up
# against the general URI syntax defined in RFC3986, in so far that a node
# (identified with NID or NID@host) is the conceptual authority, not
# a repository.
# To be noted is that RIDs and NIDs are distinct enough that there shouldn't
# be any ambiguity between rad-auth-and-resource and rad-legacy.
# Of all current implementations, this mostly impacts the git remote helper
# git-remote-rad.
# Drops the leading "//" and passes the 'resource' result through unchanged.
legacy                  -> "//" resource
                           {% (d) => d[1] %}

# The NID is conceptually ambiguous from an RFC 3986 syntax perspective:
#
#       authority   = [ userinfo "@" ] host [ ":" port ]
#
# Is the NID 'userinfo' or 'host'?  In a sense, it currently is a little bit
# of both.  Furthermore, Radicle requires that if a hostname (+ port) is
# given, the NID must be given too.  Therefore, instead of using 'authority'
# strictly as given by RFC 3986, we allow ourselves a slightly different
# syntax, which is fine.  Do note also, that a 'host' can still be made to
# look like an NID, so programs dealing directly with the Radicle network
# will have to be prepared to handle that ambiguity appropriately.
#
# TODO: There already are ways to resolve a node's NID from its hostname.
# For example, if `radicle-httpd` runs next to the node, then it can be
# queried for its NID via HTTPS. Also, there are ways to use DNS TXT records,
# which is also done for DNS-SD.
# We would therefore likely make the `node` optional, or even omit it.
# If `node` is sufficiently different from a DNS name (it probably is
# not), then we can even omit "@".
# Yields { node } and, when "@host:port" follows, additionally { host }.
auth                    -> node ("@" host_and_port):?
                           {% (d) => {
                                  const [nodeId, hostPart] = d;
                                  const result = { node: nodeId };
                                  // hostPart is [ "@", <host_and_port string> ]
                                  // or null when no host was given.
                                  if (hostPart) { result.host = hostPart[1]; }
                                  return result;
                              } %}

# Yields the matched text, "host:port", as one string.
host_and_port           -> host ":" port {% lstr %}

# rad-node is a semantic symbol, to signify intent
node                    -> nid {% ([n]) => n %}

# An NID is an ID that is ultimately a public key in the Radicle network.
# This is used to uniquely represent a node, or a user (also a node), or a
# namespace within a repository.
# It is a [multibase] and [multicodec] encoded string, using [base58btc] as the multibase
# and the codec for the underlying Ed25519 public key.
#
# It is always 48 characters long, including the initial "z6Mk",
# the `z` corresponding to the base, and `6Mk` corresponding to the codec.
#
# [base58btc]: https://digitalbazaar.github.io/base58-spec/
# [multibase]: https://www.ietf.org/archive/id/draft-multiformats-multibase-07.html
# [multicodec]: https://github.com/multiformats/multicodec/
#
# TODO: New NIDs in the future? Even this might change, as we allow rotating
# keys for nodes. But this is even further out than stable IDs for users.
# "z6Mk" (4) + 25 + 10 + 5 + 4 single characters = 48 characters in total.
# Every matched part is already a string, so a plain join suffices.
nid                     -> "z6Mk" b58x25 b58x10 b58x5 base58btc base58btc base58btc base58btc {% (parts) => parts.join('') %}

# Yields { repo } and, when "/<namespace>" follows, additionally { namespace }.
resource                -> repository ( "/" namespace ):?
                           {% (d) => {
                                  const result = { repo: d[0] };
                                  // d[1] is [ "/", <namespace> ] or null
                                  if (d[1]) { result.namespace = d[1][1]; }
                                  return result;
                              } %}

# repository is a semantic symbol, to signify intent
repository              -> rid {% ([r]) => r %}

# namespace is a semantic symbol, to signify intent
namespace               -> nid {% ([n]) => n %}

# An RID uniquely represents a git repository in the Radicle network.
# It is a [multibase] encoded string of a Git OID, using [base58btc] as the multibase.
#
# It is 28 or 29 characters long, including the initial multibase identifier "z".
#
# [base58btc]: https://digitalbazaar.github.io/base58-spec/
# [multibase]: https://www.ietf.org/archive/id/draft-multiformats-multibase-07.html
#
# TODO: In the near future (as soon as Git repositories with SHA-256 object
# format become popular, which will be in late 2026), we will have to extend
# the syntax of RIDs. Leave a note here so that implementors know this and
# make design decisions accordingly.
# "z" + 25 + 2 mandatory + 1 optional character = 28 or 29 characters.
rid                     -> "z" b58x25 base58btc base58btc base58btc:?
                           {% (d, location, reject) => {
                                   // join() renders the absent optional
                                   // last character (null) as ''.
                                   const s = d.join('');
                                   // Check that the RID doesn't start
                                   // with "z6Mk", as that can only be
                                   // an NID.  Reject it if it does.
                                   if (s.startsWith("z6Mk")) { return reject; }
                                   return s;
                               }
                           %}

# For git specific resource types
# Each alternative passes the result of the specific rule through unchanged.
resource_type_and_identity -> git_commit {% id %}
                            | git_tree   {% id %}
                            | git_blob   {% id %}
                            | git_tag    {% id %}

# Each rule yields { type: <kind>, ... }.  An object ID is stored under
# 'obj' and a reference name under 'ref'; commits and tags may carry either
# (see git_obj_or_ref), trees and blobs only accept an object ID.
git_commit              -> "/" "commit" "/" git_obj_or_ref {% ([, , , o]) => ({ type: "commit", ...o }) %}
git_tree                -> "/" "tree" "/" git_obj {% ([, , , o]) => ({ type: "tree", obj: o }) %}
# A blob is matched by git_obj, so it is reported under 'obj', like a tree.
git_blob                -> "/" "blob" "/" git_obj {% ([, , , o]) => ({ type: "blob", obj: o }) %}
git_tag                 -> "/" "tag" "/" git_obj_or_ref {% ([, , , o]) => ({ type: "tag", ...o }) %}

# Note: this rule will always be ambiguous, as something that looks like a
# git object ID could very well be a branch or a tag.
# Some git commands will warn when such refs are used, but not all.
# The key of the resulting object tells which alternative matched.
git_obj_or_ref          -> git_obj {% (d) => ({ obj: d[0] }) %}
                         | git_ref {% (d) => ({ ref: d[0] }) %}

# Currently, only sha1 identities are supported.
git_obj                 -> git_sha1 {% ([sha]) => sha %}
# a sha1 ID is 40 hex digits (5 groups of 8); each group is already a string
git_sha1                -> hex8 hex8 hex8 hex8 hex8 {% (groups) => groups.join('') %}

# unreserved is defined in RFC 3986.  Note that this syntax allows much
# more than what can be specified for a git reference.  It's left to
# the implementation to parse and apply semantics properly.
# Our best reference for how to interpret and parse git references is
# https://git-scm.com/docs/protocol-common.html
#
# A ref is one or more "/"-separated components, each made of one or more
# unreserved characters; lstr joins the match back into a single string.
git_ref                 -> unreserved:+ ( "/" unreserved:+ ):* {% lstr %}

# For Radicle collaborative object resource types
# Each alternative passes the result of the specific rule through unchanged.
resource_type_and_identity -> rad_cob  {% id %}
                            | rad_cobs {% id %}

# A single collaborative object: both its type and its object ID are given.
rad_cob                 -> "/" "cob" "/" rad_cob_type "/" rad_cob_obj
                           {% (d) => {
                                  const cobType = d[3];
                                  const cobObj = d[5];
                                  return { type: "cob", id: { type: cobType, obj: cobObj } };
                              } %}
# All collaborative objects of one type: only the type is given.
rad_cobs                -> "/" "cob" "/" rad_cob_type
                           {% (d) => ({ type: "cobs", id: { type: d[3] } }) %}

# The COB type is like a reversed FQDN.  The syntax spec is derived from
# https://datatracker.ietf.org/doc/html/rfc1034#section-3.5
#
# A type consists of at least two labels separated by "." (the dotted tail
# uses ":+").  A label is one or more runs of alphanumerics separated by
# single "-" characters, so it can neither start nor end with "-".
# Both rules yield the matched text as one string.
rad_cob_type            -> rad_cob_label ( "." rad_cob_label ):+ {% lstr %}
rad_cob_label           -> alnum:+ ( "-" alnum:+ ):* {% lstr %}

# syntactically, radicle COB objects and object revisions are git objects
# semantically, they are as well, at least for now
# (the git_obj result is passed through unchanged)
rad_cob_obj             -> git_obj {% id %}

# While the ABNF doesn't say anything special about 'query', we can allow
# it for ourselves here, since nearley does a bit more than just checking
# syntax.
# Yields an object that maps each parameter name to the array of its
# values, in order of appearance; a repeated name accumulates values.
rad_query               -> rad_query_param ( "&" rad_query_param ):*
                           {% ([first, rest]) => {
                                  // Each element of 'rest' is [ "&", <param> ];
                                  // keep only the params.
                                  const params = [first, ...(rest || []).map((pair) => pair[1])];
                                  const query = {};
                                  for (const param of params) {
                                      const name = param['name'];
                                      // Test for an own property only: names
                                      // such as "constructor" or "toString"
                                      // exist on Object.prototype and must
                                      // not be mistaken for earlier values.
                                      if (Object.prototype.hasOwnProperty.call(query, name)) {
                                          query[name].push(param['value']);
                                      } else {
                                          query[name] = [ param['value'] ];
                                      }
                                  }
                                  return query;
                              } %}
# One "name=value" pair.  The name starts with a letter; the value may be
# empty.  Both are joined into strings.
rad_query_param         -> ( alpha alnum:* ) "=" ( rad_query_pchar | "/" | "?" ):*
                           {% (d) => {
                                  const rawName = d[0];
                                  const rawValue = d[2];
                                  return { name: lstr(rawName), value: lstr(rawValue) };
                              } %}
# Same shape as 'pchar', but built on rad_query_sub_delims.
rad_query_pchar         -> unreserved | pct_encoded | rad_query_sub_delims | ":" | "@"
# 'sub_delims' without "&", which rad_query uses to separate parameters.
rad_query_sub_delims    -> "!" | "$" | "'" | "(" | ")"
                         | "*" | "+" | "," | ";" | "="

# Helper symbols
# Fixed-length runs of alphanumerics, each yielding one string.
alnum                   -> [a-zA-Z0-9] {% ([ch]) => ch %}
alnum5                  -> alnum alnum alnum alnum alnum {% (parts) => parts.join('') %}
alnum10                 -> alnum5 alnum5 {% (parts) => parts.join('') %}
alnum25                 -> alnum10 alnum10 alnum5 {% (parts) => parts.join('') %}

# Base58 Bitcoin Alphabet (excludes 0, O, I, l)
# Fixed-length runs of base58btc characters, each yielding one string.
base58btc               -> [1-9A-HJ-NP-Za-km-z] {% ([ch]) => ch %}
b58x5                   -> base58btc base58btc base58btc base58btc base58btc {% (parts) => parts.join('') %}
b58x10                  -> b58x5 b58x5 {% (parts) => parts.join('') %}
b58x25                  -> b58x10 b58x10 b58x5 {% (parts) => parts.join('') %}

# Fixed-length runs of 2, 4 and 8 hex digits, each yielding one string.
hex2                    -> hexdig hexdig {% (parts) => parts.join('') %}
hex4                    -> hex2 hex2 {% (parts) => parts.join('') %}
hex8                    -> hex4 hex4 {% (parts) => parts.join('') %}

# ABNF defines ALPHA, DIGIT and HEXDIG
# Each of these matches exactly one character and yields it as a string.
alpha           -> [a-zA-Z] {% id %}
digit           -> [0-9] {% id %}
# NOTE(review): only lowercase a-f is accepted here, whereas ABNF's HEXDIG
# (RFC 5234) also matches A-F.  This rule feeds pct_encoded, h16, IPvFuture
# and git_sha1, so e.g. "%3A" is not accepted - confirm this is intended.
hexdig          -> [a-f0-9] {% id %}

# RFC 3986 defines the following (translated to nearley)
# None of the rules below has a postprocessor, so they yield nested arrays
# of matched characters; host_and_port joins them into a string.
host            -> IP_literal | IPv4address | reg_name
# Unlike RFC 3986 ('*DIGIT'), at least one digit is required here.
port            -> [0-9]:+

# A bracketed IPv6 (or future version) address literal.
IP_literal      -> "[" ( IPv6address | IPvFuture ) "]"

IPvFuture       -> "v" hexdig:+ "." ( unreserved | sub_delims | ":" ):+

@{%
    // Postprocessor for the IPv6 rules below: concatenate everything that
    // was matched, however deeply nested, into one string.  Absent
    // optional parts (null) contribute nothing.
    const ip6str = function(d) { return d.flat(Infinity).join(''); }
%}

# IPv6address as defined by RFC 3986, section 3.2.2.  The part before "::"
# is optional and holds *up to* the given number of groups, so that
# compressed forms such as "::1" or "fe80::1" are accepted:
#
#       [ *N( h16 ":" ) h16 ] "::" ...    is written as    h16s<N+1>:? "::" ...
#
# The result is the matched address as a single string.
IPv6address     ->              h16c h16c h16c h16c h16c h16c ls32 {% ip6str %}
                 |         "::" h16c h16c h16c h16c h16c      ls32 {% ip6str %}
                 | h16s1:? "::" h16c h16c h16c h16c           ls32 {% ip6str %}
                 | h16s2:? "::" h16c h16c h16c                ls32 {% ip6str %}
                 | h16s3:? "::" h16c h16c                     ls32 {% ip6str %}
                 | h16s4:? "::" h16c                          ls32 {% ip6str %}
                 | h16s5:? "::"                               ls32 {% ip6str %}
                 | h16s6:? "::"                               h16  {% ip6str %}
                 | h16s7:? "::"                                    {% ip6str %}

# One group followed by its ":" separator.
h16c            -> h16 ":" {% ip6str %}

# h16s<N>: between 1 and N groups, separated by ":".
h16s1           -> h16 {% ip6str %}
h16s2           -> h16 {% ip6str %} | h16 ":" h16s1 {% ip6str %}
h16s3           -> h16 {% ip6str %} | h16 ":" h16s2 {% ip6str %}
h16s4           -> h16 {% ip6str %} | h16 ":" h16s3 {% ip6str %}
h16s5           -> h16 {% ip6str %} | h16 ":" h16s4 {% ip6str %}
h16s6           -> h16 {% ip6str %} | h16 ":" h16s5 {% ip6str %}
h16s7           -> h16 {% ip6str %} | h16 ":" h16s6 {% ip6str %}

# One to four hex digits.
h16             -> hexdig | hex2 | hex2 hexdig | hex4
# The last 32 bits of an address: two h16 groups or a dotted IPv4 address.
ls32            -> ( h16 ":" h16 ) | IPv4address
IPv4address     -> dec_octet "." dec_octet "." dec_octet "." dec_octet
# A decimal number from 0 to 255 without leading zeros.
dec_octet       -> digit                # 0-9
                 | [1-9] digit          # 10-99
                 | "1" digit digit      # 100-199
                 | "2" [0-4] digit      # 200-249
                 | "25" [0-5]           # 250-255

# A registered name; ":*" means that it may be empty.
reg_name        -> ( unreserved | pct_encoded | sub_delims ):*

pchar           -> unreserved | pct_encoded | sub_delims | ":" | "@"

# Not referenced by the 'rad' rule, which uses rad_query instead.
query           -> ( pchar | "/" | "?" ):*

# No postprocessor: the match is a nested array of characters.
fragment        -> ( pchar | "/" | "?" ):*

# "%" followed by two hex digits (as accepted by hexdig).
pct_encoded     -> "%" hex2

unreserved      -> alnum | "-" | "." | "_" | "~"
sub_delims      -> "!" | "$" | "&" | "'" | "(" | ")"
                 | "*" | "+" | "," | ";" | "="