@{%
/**
 * Collapse a nearley match into a single string.
 *
 * @param {Array} d - match data; may contain strings, nulls (unmatched
 *   optionals) and arbitrarily nested arrays of the same.
 * @returns {string} every leaf concatenated in order; null leaves
 *   contribute nothing because Array.prototype.join renders them as ''.
 */
// Flatten completely: a bounded depth leaves inner arrays in place for
// deeply nested matches, and join('') would then stringify those arrays
// with "," separators between their elements.
const lstr = function(d) { return d.flat(Infinity).join(''); };
%}
# In the ABNF grammar, 'rad-resource-type-and-identity' defaults to empty,
# which effectively renders it optional. Nearley doesn't allow empty rules,
# and multiple rules with the same name can be added anyway, so we make
# resource_type_and_identity optional here to get the same essential semantics.
rad -> scheme ":" auth_and_resource resource_type_and_identity:?
       ( "?" rad_query ):? ( "#" fragment ):?
  {% (parts) => {
       // parts: [scheme, ":", auth_and_resource, type-and-identity,
       //         ["?", query] or null, ["#", fragment] or null]
       const [schemeName, , location, typeAndIdentity, queryPart, fragmentPart] = parts;
       const url = { scheme: schemeName, ...location };
       // Unmatched optional parts are falsy and contribute no key at all.
       if (typeAndIdentity) { url.resource = typeAndIdentity; }
       if (queryPart) { url.query = queryPart[1]; }
       // NOTE(review): `fragment` has no postprocessor in this file, so this
       // value is nearley's raw nested match data rather than a string.
       if (fragmentPart) { url.fragment = fragmentPart[1]; }
       return url;
     } %}
     | scheme ":" legacy
  {% (parts) => ({ scheme: parts[0], ...parts[2] }) %}
# The only accepted scheme is the literal "rad"; lstr reduces the match
# to a plain string.
scheme -> "rad" {% lstr %}
auth_and_resource -> "//" auth:? "/" resource # absolute resource "path"
  {% (d) => {
       // d: ["//", auth object (or nothing), "/", resource object].
       // Merge the authority keys, if any, with the resource keys.
       const authority = d[1] || {};
       return { ...authority, ...d[3] };
     } %}
     | resource # relative resource "path"
  {% id %}
# 'legacy' are URLs defined by RIP #3. That URL form doesn't quite hold up
# against the general URI syntax defined in RFC3986, insofar as a node
# (identified with NID or NID@host) is the conceptual authority, not
# a repository.
# Note that RIDs and NIDs are distinct enough that there shouldn't
# be any ambiguity between rad-auth-and-resource and rad-legacy.
# Of all current implementations, this mostly impacts the git remote helper
# git-remote-rad.
# Drops the leading "//" and passes the resource object through unchanged.
legacy -> "//" resource
  {% (d) => d[1] %}
# The NID is conceptually ambiguous from an RFC 3986 syntax perspective:
#
# authority = [ userinfo "@" ] host [ ":" port ]
#
# Is the NID 'userinfo' or 'host'? In a sense, it currently is a little bit
# of both. Furthermore, Radicle requires that if a hostname (+ port) is
# given, the NID must be given too. Therefore, instead of using 'authority'
# strictly as given by RFC 3986, we allow ourselves a slightly different
# syntax, which is fine. Do note also, that a 'host' can still be made to
# look like an NID, so programs dealing directly with the Radicle network
# will have to be prepared to handle that ambiguity appropriately.
#
# TODO: There are already ways to resolve a node's NID from its hostname.
# For example, if `radicle-httpd` runs next to the node, then it can be
# queried for its NID via HTTPS. Also, there are ways to use DNS TXT records,
# which is also done for DNS-SD.
# We would therefore likely make the `node` optional, or even omit it.
# If `node` is sufficiently different from a DNS name (it probably is
# not), then we can even omit "@".
auth -> node ("@" host_and_port):?
  {% ([nodeId, atHost]) => {
       const authority = { node: nodeId };
       // atHost is ["@", host_and_port] when the optional part matched;
       // only then is a `host` key added.
       if (atHost) { authority.host = atHost[1]; }
       return authority;
     } %}
# Both the host and the port are required by this rule; lstr reduces the
# whole match (host, ":" and port) to a single string.
host_and_port -> host ":" port {% lstr %}
# rad-node is a semantic symbol, to signify intent: syntactically it is
# exactly an NID, and `id` passes the nid string through unchanged.
node -> nid {% id %}
# An NID is an ID that is ultimately a public key in the Radicle network.
# This is used to uniquely represent a node, or a user (also a node), or a
# namespace within a repository.
# It is a [multibase] and [multicodec] encoded string, using [base58btc] as the multibase
# and the codec for the underlying Ed25519 public key.
#
# It is always 48 characters long, including the initial "z6Mk",
# the `z` corresponding to the base, and `6Mk` corresponding to the codec.
#
# [base58btc]: https://digitalbazaar.github.io/base58-spec/
# [multibase]: https://www.ietf.org/archive/id/draft-multiformats-multibase-07.html
# [multicodec]: https://github.com/multiformats/multicodec/
#
# TODO: New NIDs in the future? Even this might change, as we allow rotating
# keys for nodes. But this is even further out than stable IDs for users.
# 4 ("z6Mk") + 25 + 10 + 5 + 4 single base58btc characters = 48 characters,
# concatenated into one string by lstr.
nid -> "z6Mk" b58x25 b58x10 b58x5 base58btc base58btc base58btc base58btc {% lstr %}
resource -> repository ( "/" namespace ):?
  {% ([repoId, nsPart]) => {
       const res = { repo: repoId };
       // nsPart is ["/", namespace] when the optional part matched.
       if (nsPart) { res.namespace = nsPart[1]; }
       return res;
     } %}
# repository is a semantic symbol, to signify intent: syntactically it is
# exactly an RID, passed through unchanged by `id`.
repository -> rid {% id %}
# namespace is a semantic symbol, to signify intent: syntactically it is
# exactly an NID, passed through unchanged by `id`.
namespace -> nid {% id %}
# An RID uniquely represents a git repository in the Radicle network.
# It is a [multibase] encoded string of a Git OID, using [base58btc] as the multibase.
#
# It is 28 or 29 characters long, including the initial multibase identifier "z".
#
# [base58btc]: https://digitalbazaar.github.io/base58-spec/
# [multibase]: https://www.ietf.org/archive/id/draft-multiformats-multibase-07.html
#
# TODO: In the near future (as soon as Git repositories with SHA-256 object
# format become popular, which will be in late 2026), we will have to extend
# the syntax of RIDs. Leave a note here so that implementors know this and
# make design decisions accordingly.
rid -> "z" b58x25 base58btc base58btc base58btc:?
  {% (d, l, reject) => {
       // d: ["z", 25-character string, char, char, char or null].
       // join('') renders the null of an unmatched optional as '', so the
       // result is 28 or 29 characters long.
       // Declared with const: a bare assignment would create an implicit
       // global and throws in strict-mode / ES-module output.
       const rid = d.join('');
       // Check that the RID doesn't start with "z6Mk", as that can only
       // be an NID. Returning the reject sentinel (third postprocessor
       // argument) makes nearley discard this parse.
       if (rid.startsWith("z6Mk")) { return reject; }
       return rid;
     }
  %}
# For git specific resource types.
# Each alternative is declared as a separate rule of the same name; `id`
# passes the object built by the matched alternative through unchanged.
resource_type_and_identity -> git_commit {% id %}
resource_type_and_identity -> git_tree {% id %}
resource_type_and_identity -> git_blob {% id %}
resource_type_and_identity -> git_tag {% id %}
# "/commit/<object-or-ref>": adds type "commit" to the { obj } / { ref } object.
git_commit -> "/" "commit" "/" git_obj_or_ref {% (d) => ({ type: "commit", ...d[3] }) %}
# "/tree/<object-id>": only an object ID is accepted here, stored under `obj`.
git_tree -> "/" "tree" "/" git_obj {% (d) => ({ type: "tree", obj: d[3] }) %}
# "/blob/<object-id>": only an object ID (git_obj) is accepted here, so it
# is stored under `obj`, the key used for object IDs by git_tree and
# git_obj_or_ref; `ref` is reserved for git_ref matches.
git_blob -> "/" "blob" "/" git_obj {% ([, , , o]) => ({ type: "blob", obj: o }) %}
# "/tag/<object-or-ref>": adds type "tag" to the { obj } / { ref } object.
git_tag -> "/" "tag" "/" git_obj_or_ref {% (d) => ({ type: "tag", ...d[3] }) %}
# Note: this rule will always be ambiguous, as something that looks like a
# git object ID could very well be a branch or a tag.
# Some git commands will warn when such refs are used, but not all.
# Wraps the match so that the key tells which alternative was taken:
# `obj` for an object ID, `ref` for a reference name.
git_obj_or_ref -> git_obj {% (d) => ({ obj: d[0] }) %}
                | git_ref {% (d) => ({ ref: d[0] }) %}
# Currently, only sha1 identities are supported.
# `id` passes the git_sha1 string through unchanged.
git_obj -> git_sha1 {% id %}
# a sha1 ID is 40 hex digits (5 x hex8), joined into one string by lstr.
# Since hexdig only matches [a-f0-9], upper-case hex digits are not accepted.
git_sha1 -> hex8 hex8 hex8 hex8 hex8 {% lstr %}
# unreserved is defined in RFC 3986. Note that this syntax allows much
# more than what can be specified for a git reference. It's left to
# the implementation to parse and apply semantics properly.
# Our best reference for how to interpret and parse git references is
# https://git-scm.com/docs/protocol-common.html
# One or more non-empty runs of unreserved characters separated by single
# "/" (no leading, trailing or doubled "/"); lstr joins the match,
# including the "/" separators, into one string.
git_ref -> unreserved:+ ( "/" unreserved:+ ):* {% lstr %}
# For Radicle collaborative object resource types.
# These add two more alternatives to resource_type_and_identity: a single
# COB (type plus object) or all COBs of a type; `id` passes the object
# built by the matched alternative through unchanged.
resource_type_and_identity -> rad_cob {% id %}
resource_type_and_identity -> rad_cobs {% id %}
# "/cob/<type>/<object>": one specific collaborative object.
rad_cob -> "/" "cob" "/" rad_cob_type "/" rad_cob_obj
  {% (d) => {
       const cobType = d[3];
       const cobObj = d[5];
       return { type: "cob", id: { type: cobType, obj: cobObj } };
     } %}
# "/cob/<type>": no object given, reported with type "cobs".
rad_cobs -> "/" "cob" "/" rad_cob_type
  {% (d) => {
       const cobType = d[3];
       return { type: "cobs", id: { type: cobType } };
     } %}
# The COB type is like a reversed FQDN. The syntax spec is derived from
# https://datatracker.ietf.org/doc/html/rfc1034#section-3.5
# At least two labels are required (note the `:+`), separated by ".";
# lstr joins labels and dots into one string.
rad_cob_type -> rad_cob_label ( "." rad_cob_label ):+ {% lstr %}
# A label is one or more alphanumeric runs separated by single "-", so it
# can neither start nor end with "-" nor contain "--".
rad_cob_label -> alnum:+ ( "-" alnum:+ ):* {% lstr %}
# Syntactically, Radicle COB objects and object revisions are git objects.
# Semantically, they are as well, at least for now.
# `id` passes the git_obj string through unchanged.
rad_cob_obj -> git_obj {% id %}
# While the ABNF doesn't say anything special about 'query', we can allow
# it for ourselves here, since nearley does a bit more than just checking
# syntax.
rad_query -> rad_query_param ( "&" rad_query_param ):*
  {% ([first, rest]) => {
       // Builds an object mapping each parameter name to the array of its
       // values, in order of appearance (repeated names accumulate).
       // `rest` holds one ["&", param] pair per additional parameter.
       const params = [first, ...(rest || []).map((pair) => pair[1])];
       const query = {};
       for (const { name, value } of params) {
         // Test for an OWN property: names such as "constructor" or
         // "toString" are valid parameter names, and a plain truthiness
         // check would find the inherited Object.prototype member and
         // then fail calling .push() on it.
         if (Object.prototype.hasOwnProperty.call(query, name)) {
           query[name].push(value);
         } else {
           query[name] = [value];
         }
       }
       return query;
     } %}
# One "name=value" pair; the name starts with a letter, the value may be empty.
rad_query_param -> ( alpha alnum:* ) "=" ( rad_query_pchar | "/" | "?" ):*
  {% (d) => {
       // d: [name match, "=", value match]; both matches are nested
       // arrays that lstr reduces to strings.
       const key = lstr(d[0]);
       const val = lstr(d[2]);
       return { name: key, value: val };
     } %}
# Same shape as pchar, but built on rad_query_sub_delims instead of sub_delims.
rad_query_pchar -> unreserved | pct_encoded | rad_query_sub_delims | ":" | "@"
# sub_delims without "&", which rad_query uses to separate parameters.
rad_query_sub_delims -> "!" | "$" | "'" | "(" | ")"
| "*" | "+" | "," | ";" | "="
# Helper symbols
# Fixed-length runs of alphanumerics, each returned as one string.
alnum -> [a-zA-Z0-9] {% id %}
alnum5 -> alnum alnum alnum alnum alnum {% (chars) => chars.join('') %}
alnum10 -> alnum5 alnum5 {% (halves) => halves.join('') %}
alnum25 -> alnum10 alnum10 alnum5 {% (chunks) => chunks.join('') %}
# Base58 Bitcoin Alphabet (excludes 0, O, I, l)
base58btc -> [1-9A-HJ-NP-Za-km-z] {% id %}
# Fixed-length runs of base58btc characters, each returned as one string.
b58x5 -> base58btc base58btc base58btc base58btc base58btc {% (chars) => chars.join('') %}
b58x10 -> b58x5 b58x5 {% (halves) => halves.join('') %}
b58x25 -> b58x10 b58x10 b58x5 {% (chunks) => chunks.join('') %}
# Fixed-length runs of 2, 4 and 8 hex digits, each returned as one string.
hex2 -> hexdig hexdig {% (digits) => digits.join('') %}
hex4 -> hex2 hex2 {% (pairs) => pairs.join('') %}
hex8 -> hex4 hex4 {% (quads) => quads.join('') %}
# ABNF defines ALPHA, DIGIT and HEXDIG
alpha -> [a-zA-Z] {% id %}
digit -> [0-9] {% id %}
# hexdig only matches lower-case "a"-"f"; upper-case hex digits are
# rejected by every rule built on it.
hexdig -> [a-f0-9] {% id %}
# RFC 3986 defines the following (translated to nearley)
# Neither host nor port has a postprocessor, so both yield nearley's raw
# nested match data; host_and_port reduces them to a string with lstr.
# NOTE(review): a dotted-decimal address also matches reg_name (digits and
# "." are unreserved), so such hosts can presumably be parsed two ways.
host -> IP_literal | IPv4address | reg_name
port -> [0-9]:+
# A bracketed IPv6 or future-version address literal.
IP_literal -> "[" ( IPv6address | IPvFuture ) "]"
# "v" + one or more hex digits + "." + one or more address characters.
IPvFuture -> "v" hexdig:+ "." ( unreserved | sub_delims | ":" ):+
# Each alternative spells out a fixed number of h16 groups on both sides
# of the "::" elision.
# NOTE(review): in RFC 3986 the groups before "::" are optional (up to the
# count written here), which permits shorter forms such as "::1"; as
# written, this rule only accepts the longest form of each alternative.
# Confirm whether that restriction is intended.
IPv6address -> h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
| "::" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
| h16 "::" h16 ":" h16 ":" h16 ":" h16 ":" ls32
| h16 ":" h16 "::" h16 ":" h16 ":" h16 ":" ls32
| h16 ":" h16 ":" h16 "::" h16 ":" h16 ":" ls32
| h16 ":" h16 ":" h16 ":" h16 "::" h16 ":" ls32
| h16 ":" h16 ":" h16 ":" h16 ":" h16 "::" ls32
| h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 "::" h16
| h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 "::"
# One to four hex digits: 1, 2, 2+1 or 4.
h16 -> hexdig | hex2 | hex2 hexdig | hex4
# The last 32 bits of an IPv6 address: two h16 groups or a dotted IPv4 address.
ls32 -> ( h16 ":" h16 ) | IPv4address
# Four decimal octets separated by ".".
IPv4address -> dec_octet "." dec_octet "." dec_octet "." dec_octet
# A decimal number from 0 to 255 without leading zeros; one alternative
# per value range.
dec_octet -> digit # 0-9
| [1-9] digit # 10-99
| "1" digit digit # 100-199
| "2" [0-4] digit # 200-249
| "25" [0-5] # 250-255
# A registered name; `:*` means it may be empty.
reg_name -> ( unreserved | pct_encoded | sub_delims ):*
pchar -> unreserved | pct_encoded | sub_delims | ":" | "@"
# NOTE(review): `query` is not referenced by any rule visible in this
# file; `rad` uses rad_query instead.
query -> ( pchar | "/" | "?" ):*
# No postprocessor: the result is nearley's raw nested match data, not a string.
fragment -> ( pchar | "/" | "?" ):*
# RFC 3986 section 2.1: pct-encoded = "%" HEXDIG HEXDIG, where upper- and
# lower-case hex digits are equivalent and upper-case is the recommended
# form. The shared `hexdig` rule only matches lower-case, so a dedicated
# case-insensitive pair is used here; otherwise e.g. "%2F" is rejected.
# The match keeps its shape: ["%", two-character string].
pct_encoded -> "%" pct_hex2
pct_hex2 -> [0-9A-Fa-f] [0-9A-Fa-f] {% function(d) { return d.join(''); } %}
# Alphanumerics plus "-", ".", "_" and "~".
unreserved -> alnum | "-" | "." | "_" | "~"
# The eleven sub-delimiter characters, one alternative each.
sub_delims -> "!" | "$" | "&" | "'" | "(" | ")"
| "*" | "+" | "," | ";" | "="