// tree-sitters/nix/grammar.js
//
// 412 lines
// 10 KiB
// JavaScript

// Precedence levels handed to `prec`, `prec.left` and `prec.right` by the
// operator rules of the grammar. A larger number binds more tightly, and
// operators listed with the same number share one level.
const PREC = {
  // Implication.
  "impl": 1,

  // Boolean disjunction and conjunction.
  "or": 2,
  "and": 3,

  // Equality and inequality share a level.
  "eq": 4,
  "neq": 4,

  // Ordering comparisons share a level.
  "<": 5,
  ">": 5,
  "leq": 5,
  "geq": 5,

  // Attribute-set update.
  "update": 6,

  // Boolean negation.
  "not": 7,

  // Additive, then multiplicative arithmetic.
  "+": 8,
  "-": 8,
  "*": 9,
  "/": 9,

  // List concatenation.
  "concat": 10,

  // Has-attribute test.
  "?": 11,

  // Arithmetic negation is the tightest-binding operator.
  "negate": 12,
};
// Tree-sitter grammar for the Nix expression language.
// The object passed to `grammar()` configures the parser generator: the
// grammar name, lexical extras, externally scanned tokens and the syntax rules.
module.exports = grammar({
name: "nix",
// Whitespace and comments are allowed between any two tokens.
extras: ($) => [/\s/, $.comment],
// `_expression` is declared as a supertype of the concrete expression nodes.
supertypes: ($) => [$._expression],
// No rules are inlined.
inline: ($) => [],
// Tokens that no rule in this file defines; they are expected to come from an
// external scanner that is not part of this file.
externals: ($) => [
$.string_fragment,
$._indented_string_fragment,
$._path_start,
$.path_fragment,
$.dollar_escape,
$._indented_dollar_escape,
],
// `keyword` is registered as the grammar's word token (see the workaround
// note on the `keyword` rule below).
word: ($) => $.keyword,
// No explicit parse conflicts are declared.
conflicts: ($) => [],
rules: {
// First rule of the grammar: a source file holds at most one expression.
source_code: ($) => optional(field("expression", $._expression)),
// Any expression; this is simply the loosest-binding expression level.
_expression: ($) => $._expr_function_expression,
// Keywords go before identifiers to let them take precedence when both are expected.
// Workaround before https://github.com/tree-sitter/tree-sitter/pull/246
keyword: ($) => /if|then|else|let|inherit|in|rec|with|assert/,
// Starts with a letter or underscore; later characters may also be digits,
// apostrophes or hyphens.
identifier: ($) => /[a-zA-Z_][a-zA-Z0-9_\'\-]*/,
// A bare identifier used as an expression.
variable_expression: ($) => field("name", $.identifier),
// One or more decimal digits.
integer_expression: ($) => /[0-9]+/,
// Either digits with a leading non-zero digit followed by "." and optional
// fraction digits, or an optional "0" followed by "." and at least one
// fraction digit; both forms accept an optional exponent.
float_expression: ($) =>
/(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?/,
// A path: the externally scanned start token (exposed as `path_fragment`)
// followed by any number of further fragments or interpolations.
path_expression: ($) =>
seq(
alias($._path_start, $.path_fragment),
repeat(
choice(
$.path_fragment,
alias($._immediate_interpolation, $.interpolation)
)
)
),
// Start of a "~/"-prefixed path.
_hpath_start: ($) => /\~\/[a-zA-Z0-9\._\-\+\/]+/,
// Same shape as `path_expression`, but starting with a "~/" prefix.
hpath_expression: ($) =>
seq(
alias($._hpath_start, $.path_fragment),
repeat(
choice(
$.path_fragment,
alias($._immediate_interpolation, $.interpolation)
)
)
),
// An angle-bracketed path such as <a/b>: one or more "/"-separated segments.
spath_expression: ($) => /<[a-zA-Z0-9\._\-\+]+(\/[a-zA-Z0-9\._\-\+]+)*>/,
// A scheme (letter, then letters/digits/"+"/"-"/"."), a colon, and a
// non-empty remainder drawn from the listed character set.
uri_expression: ($) =>
/[a-zA-Z][a-zA-Z0-9\+\-\.]*:[a-zA-Z0-9%\/\?:@\&=\+\$,\-_\.\!\~\*\']+/,
// Loosest-binding level: the forms whose body extends as far as possible,
// falling through to `if` and the operator levels.
_expr_function_expression: ($) =>
choice(
$.function_expression,
$.assert_expression,
$.with_expression,
$.let_expression,
$._expr_if
),
// A function literal. Four shapes are accepted, in this order:
//   name: body
//   { formals }: body
//   { formals } @ name: body
//   name @ { formals }: body
function_expression: ($) =>
choice(
seq(
field("universal", $.identifier),
":",
field("body", $._expr_function_expression)
),
seq(
field("formals", $.formals),
":",
field("body", $._expr_function_expression)
),
seq(
field("formals", $.formals),
"@",
field("universal", $.identifier),
":",
field("body", $._expr_function_expression)
),
seq(
field("universal", $.identifier),
"@",
field("formals", $.formals),
":",
field("body", $._expr_function_expression)
)
),
// The braced parameter pattern of a function: empty, a comma-separated list
// of formals, such a list followed by ", ...", or just "...".
// A comma after the last formal is only accepted when "..." follows it.
formals: ($) =>
choice(
seq("{", "}"),
seq("{", commaSep1(field("formal", $.formal)), "}"),
seq(
"{",
commaSep1(field("formal", $.formal)),
",",
field("ellipses", $.ellipses),
"}"
),
seq("{", field("ellipses", $.ellipses), "}")
),
// One named parameter, optionally followed by "?" and a default expression.
formal: ($) =>
seq(
field("name", $.identifier),
optional(seq("?", field("default", $._expression)))
),
ellipses: ($) => "...",
// assert <condition>; <body>
assert_expression: ($) =>
seq(
"assert",
field("condition", $._expression),
";",
field("body", $._expr_function_expression)
),
// with <environment>; <body>
with_expression: ($) =>
seq(
"with",
field("environment", $._expression),
";",
field("body", $._expr_function_expression)
),
// let <bindings> in <body>; the binding set may be empty.
let_expression: ($) =>
seq(
"let",
optional($.binding_set),
"in",
field("body", $._expr_function_expression)
),
// Next level down: a conditional or an operator expression.
_expr_if: ($) => choice($.if_expression, $._expr_op),
// if <condition> then <consequence> else <alternative>; the else branch is
// mandatory.
if_expression: ($) =>
seq(
"if",
field("condition", $._expression),
"then",
field("consequence", $._expression),
"else",
field("alternative", $._expression)
),
// Operator level: has-attribute tests, unary and binary operators, falling
// through to function application.
_expr_op: ($) =>
choice(
$.has_attr_expression,
$.unary_expression,
$.binary_expression,
$._expr_apply_expression
),
// I choose to *not* have this among the binary operators because
// this is the sole exception that takes an attrpath (instead of expression)
// as its right operand.
// My gut feeling is that this is:
// 1) better in theory, and
// 2) will be easier to work with in practice.
has_attr_expression: ($) =>
prec(
PREC["?"],
seq(
field("expression", $._expr_op),
field("operator", "?"),
field("attrpath", $.attrpath)
)
),
// Prefix operators: "!" at PREC.not and "-" at PREC.negate. Each table row
// is [operator, precedence] and is expanded into one alternative.
unary_expression: ($) =>
choice(
...[
["!", PREC.not],
["-", PREC.negate],
].map(([operator, precedence]) =>
prec(
precedence,
seq(field("operator", operator), field("argument", $._expr_op))
)
)
),
// Infix operators. Each table row is [operator, precedence]; every row is
// expanded into a `left operator right` sequence with the stated
// associativity.
binary_expression: ($) =>
choice(
// left assoc.
...[
["==", PREC.eq],
["!=", PREC.neq],
["<", PREC["<"]],
["<=", PREC.leq],
[">", PREC[">"]],
[">=", PREC.geq],
["&&", PREC.and],
["||", PREC.or],
["+", PREC["+"]],
["-", PREC["-"]],
["*", PREC["*"]],
["/", PREC["/"]],
].map(([operator, precedence]) =>
prec.left(
precedence,
seq(
field("left", $._expr_op),
field("operator", operator),
field("right", $._expr_op)
)
)
),
// right assoc.
...[
["->", PREC.impl],
["//", PREC.update],
["++", PREC.concat],
].map(([operator, precedence]) =>
prec.right(
precedence,
seq(
field("left", $._expr_op),
field("operator", operator),
field("right", $._expr_op)
)
)
)
),
// Application level: a function application or anything tighter.
_expr_apply_expression: ($) =>
choice($.apply_expression, $._expr_select_expression),
// Function application by juxtaposition. The rule recurses on the function
// side only, so `f a b` nests as `(f a) b`.
apply_expression: ($) =>
seq(
field("function", $._expr_apply_expression),
field("argument", $._expr_select_expression)
),
// Selection level: an attribute selection or a simple expression.
_expr_select_expression: ($) => choice($.select_expression, $._expr_simple),
// Attribute selection: `expr.attrpath`, optionally followed by `or` and a
// default expression.
select_expression: ($) =>
choice(
seq(
field("expression", $._expr_simple),
".",
field("attrpath", $.attrpath)
),
seq(
field("expression", $._expr_simple),
".",
field("attrpath", $.attrpath),
"or",
field("default", $._expr_select_expression)
)
),
// Tightest level: variables, literals, and the bracketed/braced forms.
_expr_simple: ($) =>
choice(
$.variable_expression,
$.integer_expression,
$.float_expression,
$.string_expression,
$.indented_string_expression,
$.path_expression,
$.hpath_expression,
$.spath_expression,
$.uri_expression,
$.parenthesized_expression,
$.attrset_expression,
$.let_attrset_expression,
$.rec_attrset_expression,
$.list_expression
),
// Any expression wrapped in parentheses.
parenthesized_expression: ($) =>
seq("(", field("expression", $._expression), ")"),
// Attribute sets: `{ ... }`, `let { ... }` and `rec { ... }`. The binding
// set is optional in all three.
attrset_expression: ($) => seq("{", optional($.binding_set), "}"),
let_attrset_expression: ($) =>
seq("let", "{", optional($.binding_set), "}"),
rec_attrset_expression: ($) =>
seq("rec", "{", optional($.binding_set), "}"),
// A double-quoted string: any mix of externally scanned fragments,
// interpolations, escape sequences, and a `dollar_escape` token followed by
// a literal "$" (which is exposed as a `string_fragment`).
string_expression: ($) =>
seq(
'"',
repeat(
choice(
$.string_fragment,
$.interpolation,
choice(
$.escape_sequence,
seq($.dollar_escape, alias("$", $.string_fragment))
)
)
),
'"'
),
// A backslash immediately followed by one character other than "$".
escape_sequence: ($) => token.immediate(/\\([^$]|\s)/), // Can also escape newline.
// A string delimited by two single quotes on each side. Same structure as
// `string_expression`, using the indented-string variants of the fragment,
// escape and dollar-escape tokens (aliased to the regular node names).
indented_string_expression: ($) =>
seq(
"''",
repeat(
choice(
alias($._indented_string_fragment, $.string_fragment),
$.interpolation,
choice(
alias($._indented_escape_sequence, $.escape_sequence),
seq(
alias($._indented_dollar_escape, $.dollar_escape),
alias("$", $.string_fragment)
)
)
)
),
"''"
),
// Either three single quotes, or two single quotes, a backslash and one
// character other than "$".
_indented_escape_sequence: ($) => token.immediate(/'''|''\\([^$]|\s)/), // Can also escape newline.
// One or more bindings; each is a plain binding or one of the inherit forms.
binding_set: ($) =>
repeat1(field("binding", choice($.binding, $.inherit, $.inherit_from))),
// attrpath = expression;
binding: ($) =>
seq(
field("attrpath", $.attrpath),
"=",
field("expression", $._expression),
";"
),
// inherit attr1 attr2 ...;
inherit: ($) => seq("inherit", field("attrs", $.inherited_attrs), ";"),
// inherit (expression) attr1 attr2 ...;
inherit_from: ($) =>
seq(
"inherit",
"(",
field("expression", $._expression),
")",
field("attrs", $.inherited_attrs),
";"
),
// One or more "."-separated attributes; each attribute is an identifier, a
// double-quoted string or an interpolation.
attrpath: ($) =>
sep1(
field(
"attr",
choice($.identifier, $.string_expression, $.interpolation)
),
"."
),
// One or more attributes with no separator token between them.
inherited_attrs: ($) =>
repeat1(
field(
"attr",
choice($.identifier, $.string_expression, $.interpolation)
)
),
// Interpolation whose opening "${" is an immediate token; used by the path
// rules, where it is aliased to `interpolation`.
_immediate_interpolation: ($) =>
seq(token.immediate("${"), field("expression", $._expression), "}"),
// ${ expression }
interpolation: ($) => seq("${", field("expression", $._expression), "}"),
// A bracketed list of zero or more elements. Elements are parsed at the
// selection level, so function applications and operator expressions must be
// parenthesized to appear as a single element.
list_expression: ($) =>
seq("[", repeat(field("element", $._expr_select_expression)), "]"),
// Either "#" through the end of the line, or a "/* ... */" block comment.
comment: ($) =>
token(choice(seq("#", /.*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"))),
},
});
/**
 * Builds a rule matching zero or more occurrences of `rule`, with
 * `separator` between consecutive occurrences.
 *
 * @param rule - The grammar rule to repeat.
 * @param separator - The rule or token placed between occurrences.
 * @returns The result of wrapping `sep1(rule, separator)` in `optional`.
 */
function sep(rule, separator) {
  const oneOrMore = sep1(rule, separator);
  return optional(oneOrMore);
}
/**
 * Builds a rule matching one or more occurrences of `rule`, with
 * `separator` between consecutive occurrences.
 *
 * @param rule - The grammar rule to repeat.
 * @param separator - The rule or token placed between occurrences.
 * @returns A sequence of `rule` followed by a repetition of
 *   `separator, rule` pairs.
 */
function sep1(rule, separator) {
  const separatedTail = repeat(seq(separator, rule));
  return seq(rule, separatedTail);
}
/**
 * Builds a rule matching one or more comma-separated occurrences of `rule`.
 *
 * @param rule - The grammar rule to repeat.
 * @returns The result of `sep1` called with "," as the separator.
 */
function commaSep1(rule) {
  const comma = ",";
  return sep1(rule, comma);
}
/**
 * Builds a rule matching zero or more comma-separated occurrences of `rule`.
 *
 * @param rule - The grammar rule to repeat.
 * @returns The result of wrapping `commaSep1(rule)` in `optional`.
 */
function commaSep(rule) {
  const nonEmptyList = commaSep1(rule);
  return optional(nonEmptyList);
}