commit 7c0c72b8b7c9aa5a2fcf8ff1f090b52950f4006c Author: xenia Date: Sat Nov 11 23:27:21 2023 +0100 Add agda, bash, c, nix, python and rust diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d188f0d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 xenia + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..c4e2f4a --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# xenia/tree-sitters — a collection of tree-sitter grammars, built with nix for static linking + +Languages: +* c: https://github.com/tree-sitter/tree-sitter-c (MIT) +* agda: https://github.com/tree-sitter/tree-sitter-agda (MIT) +* bash: https://github.com/tree-sitter/tree-sitter-bash (MIT) +* nix: https://github.com/nix-community/tree-sitter-nix (MIT) +* python: https://github.com/tree-sitter/tree-sitter-python (MIT) +* rust: https://github.com/tree-sitter/tree-sitter-rust (MIT) diff --git a/agda/LICENSE b/agda/LICENSE new file mode 100644 index 0000000..70f5171 --- /dev/null +++ b/agda/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 LUA Ting-Gan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/agda/examples/example.txt b/agda/examples/example.txt new file mode 100644 index 0000000..555d3e6 --- /dev/null +++ b/agda/examples/example.txt @@ -0,0 +1,8 @@ +record RawMonoid c ℓ : Set (suc (c ⊔ ℓ)) where + infixl 7 _∙_ + infix 4 _≈_ + field + Carrier : Set c + _≈_ : Rel Carrier ℓ + _b_ : Op Carrier + a : Carrier diff --git a/agda/grammar.js b/agda/grammar.js new file mode 100644 index 0000000..5001569 --- /dev/null +++ b/agda/grammar.js @@ -0,0 +1,1084 @@ +/* eslint-disable arrow-parens */ +/* eslint-disable camelcase */ +/* eslint-disable-next-line spaced-comment */ +/// <reference types="tree-sitter-cli/dsl" /> +// @ts-check + +const BRACE1 = [['{', '}']]; +const BRACE2 = [['{{', '}}'], ['⦃', '⦄']]; +// const BRACES = [...BRACE1, ...BRACE2]; +const IDIOM = [['(|', '|)'], ['⦇', '⦈']]; +const PAREN = [['(', ')']]; + +// numbers and literals +const integer = /\-?(0x[0-9a-fA-F]+|[0-9]+)/; + +module.exports = grammar({ + name: 'agda', + + word: $ => $.id, + + extras: $ => [ + $.comment, + $.pragma, + /\s|\\n/, + ], + + externals: $ => [ + $._newline, + $._indent, + $._dedent, + ], + + rules: { + source_file: $ => repeat(seq($._declaration, $._newline)), + + + // ////////////////////////////////////////////////////////////////////// + // Constants + // ////////////////////////////////////////////////////////////////////// + + _FORALL: _ => choice('forall', '∀'), + _ARROW: _ => choice('->', '→'), + _LAMBDA: _ => choice('\\', 'λ'), + _ELLIPSIS: _ => choice('...', '…'), + + // ////////////////////////////////////////////////////////////////////// + // Top-level Declarations + // ////////////////////////////////////////////////////////////////////// + + // Declarations + // indented, 1 or more declarations + _declaration_block: $ => block($, $._declaration), + + // Declarations0: use `optional($._declaration_block)` instead + // _declaration_block0: $ => block($, optional($._declaration)), + + // Declaration + _declaration: $ => choice( + $.fields, + $.function, + $.data, + $.data_signature, 
$.record, + $.record_signature, + $.infix, + $.generalize, + $.mutual, + $.abstract, + $.private, + $.instance, + $.macro, + $.postulate, + $.primitive, + $.open, + $.import, + $.module_macro, + $.module, + $.pragma, + $.syntax, + $.pattern, + $.unquote_decl, + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Field + // ////////////////////////////////////////////////////////////////////// + + // Fields + fields: $ => seq( + 'field', + $._signature_block, + ), + + // ArgTypeSignatures + _signature_block: $ => block($, $.signature), + + // ArgTypeSigs + signature: $ => choice( + seq( + optional('overlap'), + $._modal_arg_ids, + ':', + $.expr, + ), + seq( + 'instance', + $._signature_block, + ), + ), + + // ModalArgIds + _modal_arg_ids: $ => seq(repeat($.attribute), $._arg_ids), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Functions + // ////////////////////////////////////////////////////////////////////// + + // We are splitting FunClause into 2 cases: + // *. function declaration (':') + // *. 
function definitions ('=') + // Doing so we can mark the LHS of a function declaration as 'function_name' + + // FunClause + function: $ => choice( + seq( + optional($.attributes), + alias($.lhs_decl, $.lhs), + alias(optional($.rhs_decl), $.rhs), + optional($.where), + ), + seq( + optional($.attributes), + alias($.lhs_defn, $.lhs), + alias(optional($.rhs_defn), $.rhs), + optional($.where), + ), + ), + + // LHS + lhs_decl: $ => seq( + alias($._with_exprs, $.function_name), + optional($.rewrite_equations), + optional($.with_expressions), + ), + lhs_defn: $ => prec(1, seq( + $._with_exprs, + optional($.rewrite_equations), + optional($.with_expressions), + )), + + // RHS + rhs_decl: $ => seq(':', $.expr), + rhs_defn: $ => seq('=', $.expr), + + // WithExpressions + with_expressions: $ => seq('with', $.expr), + + // RewriteEquations + rewrite_equations: $ => seq('rewrite', $._with_exprs), + + // WhereClause + where: $ => seq( + optional(seq( + 'module', + $.bid, + )), + 'where', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Data + // ////////////////////////////////////////////////////////////////////// + + data_name: $ => alias($.id, 'data_name'), + + data: $ => seq( + choice('data', 'codata'), + $.data_name, + optional($._typed_untyped_bindings), + optional(seq(':', $.expr)), + 'where', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Data Signature + // ////////////////////////////////////////////////////////////////////// + + data_signature: $ => seq( + 'data', + $.data_name, + optional($._typed_untyped_bindings), + ':', + $.expr, + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Record + // ////////////////////////////////////////////////////////////////////// + + // Record + record: $ => seq( + 'record', + alias($._atom_no_curly, 
$.record_name), + optional($._typed_untyped_bindings), + optional(seq(':', $.expr)), + $.record_declarations_block, + ), + + // RecordDeclarations + record_declarations_block: $ => seq( + 'where', + indent($, + // RecordDirectives + repeat(seq($._record_directive, $._newline)), + repeat(seq($._declaration, $._newline)), + ), + ), + + // RecordDirective + _record_directive: $ => choice( + $.record_constructor, + $.record_constructor_instance, + $.record_induction, + $.record_eta, + ), + // RecordConstructorName + record_constructor: $ => seq('constructor', $.id), + + // Declaration of record constructor name. + record_constructor_instance: $ => seq( + 'instance', + block($, $.record_constructor), + ), + + // RecordInduction + record_induction: _ => choice( + 'inductive', + 'coinductive', + ), + + // RecordEta + record_eta: _ => choice( + 'eta-equality', + 'no-eta-equality', + ), + + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Record Signature + // ////////////////////////////////////////////////////////////////////// + + record_signature: $ => seq( + 'record', + alias($._atom_no_curly, $.record_name), + optional($._typed_untyped_bindings), + ':', + $.expr, + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Infix + // ////////////////////////////////////////////////////////////////////// + + infix: $ => seq( + choice('infix', 'infixl', 'infixr'), + $.integer, + repeat1($.bid), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Generalize + // ////////////////////////////////////////////////////////////////////// + + generalize: $ => seq( + 'variable', + optional($._signature_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Mutual + // ////////////////////////////////////////////////////////////////////// + + mutual: $ => seq( + 'mutual', + 
optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Abstract + // ////////////////////////////////////////////////////////////////////// + + abstract: $ => seq( + 'abstract', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Private + // ////////////////////////////////////////////////////////////////////// + + private: $ => seq( + 'private', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Instance + // ////////////////////////////////////////////////////////////////////// + + instance: $ => seq( + 'instance', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Macro + // ////////////////////////////////////////////////////////////////////// + + macro: $ => seq( + 'macro', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Postulate + // ////////////////////////////////////////////////////////////////////// + + postulate: $ => seq( + 'postulate', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Primitive + // ////////////////////////////////////////////////////////////////////// + + primitive: $ => seq( + 'primitive', + optional($._type_signature_block), + ), + + // TypeSignatures + _type_signature_block: $ => block($, $.type_signature), + + // TypeSigs + type_signature: $ => seq( + $._field_names, + ':', + $.expr, + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Open + // ////////////////////////////////////////////////////////////////////// + + + open: $ => seq( + 'open', + choice($.import, $.module_name), + 
optional($._atoms), + optional($._import_directives), + ), + import: $ => seq('import', $.module_name), + + + // ModuleName + module_name: $ => $._qid, + + // ImportDirectives and shit + _import_directives: $ => repeat1($.import_directive), + import_directive: $ => choice( + 'public', + seq('using', '(', $._comma_import_names, ')'), + seq('hiding', '(', $._comma_import_names, ')'), + seq('renaming', '(', sepR(';', $.renaming), ')'), + seq('using', '(', ')'), + seq('hiding', '(', ')'), + seq('renaming', '(', ')'), + ), + + // CommaImportNames + _comma_import_names: $ => sepR(';', $._import_name), + + // Renaming + renaming: $ => seq( + optional('module'), + $.id, + 'to', + $.id, + ), + + // ImportName + _import_name: $ => seq( + optional('module'), $.id, + ), + + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Module Macro + // ////////////////////////////////////////////////////////////////////// + + // ModuleMacro + module_macro: $ => seq( + choice( + seq('module', alias($._qid, $.module_name)), + seq('open', 'module', alias($._qid, $.module_name)), + ), + optional($._typed_untyped_bindings), + '=', + $.module_application, + repeat($.import_directive), + ), + + // ModuleApplication + module_application: $ => seq( + $.module_name, + choice( + prec(1, brace_double($._ELLIPSIS)), + optional($._atoms), + ), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Module + // ////////////////////////////////////////////////////////////////////// + + // Module + module: $ => seq( + 'module', + alias(choice($._qid, '_'), $.module_name), + optional($._typed_untyped_bindings), + 'where', + optional($._declaration_block), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Pragma + // ////////////////////////////////////////////////////////////////////// + + // Pragma / DeclarationPragma + pragma: _ => token(seq( + '{-#', + 
repeat(choice( + /[^#]/, + /#[^-]/, + /#\-[^}]/, + )), + '#-}', + )), + + // CatchallPragma + catchall_pragma: _ => seq('{-#', 'CATCHALL', '#-}'), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Syntax + // ////////////////////////////////////////////////////////////////////// + + syntax: $ => seq( + 'syntax', + $.id, + $.hole_names, + '=', + repeat1($.id), + ), + + // HoleNames + hole_names: $ => repeat1($.hole_name), + hole_name: $ => choice( + $._simple_top_hole, + brace($._simple_hole), + brace_double($._simple_hole), + brace($.id, '=', $._simple_hole), + brace_double($.id, '=', $._simple_hole), + ), + + // SimpleTopHole + _simple_top_hole: $ => choice( + $.id, + paren($._LAMBDA, $.bid, $._ARROW, $.id), + ), + + // SimpleHole + _simple_hole: $ => choice( + $.id, + seq($._LAMBDA, $.bid, $._ARROW, $.id), + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Pattern Synonym + // ////////////////////////////////////////////////////////////////////// + + // PatternSyn + pattern: $ => seq( + 'pattern', + $.id, + optional($._lambda_bindings), // PatternSynArgs + '=', + $.expr, + ), + + // ////////////////////////////////////////////////////////////////////// + // Declaration: Unquoting declarations + // ////////////////////////////////////////////////////////////////////// + + // UnquoteDecl + unquote_decl: $ => choice( + seq('unquoteDecl', '=', $.expr), + seq('unquoteDecl', $._ids, '=', $.expr), + seq('unquoteDef', $._ids, '=', $.expr), + ), + + // ////////////////////////////////////////////////////////////////////// + // Names + // ////////////////////////////////////////////////////////////////////// + + // identifier: http://wiki.portal.chalmers.se/agda/pmwiki.php?n=ReferenceManual.Names + id: _ => /([^\s\\.\"\(\)\{\}@\'\\_]|\\[^\sa-zA-Z]|_[^\s;\.\"\(\)\{\}@])[^\s;\.\"\(\)\{\}@]*/, + + // qualified identifier: 
http://wiki.portal.chalmers.se/agda/pmwiki.php?n=ReferenceManual.Names + _qid: $ => prec.left( + choice( + // eslint-disable-next-line max-len + alias(/(([^\s;\.\"\(\)\{\}@\'\\_]|\\[^\sa-zA-Z]|_[^\s;\.\"\(\)\{\}@])[^\s;\.\"\(\)\{\}@]*\.)*([^\s;\.\"\(\)\{\}@\'\\_]|\\[^\sa-zA-Z]|_[^\s;\.\"\(\)\{\}@])[^\s;\.\"\(\)\{\}@]*/, $.qid), + alias($.id, $.qid), + ), + ), + + // BId + bid: $ => alias(choice('_', $.id), 'bid'), + + // SpaceIds + _ids: $ => repeat1($.id), + + _field_name: $ => alias($.id, $.field_name), + _field_names: $ => repeat1($._field_name), + + // MaybeDottedId + _maybe_dotted_id: $ => maybeDotted($._field_name), + _maybe_dotted_ids: $ => repeat1($._maybe_dotted_id), + + // ArgIds + _arg_ids: $ => repeat1($._arg_id), + _arg_id: $ => choice( + $._maybe_dotted_id, + + brace($._maybe_dotted_ids), + brace_double($._maybe_dotted_ids), + + seq('.', brace($._field_names)), + seq('.', brace_double($._field_names)), + + seq('..', brace($._field_names)), + seq('..', brace_double($._field_names)), + ), + + // CommaBIds / CommaBIdAndAbsurds + _binding_ids_and_absurds: $ => prec(-1, choice( + $._application, + seq($._qid, '=', $._qid), + seq($._qid, '=', '_'), + seq('-', '=', $._qid), + seq('-', '=', '_'), + )), + + // Attribute + attribute: $ => seq('@', $._expr_or_attr), + attributes: $ => repeat1($.attribute), + + // ////////////////////////////////////////////////////////////////////// + // Expressions (terms and types) + // ////////////////////////////////////////////////////////////////////// + + // Expr + expr: $ => choice( + seq($._typed_bindings, $._ARROW, $.expr), + seq(optional($.attributes), $._atoms, $._ARROW, $.expr), + seq($._with_exprs, '=', $.expr), + prec(-1, $._with_exprs), // lowest precedence + ), + stmt: $ => choice( + seq($._typed_bindings, $._ARROW, $.expr), + seq(optional($.attributes), $._atoms, $._ARROW, $.expr), + seq($._with_exprs, '=', $.expr), + prec(-1, $._with_exprs_stmt), // lowest precedence + ), + + // WithExprs/Expr1 + _with_exprs: 
$ => seq( + repeat(seq($._atoms, '|')), + $._application, + ), + _with_exprs_stmt: $ => seq( + repeat(seq($._atoms, '|')), + $._application_stmt, + ), + + // ExprOrAttr + _expr_or_attr: $ => choice( + $.literal, + $._qid, + paren($.expr), + ), + + // Application + _application: $ => seq( + optional($._atoms), + $._expr2, + ), + _application_stmt: $ => seq( + optional($._atoms), + $._expr2_stmt, + ), + + // Expr + _expr2_without_let: $ => choice( + $.lambda, + alias($.lambda_extended_or_absurd, $.lambda), + $.forall, + $.do, + prec(-1, $.atom), + seq('quoteGoal', $.id, 'in', $.expr), + seq('tactic', $._atoms), + seq('tactic', $._atoms, '|', $._with_exprs), + ), + _expr2: $ => choice( + $._expr2_without_let, + $.let, + ), + _expr2_stmt: $ => choice( + $._expr2_without_let, + alias($.let_in_do, $.let), + ), + + // Expr3 + atom: $ => choice( + $._atom_curly, + $._atom_no_curly, + ), + // Application3 / OpenArgs + _atoms: $ => repeat1($.atom), + + _atom_curly: $ => brace(optional($.expr)), + + _atom_no_curly: $ => choice( + '_', + 'Prop', + $.SetN, + 'quote', + 'quoteTerm', + 'quoteContext', + 'unquote', + $.PropN, + brace_double($.expr), + idiom($.expr), + seq('(', ')'), + seq('{{', '}}'), + seq('⦃', '⦄'), + seq($.id, '@', $.atom), + seq('.', $.atom), + $.record_assignments, + alias($.field_assignments, $.record_assignments), + $._ELLIPSIS, + $._expr_or_attr, + ), + + // ForallBindings + forall: $ => seq($._FORALL, $._typed_untyped_bindings, $._ARROW, $.expr), + + // LetBody + let: $ => prec.right(seq( + 'let', + // declarations + optional($._indent), + repeat(seq($._declaration, $._newline)), + $._declaration, + // in case that there's a newline between declarations and $._let_body + optional($._newline), + + $._let_body, + )), + + // special `let...in` in do statements + let_in_do: $ => prec.right(seq( + 'let', + // declarations + optional($._indent), + repeat(seq($._declaration, $._newline)), + $._declaration, + // + choice( + seq($._newline, $._dedent), + // covers 
the newline between declarations and $._let_body + seq($._newline, $._let_body), + // covers the rest of the cases + $._let_body, + ), + )), + + _let_body: $ => seq( + 'in', + $.expr, + ), + + // LamBindings + lambda: $ => seq( + $._LAMBDA, + $._lambda_bindings, + $._ARROW, + $.expr, + ), + + // LamBinds + _lambda_bindings: $ => seq( + repeat($._typed_untyped_binding), + choice( + $._typed_untyped_binding, + seq('(', ')'), + seq('{', '}'), + seq('{{', '}}'), + seq('⦃', '⦄'), + ), + ), + + // ExtendedOrAbsurdLam + lambda_extended_or_absurd: $ => seq( + $._LAMBDA, + choice( + // LamClauses (single non absurd lambda clause) + brace($.lambda_clause), + // LamClauses + brace($._lambda_clauses), + // LamWhereClauses + seq('where', $._lambda_clauses), + // AbsurdLamBindings + $._lambda_bindings, + ), + ), + + // bunch of `$._lambda_clause_maybe_absurd` sep by ';' + _lambda_clauses: $ => prec.left(seq( + repeat(seq($._lambda_clause_maybe_absurd, ';')), + $._lambda_clause_maybe_absurd, + )), + + // AbsurdLamBindings | AbsurdLamClause + _lambda_clause_maybe_absurd: $ => prec.left(choice( + $.lambda_clause_absurd, + $.lambda_clause, + )), + + // AbsurdLamClause + lambda_clause_absurd: $ => seq( + optional($.catchall_pragma), + $._application, + ), + + // NonAbsurdLamClause + lambda_clause: $ => seq( + optional($.catchall_pragma), + optional($._atoms), // Application3PossiblyEmpty + $._ARROW, + $.expr, + ), + + // DoStmts + do: $ => seq('do', + block($, $._do_stmt), + ), + + // DoStmt + _do_stmt: $ => seq( + $.stmt, + optional($.do_where), + ), + + // DoWhere + do_where: $ => seq( + 'where', + $._lambda_clauses, + ), + + // RecordAssignments + record_assignments: $ => seq( + 'record', + brace(optional($._record_assignments)), + ), + + field_assignments: $ => seq( + 'record', + $._atom_no_curly, + brace(optional($._field_assignments)), + ), + + // RecordAssignments1 + _record_assignments: $ => seq( + repeat(seq($._record_assignment, ';')), + $._record_assignment, + ), + + + // 
FieldAssignments1 + _field_assignments: $ => seq( + repeat(seq($.field_assignment, ';')), + $.field_assignment, + ), + + // RecordAssignment + _record_assignment: $ => choice( + $.field_assignment, + $.module_assignment, + ), + + // FieldAssignment + field_assignment: $ => seq( + alias($.id, $.field_name), + '=', + $.expr, + ), + + // ModuleAssignment + module_assignment: $ => seq( + $.module_name, + optional($._atoms), + optional($._import_directives), + ), + + + // ////////////////////////////////////////////////////////////////////// + // Bindings + // ////////////////////////////////////////////////////////////////////// + + // TypedBinding + _typed_bindings: $ => repeat1($.typed_binding), + typed_binding: $ => choice( + maybeDotted(choice( + paren($._application, ':', $.expr), + brace($._binding_ids_and_absurds, ':', $.expr), + brace_double($._binding_ids_and_absurds, ':', $.expr), + )), + paren($.attributes, $._application, ':', $.expr), + brace($.attributes, $._binding_ids_and_absurds, ':', $.expr), + brace_double($.attributes, $._binding_ids_and_absurds, ':', $.expr), + paren($.open), + paren('let', $._declaration_block), + ), + + // TypedUntypedBindings1 + _typed_untyped_bindings: $ => repeat1($._typed_untyped_binding), + _typed_untyped_binding: $ => choice( + $.untyped_binding, + $.typed_binding, + ), + + // DomainFreeBinding / DomainFreeBindingAbsurd + untyped_binding: $ => choice( // 13 variants + maybeDotted(choice( + $.bid, + brace($._binding_ids_and_absurds), + brace_double($._binding_ids_and_absurds), + )), + paren($._binding_ids_and_absurds), + paren($.attributes, $._binding_ids_and_absurds), + brace($.attributes, $._binding_ids_and_absurds), + brace_double($.attributes, $._binding_ids_and_absurds), + ), + + // ////////////////////////////////////////////////////////////////////// + // Literals + // ////////////////////////////////////////////////////////////////////// + + // -- Literals + // <0,code> \' { litChar } + // <0,code,pragma_> \" { 
litString } + // <0,code> @integer { literal LitNat } + // <0,code> @float { literal LitFloat } + integer: _ => integer, + string: _ => /\".*\"/, + literal: _ => choice( + integer, + /\".*\"/, + ), + + // ////////////////////////////////////////////////////////////////////// + // Comment + // ////////////////////////////////////////////////////////////////////// + + comment: _ => token(choice( + prec(100, seq('--', /.*/)), + seq('{--}'), + seq( + '{-', + /[^#]/, + repeat(choice( + /[^-]/, // anything but - + /-[^}]/, // - not followed by } + )), + /-}/, + ), + )), + + // setN + SetN: $ => prec.right(2, seq('Set', optional($.atom))), + + + // ////////////////////////////////////////////////////////////////////// + // Unimplemented + // ////////////////////////////////////////////////////////////////////// + + + // propN + PropN: _ => 'propN', + + }, +}); + + +// ////////////////////////////////////////////////////////////////////// +// Generic combinators +// ////////////////////////////////////////////////////////////////////// + +/** + * Creates a rule to match one or more of the rules separated by `sep`. + * + * @param {RuleOrLiteral} sep + * + * @param {RuleOrLiteral} rule + * + * @return {SeqRule} + * + */ +function sepR(sep, rule) { + return seq(rule, repeat(seq(sep, rule))); +} + +/** + * Creates a rule that requires indentation before and dedentation after. + * + * @param {GrammarSymbols} $ + * + * @param {RuleOrLiteral[]} rule + * + * @return {SeqRule} + * + */ +function indent($, ...rule) { + return seq( + $._indent, + ...rule, + $._dedent, + ); +} + +// 1 or more $RULE ending with a NEWLINE +/** + * Creates a rule that uses an indentation block, where each line is a rule. + * The indentation is required before and dedentation is required after. 
+ * + * @param {GrammarSymbols} $ + * + * @param {RuleOrLiteral} rules + * + * @return {SeqRule} + */ +function block($, rules) { + return indent($, repeat1(seq(rules, $._newline))); +} + +// ////////////////////////////////////////////////////////////////////// +// Language-specific combinators +// ////////////////////////////////////////////////////////////////////// + +/** + * Creates a rule that matches a rule with a dot or two dots in front. + * + * @param {RuleOrLiteral} rule + * + * @return {ChoiceRule} + */ +function maybeDotted(rule) { + return choice( + rule, // Relevant + seq('.', rule), // Irrelevant + seq('..', rule), // NonStrict + ); +} + +/** + * Flattens an array of arrays. + * + * @param {Array<Array<Array<string>>>} arrOfArrs + * + * @return {Array<Array<string>>} + * + */ +function flatten(arrOfArrs) { + return arrOfArrs.reduce((res, arr) => [...res, ...arr], []); +} + +/** + * A callback function that takes a left and right string and returns a rule. + * @callback encloseWithCallback + * @param {string} left + * @param {string} right + * @return {RuleOrLiteral} + * @see encloseWith + * @see enclose + */ + +/** + * Creates a rule that matches a sequence of rules enclosed by a pair of strings. + * + * @param {encloseWithCallback} fn + * + * @param {Array<Array<Array<string>>>} pairs + * + * @return {ChoiceRule} + * + */ +function encloseWith(fn, ...pairs) { + return choice(...flatten(pairs).map(([left, right]) => fn(left, right))); +} + +/** + * + * @param {RuleOrLiteral} expr + * + * @param {Array<Array<Array<string>>>} pairs + * + * @return {ChoiceRule} + * + */ +function enclose(expr, ...pairs) { + return encloseWith((left, right) => seq(left, expr, right), ...pairs); +} + +/** + * Creates a rule that matches a sequence of rules enclosed by `(` and `)`. + * + * @param {RuleOrLiteral[]} rules + * + * @return {ChoiceRule} + * + */ +function paren(...rules) { + return enclose(seq(...rules), PAREN); +} + +/** + * Creates a rule that matches a sequence of rules enclosed by `{` and `}`. 
+ * + * @param {RuleOrLiteral[]} rules + * + * @return {ChoiceRule} + * + */ +function brace(...rules) { + return enclose(seq(...rules), BRACE1); +} + +/** + * Creates a rule that matches a sequence of rules enclosed by `{{` and `}}`. + * + * @param {RuleOrLiteral[]} rules + * + * @return {ChoiceRule} + * + */ +function brace_double(...rules) { + return enclose(seq(...rules), BRACE2); +} + +/** + * Creates a rule that matches a sequence of rules enclosed by `(|` and `|)`. + * + * @param {RuleOrLiteral[]} rules + * + * @return {ChoiceRule} + * + */ +function idiom(...rules) { + return enclose(seq(...rules), IDIOM); +} diff --git a/agda/src/scanner.c b/agda/src/scanner.c new file mode 100644 index 0000000..cab341b --- /dev/null +++ b/agda/src/scanner.c @@ -0,0 +1,291 @@ +#include "tree_sitter/parser.h" +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define VEC_RESIZE(vec, _cap) \ + void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \ + assert(tmp != NULL); \ + (vec).data = tmp; \ + (vec).cap = (_cap); + +#define VEC_GROW(vec, _cap) \ + if ((vec).cap < (_cap)) { \ + VEC_RESIZE((vec), (_cap)); \ + } + +#define VEC_PUSH(vec, el) \ + if ((vec).cap == (vec).len) { \ + VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \ + } \ + (vec).data[(vec).len++] = (el); + +#define VEC_POP(vec) (vec).len--; + +#define VEC_NEW \ + { .len = 0, .cap = 0, .data = NULL } + +#define VEC_BACK(vec) ((vec).data[(vec).len - 1]) + +#define VEC_FREE(vec) \ + { \ + if ((vec).data != NULL) \ + free((vec).data); \ + } + +#define VEC_CLEAR(vec) (vec).len = 0; + +#define QUEUE_RESIZE(queue, _cap) \ + do { \ + void *tmp = realloc((queue).data, (_cap) * sizeof((queue).data[0])); \ + assert(tmp != NULL); \ + (queue).data = tmp; \ + (queue).cap = (_cap); \ + } while (0) + +#define QUEUE_GROW(queue, _cap) \ + do { \ + if ((queue).cap < (_cap)) { \ + QUEUE_RESIZE((queue), (_cap)); \ + } \ + } while (0) + +#define QUEUE_PUSH(queue, el) \ + do { \ + if ((queue).cap == 0) 
{ \ + QUEUE_RESIZE((queue), 16); \ + } else if ((queue).cap == ((queue).tail - (queue).head)) { \ + QUEUE_RESIZE((queue), (queue).cap * 2); \ + } \ + (queue).data[(queue).tail % (queue).cap] = (el); \ + (queue).tail++; \ + } while (0) + +#define QUEUE_POP(queue) \ + do { \ + assert((queue).head < (queue).tail); \ + (queue).head++; \ + } while (0) + +#define QUEUE_FRONT(queue) (queue).data[(queue).head % (queue).cap] + +#define QUEUE_EMPTY(queue) ((queue).head == (queue).tail) + +#define QUEUE_NEW \ + { .head = 0, .tail = 0, .cap = 0, .data = NULL } + +#define QUEUE_FREE(queue) \ + do { \ + if ((queue).data != NULL) \ + free((queue).data); \ + } while (0) + +#define QUEUE_CLEAR(queue) \ + do { \ + (queue).head = 0; \ + (queue).tail = 0; \ + } while (0) + +enum TokenType { + NEWLINE, + INDENT, + DEDENT, +}; + +typedef struct { + uint32_t len; + uint32_t cap; + uint16_t *data; +} indent_vec; + +static indent_vec indent_vec_new() { + indent_vec vec = VEC_NEW; + vec.data = calloc(1, sizeof(uint16_t)); + vec.cap = 1; + return vec; +} + +typedef struct { + uint32_t head; + uint32_t tail; + uint32_t cap; + uint16_t *data; +} token_queue; + +static token_queue token_queue_new() { + token_queue queue = QUEUE_NEW; + queue.data = calloc(1, sizeof(uint16_t)); + queue.cap = 1; + return queue; +} + +typedef struct { + indent_vec indents; + uint32_t queued_dedent_count; + token_queue tokens; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +bool tree_sitter_agda_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + + if (QUEUE_EMPTY(scanner->tokens)) { + if (valid_symbols[DEDENT] && scanner->queued_dedent_count > 0) { + scanner->queued_dedent_count--; + QUEUE_PUSH(scanner->tokens, DEDENT); + QUEUE_PUSH(scanner->tokens, NEWLINE); + } else { + bool skipped_newline = false; + + while 
(lexer->lookahead == ' ' || lexer->lookahead == '\t' || + lexer->lookahead == '\r' || lexer->lookahead == '\n') { + if (lexer->lookahead == '\n') { + skipped_newline = true; + skip(lexer); + } else { + skip(lexer); + } + } + + if (lexer->eof(lexer)) { + if (valid_symbols[DEDENT] && scanner->indents.len > 1) { + VEC_POP(scanner->indents); + QUEUE_PUSH(scanner->tokens, DEDENT); + QUEUE_PUSH(scanner->tokens, NEWLINE); + } else if (valid_symbols[NEWLINE]) { + QUEUE_PUSH(scanner->tokens, NEWLINE); + } + } else { + bool next_token_is_comment = false; + + uint16_t indent_length = (uint16_t)lexer->get_column(lexer); + + bool indent = indent_length > VEC_BACK(scanner->indents); + bool dedent = indent_length < VEC_BACK(scanner->indents); + + if (!next_token_is_comment) { + if (skipped_newline) { + if (indent) { + if (valid_symbols[INDENT]) { + VEC_PUSH(scanner->indents, indent_length); + QUEUE_PUSH(scanner->tokens, INDENT); + } + } else if (dedent) { + if (valid_symbols[NEWLINE]) { + QUEUE_PUSH(scanner->tokens, NEWLINE); + } + } else { + if (valid_symbols[NEWLINE]) { + QUEUE_PUSH(scanner->tokens, NEWLINE); + } + } + } else { + if (indent) { + if (valid_symbols[INDENT]) { + VEC_PUSH(scanner->indents, indent_length); + QUEUE_PUSH(scanner->tokens, INDENT); + } + } else if (dedent) { + VEC_POP(scanner->indents); + while (indent_length < VEC_BACK(scanner->indents)) { + VEC_POP(scanner->indents); + scanner->queued_dedent_count++; + } + if (valid_symbols[DEDENT]) { + QUEUE_PUSH(scanner->tokens, DEDENT); + QUEUE_PUSH(scanner->tokens, NEWLINE); + } else { + scanner->queued_dedent_count++; + } + } + } + } + } + } + } + + if (QUEUE_EMPTY(scanner->tokens)) { + return false; + } + + lexer->result_symbol = QUEUE_FRONT(scanner->tokens); + QUEUE_POP(scanner->tokens); + return true; +} + +unsigned tree_sitter_agda_external_scanner_serialize(void *payload, + char *buffer) { + Scanner *scanner = (Scanner *)payload; + + if (scanner->indents.len * sizeof(uint16_t) + 1 > + 
TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + return 0; + } + + unsigned size = 0; + + buffer[size++] = (char)scanner->queued_dedent_count; + + memcpy(&buffer[size], scanner->indents.data, + scanner->indents.len * sizeof(uint16_t)); + size += (unsigned)(scanner->indents.len * sizeof(uint16_t)); + + return size; +} + +void tree_sitter_agda_external_scanner_deserialize(void *payload, + const char *buffer, + unsigned length) { + Scanner *scanner = (Scanner *)payload; + + scanner->queued_dedent_count = 0; + VEC_CLEAR(scanner->indents); + + if (length == 0) { + if (buffer == NULL) { + VEC_PUSH(scanner->indents, 0); + } + return; + } + + scanner->queued_dedent_count = (uint8_t)buffer[0]; + + unsigned size = 1; + + if (length > size) { + VEC_GROW(scanner->indents, + (uint32_t)(length - size) / sizeof(uint16_t)); + scanner->indents.len = (length - size) / sizeof(uint16_t); + memcpy(scanner->indents.data, &buffer[size], + scanner->indents.len * sizeof(uint16_t)); + size += (unsigned)(scanner->indents.len * sizeof(uint16_t)); + } + + if (scanner->indents.len == 0) { + VEC_PUSH(scanner->indents, 0); + return; + } + + assert(size == length); +} + +void *tree_sitter_agda_external_scanner_create() { + Scanner *scanner = calloc(1, sizeof(Scanner)); + scanner->indents = indent_vec_new(); + scanner->tokens = token_queue_new(); + tree_sitter_agda_external_scanner_deserialize(scanner, NULL, 0); + return scanner; +} + +void tree_sitter_agda_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + VEC_FREE(scanner->indents); + QUEUE_FREE(scanner->tokens); + free(scanner); +} diff --git a/bash/LICENSE b/bash/LICENSE new file mode 100644 index 0000000..aa9f858 --- /dev/null +++ b/bash/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/bash/examples/atom.sh b/bash/examples/atom.sh new file mode 100755 index 0000000..6b0e944 --- /dev/null +++ b/bash/examples/atom.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +if [ "$(uname)" == 'Darwin' ]; then + OS='Mac' +elif [ "$(expr substr $(uname -s) 1 5)" == 'Linux' ]; then + OS='Linux' +else + echo "Your platform ($(uname -a)) is not supported." 
+ exit 1 +fi + +if [ "$(basename $0)" == 'atom-beta' ]; then + BETA_VERSION=true +else + BETA_VERSION= +fi + +export ATOM_DISABLE_SHELLING_OUT_FOR_ENVIRONMENT=true + +while getopts ":wtfvh-:" opt; do + case "$opt" in + -) + case "${OPTARG}" in + wait) + WAIT=1 + ;; + help|version) + REDIRECT_STDERR=1 + EXPECT_OUTPUT=1 + ;; + foreground|benchmark|benchmark-test|test) + EXPECT_OUTPUT=1 + ;; + esac + ;; + w) + WAIT=1 + ;; + h|v) + REDIRECT_STDERR=1 + EXPECT_OUTPUT=1 + ;; + f|t) + EXPECT_OUTPUT=1 + ;; + esac +done + +if [ $REDIRECT_STDERR ]; then + exec 2> /dev/null +fi + +if [ $EXPECT_OUTPUT ]; then + export ELECTRON_ENABLE_LOGGING=1 +fi + +if [ $OS == 'Mac' ]; then + if [ -L "$0" ]; then + SCRIPT="$(readlink "$0")" + else + SCRIPT="$0" + fi + ATOM_APP="$(dirname "$(dirname "$(dirname "$(dirname "$SCRIPT")")")")" + if [ "$ATOM_APP" == . ]; then + unset ATOM_APP + else + ATOM_PATH="$(dirname "$ATOM_APP")" + ATOM_APP_NAME="$(basename "$ATOM_APP")" + fi + + if [ -n "$BETA_VERSION" ]; then + ATOM_EXECUTABLE_NAME="Atom Beta" + else + ATOM_EXECUTABLE_NAME="Atom" + fi + + if [ -z "${ATOM_PATH}" ]; then + # If ATOM_PATH isn't set, check /Applications and then ~/Applications for Atom.app + if [ -x "/Applications/$ATOM_APP_NAME" ]; then + ATOM_PATH="/Applications" + elif [ -x "$HOME/Applications/$ATOM_APP_NAME" ]; then + ATOM_PATH="$HOME/Applications" + else + # We haven't found an Atom.app, use spotlight to search for Atom + ATOM_PATH="$(mdfind "kMDItemCFBundleIdentifier == 'com.github.atom'" | grep -v ShipIt | head -1 | xargs -0 dirname)" + + # Exit if Atom can't be found + if [ ! -x "$ATOM_PATH/$ATOM_APP_NAME" ]; then + echo "Cannot locate ${ATOM_APP_NAME}, it is usually located in /Applications. Set the ATOM_PATH environment variable to the directory containing ${ATOM_APP_NAME}." + exit 1 + fi + fi + fi + + if [ $EXPECT_OUTPUT ]; then + "$ATOM_PATH/$ATOM_APP_NAME/Contents/MacOS/$ATOM_EXECUTABLE_NAME" --executed-from="$(pwd)" --pid=$$ "$@" + exit $? 
+ else + open -a "$ATOM_PATH/$ATOM_APP_NAME" -n --args --executed-from="$(pwd)" --pid=$$ --path-environment="$PATH" "$@" + fi +elif [ $OS == 'Linux' ]; then + SCRIPT=$(readlink -f "$0") + USR_DIRECTORY=$(readlink -f $(dirname $SCRIPT)/..) + + if [ -n "$BETA_VERSION" ]; then + ATOM_PATH="$USR_DIRECTORY/share/atom-beta/atom" + else + ATOM_PATH="$USR_DIRECTORY/share/atom/atom" + fi + + ATOM_HOME="${ATOM_HOME:-$HOME/.atom}" + mkdir -p "$ATOM_HOME" + + : ${TMPDIR:=/tmp} + + [ -x "$ATOM_PATH" ] || ATOM_PATH="$TMPDIR/atom-build/Atom/atom" + + if [ $EXPECT_OUTPUT ]; then + "$ATOM_PATH" --executed-from="$(pwd)" --pid=$$ "$@" + exit $? + else + ( + nohup "$ATOM_PATH" --executed-from="$(pwd)" --pid=$$ "$@" > "$ATOM_HOME/nohup.out" 2>&1 + if [ $? -ne 0 ]; then + cat "$ATOM_HOME/nohup.out" + exit $? + fi + ) & + fi +fi + +# Exits this process when Atom is used as $EDITOR +on_die() { + exit 0 +} +trap 'on_die' SIGQUIT SIGTERM + +# If the wait flag is set, don't exit this process until Atom tells it to. +if [ $WAIT ]; then + while true; do + sleep 1 + done +fi diff --git a/bash/examples/clean-old.sh b/bash/examples/clean-old.sh new file mode 100755 index 0000000..cda80f2 --- /dev/null +++ b/bash/examples/clean-old.sh @@ -0,0 +1,165 @@ +#!/bin/bash + +# look for old 0.x cruft, and get rid of it. +# Should already be sitting in the npm folder. + +# This doesn't have to be quite as cross-platform as install.sh. +# There are some bash-isms, because maintaining *two* +# fully-portable posix/bourne sh scripts is too much for +# one project with a sane maintainer. + +# If readlink isn't available, then this is just too tricky. +# However, greadlink is fine, so Solaris can join the party, too. +readlink="readlink" +which $readlink >/dev/null 2>/dev/null +if [ $? -ne 0 ]; then + readlink="greadlink" + which $readlink >/dev/null 2>/dev/null + if [ $? -ne 0 ]; then + echo "Can't find the readlink or greadlink command. Aborting." 
+ exit 1 + fi +fi + +if [ "x$npm_config_prefix" != "x" ]; then + PREFIXES=$npm_config_prefix +else + node="$NODE" + if [ "x$node" = "x" ]; then + node=`which node` + fi + if [ "x$node" = "x" ]; then + echo "Can't find node to determine prefix. Aborting." + exit 1 + fi + + + PREFIX=`dirname $node` + PREFIX=`dirname $PREFIX` + echo "cleanup prefix=$PREFIX" + PREFIXES=$PREFIX + + altprefix=`"$node" -e process.installPrefix` + if [ "x$altprefix" != "x" ] && [ "x$altprefix" != "x$PREFIX" ]; then + echo "altprefix=$altprefix" + PREFIXES="$PREFIX $altprefix" + fi +fi + +# now prefix is where npm would be rooted by default +# go hunting. + +packages= +for prefix in $PREFIXES; do + packages="$packages + "`ls "$prefix"/lib/node/.npm 2>/dev/null | grep -v .cache` +done + +packages=`echo $packages` + +filelist=() +fid=0 + +for prefix in $PREFIXES; do + # remove any links into the .npm dir, or links to + # version-named shims/symlinks. + for folder in share/man bin lib/node; do + find $prefix/$folder -type l | while read file; do + target=`$readlink $file | grep '/\.npm/'` + if [ "x$target" != "x" ]; then + # found one! + filelist[$fid]="$file" + let 'fid++' + # also remove any symlinks to this file. + base=`basename "$file"` + base=`echo "$base" | awk -F@ '{print $1}'` + if [ "x$base" != "x" ]; then + find "`dirname $file`" -type l -name "$base"'*' \ + | while read l; do + target=`$readlink "$l" | grep "$base"` + if [ "x$target" != "x" ]; then + filelist[$fid]="$1" + let 'fid++' + fi + done + fi + fi + done + + # Scour for shim files. These are relics of 0.2 npm installs. + # note: grep -r is not portable. + find $prefix/$folder -type f \ + | xargs grep -sl '// generated by npm' \ + | while read file; do + filelist[$fid]="$file" + let 'fid++' + done + done + + # now remove the package modules, and the .npm folder itself. 
+ if [ "x$packages" != "x" ]; then + for pkg in $packages; do + filelist[$fid]="$prefix/lib/node/$pkg" + let 'fid++' + for i in $prefix/lib/node/$pkg\@*; do + filelist[$fid]="$i" + let 'fid++' + done + done + fi + + for folder in lib/node/.npm lib/npm share/npm; do + if [ -d $prefix/$folder ]; then + filelist[$fid]="$prefix/$folder" + let 'fid++' + fi + done +done + +# now actually clean, but only if there's anything TO clean +if [ "${#filelist[@]}" -gt 0 ]; then + echo "" + echo "This script will find and eliminate any shims, symbolic" + echo "links, and other cruft that was installed by npm 0.x." + echo "" + + if [ "x$packages" != "x" ]; then + echo "The following packages appear to have been installed with" + echo "an old version of npm, and will be removed forcibly:" + for pkg in $packages; do + echo " $pkg" + done + echo "Make a note of these. You may want to install them" + echo "with npm 1.0 when this process is completed." + echo "" + fi + + OK= + if [ "x$1" = "x-y" ]; then + OK="yes" + fi + + while [ "$OK" != "y" ] && [ "$OK" != "yes" ] && [ "$OK" != "no" ]; do + echo "Is this OK?" + echo " enter 'yes' or 'no'" + echo " or 'show' to see a list of files " + read OK + if [ "x$OK" = "xshow" ] || [ "x$OK" = "xs" ]; then + for i in "${filelist[@]}"; do + echo "$i" + done + fi + done + if [ "$OK" = "no" ]; then + echo "Aborting" + exit 1 + fi + for i in "${filelist[@]}"; do + rm -rf "$i" + done +fi + +echo "" +echo 'All clean!' + +exit 0 diff --git a/bash/examples/doc-build.sh b/bash/examples/doc-build.sh new file mode 100755 index 0000000..6181902 --- /dev/null +++ b/bash/examples/doc-build.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash + +if [[ $DEBUG != "" ]]; then + set -x +fi +set -o errexit +set -o pipefail + +if ! 
[ -x node_modules/.bin/marked-man ]; then + ps=0 + if [ -f .building_marked-man ]; then + pid=$(cat .building_marked-man) + ps=$(ps -p $pid | grep $pid | wc -l) || true + fi + + if [ -f .building_marked-man ] && [ $ps != 0 ]; then + while [ -f .building_marked-man ]; do + sleep 1 + done + else + # a race to see which make process will be the one to install marked-man + echo $$ > .building_marked-man + sleep 1 + if [ $(cat .building_marked-man) == $$ ]; then + make node_modules/.bin/marked-man + rm .building_marked-man + else + while [ -f .building_marked-man ]; do + sleep 1 + done + fi + fi +fi + +if ! [ -x node_modules/.bin/marked ]; then + ps=0 + if [ -f .building_marked ]; then + pid=$(cat .building_marked) + ps=$(ps -p $pid | grep $pid | wc -l) || true + fi + + if [ -f .building_marked ] && [ $ps != 0 ]; then + while [ -f .building_marked ]; do + sleep 1 + done + else + # a race to see which make process will be the one to install marked + echo $$ > .building_marked + sleep 1 + if [ $(cat .building_marked) == $$ ]; then + make node_modules/.bin/marked + rm .building_marked + else + while [ -f .building_marked ]; do + sleep 1 + done + fi + fi +fi + +src=$1 +dest=$2 +name=$(basename ${src%.*}) +date=$(date -u +'%Y-%m-%d %H:%M:%S') +version=$(node cli.js -v) + +mkdir -p $(dirname $dest) + +html_replace_tokens () { + local url=$1 + sed "s|@NAME@|$name|g" \ + | sed "s|@DATE@|$date|g" \ + | sed "s|@URL@|$url|g" \ + | sed "s|@VERSION@|$version|g" \ + | perl -p -e 's/<h1([^>]*)>([^\(]*\([0-9]\)) -- (.*?)<\/h1>/<h1>\2<\/h1> <p>\3<\/p>/g' \ + | perl -p -e 's/npm-npm/npm/g' \ + | perl -p -e 's/([^"-])(npm-)?README(?!\.html)(\(1\))?/\1<a href="..\/..\/doc\/README.html">README<\/a>/g' \ + | perl -p -e 's/<title><a href="[^"]+README.html">README<\/a><\/title>/<title>README<\/title>/g' \ + | perl -p -e 's/([^"-])([^\(> ]+)(\(1\))/\1<a href="..\/cli\/\2.html">\2\3<\/a>/g' \ + | perl -p -e 's/([^"-])([^\(> ]+)(\(3\))/\1<a href="..\/api\/\2.html">\2\3<\/a>/g' \ + | perl -p -e 's/([^"-])([^\(> ]+)(\(5\))/\1<a href="..\/files\/\2.html">\2\3<\/a>/g' \ + | perl -p -e 's/([^"-])([^\(> ]+)(\(7\))/\1<a href="..\/misc\/\2.html">\2\3<\/a>/g' \ + | perl -p -e 's/\([1357]\)<\/a><\/h1>/<\/a><\/h1>/g' \ + | (if [ $(basename $(dirname $dest)) == "doc" ]; then + perl -p -e 's/ href="\.\.\// href="/g' + else + cat + fi) +} + +man_replace_tokens () { + sed "s|@VERSION@|$version|g" \ + | perl -p -e 's/(npm\\-)?([a-zA-Z\\\.\-]*)\(1\)/npm help \2/g' \ + | perl -p -e 's/(npm\\-)?([a-zA-Z\\\.\-]*)\(([57])\)/npm help \3 \2/g' \ + | perl -p -e 's/(npm\\-)?([a-zA-Z\\\.\-]*)\(3\)/npm apihelp \2/g' \ + | perl -p -e 's/npm\(1\)/npm help npm/g' \ + | perl -p -e 's/npm\(3\)/npm apihelp npm/g' +} + +case $dest in + *.[1357]) + ./node_modules/.bin/marked-man --roff $src \ + | man_replace_tokens > $dest + exit $? + ;; + *.html) + url=${dest/html\//} + (cat html/dochead.html && \ + cat $src | ./node_modules/.bin/marked && + cat html/docfoot.html)\ + | html_replace_tokens $url \ + > $dest + exit $? + ;; + *) + echo "Invalid destination type: $dest" >&2 + exit 1 + ;; +esac diff --git a/bash/examples/install.sh b/bash/examples/install.sh new file mode 100755 index 0000000..e6624f0 --- /dev/null +++ b/bash/examples/install.sh @@ -0,0 +1,270 @@ +#!/bin/sh + +# A word about this shell script: +# +# It must work everywhere, including on systems that lack +# a /bin/bash, map 'sh' to ksh, ksh97, bash, ash, or zsh, +# and potentially have either a posix shell or bourne +# shell living at /bin/sh.
+# +# See this helpful document on writing portable shell scripts: +# http://www.gnu.org/s/hello/manual/autoconf/Portable-Shell.html +# +# The only shell it won't ever work on is cmd.exe. + +if [ "x$0" = "xsh" ]; then + # run as curl | sh + # on some systems, you can just do cat>npm-install.sh + # which is a bit cuter. But on others, &1 is already closed, + # so catting to another script file won't do anything. + # Follow Location: headers, and fail on errors + curl -f -L -s https://www.npmjs.org/install.sh > npm-install-$$.sh + ret=$? + if [ $ret -eq 0 ]; then + (exit 0) + else + rm npm-install-$$.sh + echo "Failed to download script" >&2 + exit $ret + fi + sh npm-install-$$.sh + ret=$? + rm npm-install-$$.sh + exit $ret +fi + +# See what "npm_config_*" things there are in the env, +# and make them permanent. +# If this fails, it's not such a big deal. +configures="`env | grep 'npm_config_' | sed -e 's|^npm_config_||g'`" + +npm_config_loglevel="error" +if [ "x$npm_debug" = "x" ]; then + (exit 0) +else + echo "Running in debug mode." + echo "Note that this requires bash or zsh." + set -o xtrace + set -o pipefail + npm_config_loglevel="verbose" +fi +export npm_config_loglevel + +# make sure that node exists +node=`which node 2>&1` +ret=$? +if [ $ret -eq 0 ] && [ -x "$node" ]; then + (exit 0) +else + echo "npm cannot be installed without node.js." >&2 + echo "Install node first, and then try again." >&2 + echo "" >&2 + echo "Maybe node is installed, but not in the PATH?" >&2 + echo "Note that running as sudo can change envs." >&2 + echo "" + echo "PATH=$PATH" >&2 + exit $ret +fi + +# set the temp dir +TMP="${TMPDIR}" +if [ "x$TMP" = "x" ]; then + TMP="/tmp" +fi +TMP="${TMP}/npm.$$" +rm -rf "$TMP" || true +mkdir "$TMP" +if [ $? -ne 0 ]; then + echo "failed to mkdir $TMP" >&2 + exit 1 +fi + +BACK="$PWD" + +ret=0 +tar="${TAR}" +if [ -z "$tar" ]; then + tar="${npm_config_tar}" +fi +if [ -z "$tar" ]; then + tar=`which tar 2>&1` + ret=$? 
+fi + +if [ $ret -eq 0 ] && [ -x "$tar" ]; then + echo "tar=$tar" + echo "version:" + $tar --version + ret=$? +fi + +if [ $ret -eq 0 ]; then + (exit 0) +else + echo "No suitable tar program found." + exit 1 +fi + + + +# Try to find a suitable make +# If the MAKE environment var is set, use that. +# otherwise, try to find gmake, and then make. +# If no make is found, then just execute the necessary commands. + +# XXX For some reason, make is building all the docs every time. This +# is an annoying source of bugs. Figure out why this happens. +MAKE=NOMAKE + +if [ "x$MAKE" = "x" ]; then + make=`which gmake 2>&1` + if [ $? -eq 0 ] && [ -x "$make" ]; then + (exit 0) + else + make=`which make 2>&1` + if [ $? -eq 0 ] && [ -x "$make" ]; then + (exit 0) + else + make=NOMAKE + fi + fi +else + make="$MAKE" +fi + +if [ -x "$make" ]; then + (exit 0) +else + # echo "Installing without make. This may fail." >&2 + make=NOMAKE +fi + +# If there's no bash, then don't even try to clean +if [ -x "/bin/bash" ]; then + (exit 0) +else + clean="no" +fi + +node_version=`"$node" --version 2>&1` +ret=$? +if [ $ret -ne 0 ]; then + echo "You need node to run this program." >&2 + echo "node --version reports: $node_version" >&2 + echo "with exit code = $ret" >&2 + echo "Please install node before continuing." >&2 + exit $ret +fi + +t="${npm_install}" +if [ -z "$t" ]; then + # switch based on node version. + # note that we can only use strict sh-compatible patterns here. + case $node_version in + 0.[01234567].* | v0.[01234567].*) + echo "You are using an outdated and unsupported version of" >&2 + echo "node ($node_version). Please update node and try again." >&2 + exit 99 + ;; + *) + echo "install npm@latest" + t="latest" + ;; + esac +fi + +# need to echo "" after, because Posix sed doesn't treat EOF +# as an implied end of line. +url=`(curl -SsL https://registry.npmjs.org/npm/$t; echo "") \ + | sed -e 's/^.*tarball":"//' \ + | sed -e 's/".*$//'` + +ret=$? 
+if [ "x$url" = "x" ]; then + ret=125 + # try without the -e arg to sed. + url=`(curl -SsL https://registry.npmjs.org/npm/$t; echo "") \ + | sed 's/^.*tarball":"//' \ + | sed 's/".*$//'` + ret=$? + if [ "x$url" = "x" ]; then + ret=125 + fi +fi +if [ $ret -ne 0 ]; then + echo "Failed to get tarball url for npm/$t" >&2 + exit $ret +fi + + +echo "fetching: $url" >&2 + +cd "$TMP" \ + && curl -SsL "$url" \ + | $tar -xzf - \ + && cd "$TMP"/* \ + && (ver=`"$node" bin/read-package-json.js package.json version` + isnpm10=0 + if [ $ret -eq 0 ]; then + if [ -d node_modules ]; then + if "$node" node_modules/semver/bin/semver -v "$ver" -r "1" + then + isnpm10=1 + fi + else + if "$node" bin/semver -v "$ver" -r ">=1.0"; then + isnpm10=1 + fi + fi + fi + + ret=0 + if [ $isnpm10 -eq 1 ] && [ -f "scripts/clean-old.sh" ]; then + if [ "x$skipclean" = "x" ]; then + (exit 0) + else + clean=no + fi + if [ "x$clean" = "xno" ] \ + || [ "x$clean" = "xn" ]; then + echo "Skipping 0.x cruft clean" >&2 + ret=0 + elif [ "x$clean" = "xy" ] || [ "x$clean" = "xyes" ]; then + NODE="$node" /bin/bash "scripts/clean-old.sh" "-y" + ret=$? + else + NODE="$node" /bin/bash "scripts/clean-old.sh" </dev/tty + ret=$? + fi + fi + + if [ $ret -ne 0 ]; then + echo "Aborted 0.x cleanup. Exiting." >&2 + exit $ret + fi) \ + && (if [ "x$configures" = "x" ]; then + (exit 0) + else + echo "./configure $configures" + echo "$configures" > npmrc + fi) \ + && (if [ "$make" = "NOMAKE" ]; then + (exit 0) + elif "$make" uninstall install; then + (exit 0) + else + make="NOMAKE" + fi + if [ "$make" = "NOMAKE" ]; then + "$node" cli.js rm npm -gf + "$node" cli.js install -gf + fi) \ + && cd "$BACK" \ + && rm -rf "$TMP" \ + && echo "It worked" + +ret=$? 
+if [ $ret -ne 0 ]; then + echo "It failed" >&2 +fi +exit $ret diff --git a/bash/examples/release.sh b/bash/examples/release.sh new file mode 100644 index 0000000..abe6c19 --- /dev/null +++ b/bash/examples/release.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# script for creating a zip and tarball for inclusion in node + +unset CDPATH + +set -e + +rm -rf release *.tgz || true +mkdir release +node ./cli.js pack --loglevel error >/dev/null +mv *.tgz release +cd release +tar xzf *.tgz + +mkdir node_modules +mv package node_modules/npm + +# make the zip for windows users +cp node_modules/npm/bin/*.cmd . +zipname=npm-$(node ../cli.js -v).zip +zip -q -9 -r -X "$zipname" *.cmd node_modules + +# make the tar for node's deps +cd node_modules +tarname=npm-$(node ../../cli.js -v).tgz +tar czf "$tarname" npm + +cd .. +mv "node_modules/$tarname" . + +rm -rf *.cmd +rm -rf node_modules + +echo "release/$tarname" +echo "release/$zipname" diff --git a/bash/examples/relocate.sh b/bash/examples/relocate.sh new file mode 100755 index 0000000..b7483f2 --- /dev/null +++ b/bash/examples/relocate.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Change the cli shebang to point at the specified node +# Useful for when the program is moved around after install. +# Also used by the default 'make install' in node to point +# npm at the newly installed node, rather than the first one +# in the PATH, which would be the default otherwise. 
+ +# bash /path/to/npm/scripts/relocate.sh $nodepath +# If $nodepath is blank, then it'll use /usr/bin/env + +dir="$(dirname "$(dirname "$0")")" +cli="$dir"/bin/npm-cli.js +tmp="$cli".tmp + +node="$1" +if [ "x$node" = "x" ]; then + node="/usr/bin/env node" +fi +node="#!$node" + +sed -e 1d "$cli" > "$tmp" +echo "$node" > "$cli" +cat "$tmp" >> "$cli" +rm "$tmp" +chmod ogu+x $cli diff --git a/bash/examples/test.sh b/bash/examples/test.sh new file mode 100755 index 0000000..e582929 --- /dev/null +++ b/bash/examples/test.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash + +set -e + +function usage { + cat <<-EOF +USAGE + + $0 [-dgGhv] [-f focus-string] [-s seed] + +OPTIONS + + -h print this message + + -b run make under scan-build static analyzer + + -d run tests in a debugger (either lldb or gdb) + + -g run tests with valgrind's memcheck tool + + -G run tests with valgrind's memcheck tool, including a full leak check + + -v run tests with verbose output + + -f run only tests whose description contain the given string + + -s set the seed used to control random behavior + + -z pipe tests' stderr to \`dot(1)\` to render an SVG log + + +EOF +} + +profile= +leak_check=no +mode=normal +verbose= +args=() +target=tests +export BUILDTYPE=Test +cmd="out/${BUILDTYPE}/${target}" +run_scan_build= + +if [ "$(uname -s)" == "Darwin" ]; then + export LINK="clang++ -fsanitize=address" +fi + +while getopts "bdf:s:gGhpvS" option; do + case ${option} in + h) + usage + exit + ;; + d) + mode=debug + ;; + g) + mode=valgrind + ;; + G) + mode=valgrind + leak_check=full + ;; + p) + profile=true + ;; + f) + args+=("--only=${OPTARG}") + ;; + v) + verbose=true + ;; + s) + export TREE_SITTER_SEED=${OPTARG} + ;; + S) + mode=SVG + ;; + b) + run_scan_build=true + ;; + esac +done + +if [[ -n $verbose ]]; then + args+=("--reporter=spec") +else + args+=("--reporter=singleline") +fi + +if [[ -n "$run_scan_build" ]]; then + . 
script/util/scan-build.sh + scan_build make -j2 $target +else + make -j2 $target +fi +args=${args:-""} + +if [[ -n $profile ]]; then + export CPUPROFILE=/tmp/${target}-$(date '+%s').prof +fi + +case ${mode} in + valgrind) + valgrind \ + --suppressions=./script/util/valgrind.supp \ + --dsymutil=yes \ + --leak-check=${leak_check} \ + $cmd "${args[@]}" 2>&1 | \ + grep --color -E '\w+_tests?.cc:\d+|$' + ;; + + debug) + if which -s lldb; then + lldb $cmd -- "${args[@]}" + elif which -s gdb; then + gdb $cmd -- "${args[@]}" + else + echo "No debugger found" + exit 1 + fi + ;; + + SVG) + echo "<!DOCTYPE html><style>svg { width: 100%; margin-bottom: 20px; }</style>" > index.html + $cmd "${args[@]}" 2> >(grep -v 'Assertion failed' | dot -Tsvg >> index.html) + echo "Wrote index.html" + ;; + + normal) + time $cmd "${args[@]}" + ;; +esac + +if [[ -n $profile ]]; then + pprof $cmd $CPUPROFILE +fi diff --git a/bash/examples/update-authors.sh b/bash/examples/update-authors.sh new file mode 100755 index 0000000..75a6e54 --- /dev/null +++ b/bash/examples/update-authors.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +git log --reverse --format='%aN <%aE>' | perl -wnE ' +BEGIN { + say "# Authors sorted by whether or not they\x27re me"; +} + +print $seen{$_} = $_ unless $seen{$_} +' > AUTHORS diff --git a/bash/grammar.js b/bash/grammar.js new file mode 100644 index 0000000..427670f --- /dev/null +++ b/bash/grammar.js @@ -0,0 +1,1164 @@ +/** + * @file Bash grammar for tree-sitter + * @author Max Brunsfeld <maxbrunsfeld@gmail.com> + * @author Amaan Qureshi <amaanq12@gmail.com> + * @license MIT + */ + +/* eslint-disable arrow-parens */ +/* eslint-disable camelcase */ +/* eslint-disable-next-line spaced-comment */ +/// <reference types="tree-sitter-cli/dsl" /> +// @ts-check + +const SPECIAL_CHARACTERS = [ + '\'', '"', + '<', '>', + '{', '}', + '\\[', '\\]', + '(', ')', + '`', '$', + '|', '&', ';', + '\\', + '\\s', +]; + +const PREC = { + UPDATE: 0, + ASSIGN: 1, + TERNARY: 2, + LOGICAL_OR: 3, + 
LOGICAL_AND: 4, + BITWISE_OR: 5, + BITWISE_XOR: 6, + BITWISE_AND: 7, + EQUALITY: 8, + COMPARE: 9, + TEST: 10, + UNARY: 11, + SHIFT: 12, + ADD: 13, + MULTIPLY: 14, + EXPONENT: 15, + NEGATE: 16, + PREFIX: 17, + POSTFIX: 18, +}; + +module.exports = grammar({ + name: 'bash', + + conflicts: $ => [ + [$._expression, $.command_name], + [$.command, $.variable_assignments], + [$.redirected_statement, $.command], + [$.redirected_statement, $.command_substitution], + [$.function_definition, $.command_name], + [$.pipeline], + ], + + inline: $ => [ + $._statement, + $._terminator, + $._literal, + $._terminated_statement, + $._primary_expression, + $._simple_variable_name, + $._multiline_variable_name, + $._special_variable_name, + $._c_word, + $._statement_not_subshell, + ], + + externals: $ => [ + $.heredoc_start, + $.simple_heredoc_body, + $._heredoc_body_beginning, + $.heredoc_content, + $.heredoc_end, + $.file_descriptor, + $._empty_value, + $._concat, + $.variable_name, // Variable name followed by an operator like '=' or '+=' + $.test_operator, + $.regex, + $._regex_no_slash, + $._regex_no_space, + $._expansion_word, + $.extglob_pattern, + $._bare_dollar, + $._brace_start, + $._immediate_double_hash, + $._external_expansion_sym_hash, + $._external_expansion_sym_bang, + $._external_expansion_sym_equal, + '}', + ']', + '<<', + '<<-', + /\n/, + $.__error_recovery, + ], + + extras: $ => [ + $.comment, + /\s/, + /\\\r?\n/, + /\\( |\t|\v|\f)/, + ], + + supertypes: $ => [ + $._statement, + $._expression, + $._primary_expression, + ], + + word: $ => $.word, + + rules: { + program: $ => optional($._statements), + + _statements: $ => prec(1, seq( + repeat(seq( + $._statement, + $._terminator, + )), + $._statement, + optional($._terminator), + )), + + _terminated_statement: $ => repeat1(seq( + $._statement, + $._terminator, + )), + + // Statements + + _statement: $ => choice( + $._statement_not_subshell, + $.subshell, + ), + + _statement_not_subshell: $ => choice( + 
$.redirected_statement, + $.variable_assignment, + $.variable_assignments, + $.command, + $.declaration_command, + $.unset_command, + $.test_command, + $.negated_command, + $.for_statement, + $.c_style_for_statement, + $.while_statement, + $.if_statement, + $.case_statement, + $.pipeline, + $.list, + $.compound_statement, + $.function_definition, + ), + + _statement_not_pipeline: $ => prec(1, choice( + $.redirected_statement, + $.variable_assignment, + $.variable_assignments, + $.command, + $.declaration_command, + $.unset_command, + $.test_command, + $.negated_command, + $.for_statement, + $.c_style_for_statement, + $.while_statement, + $.if_statement, + $.case_statement, + $.list, + $.compound_statement, + $.function_definition, + $.subshell, + )), + + redirected_statement: $ => prec.dynamic(-1, prec(-1, choice( + seq( + field('body', $._statement), + field('redirect', choice( + repeat1(choice( + $.file_redirect, + $.heredoc_redirect, + )), + )), + ), + seq( + field('body', choice($.if_statement, $.while_statement)), + $.herestring_redirect, + ), + field('redirect', repeat1($.file_redirect)), + $.herestring_redirect, + ))), + + for_statement: $ => seq( + choice('for', 'select'), + field('variable', $._simple_variable_name), + optional(seq( + 'in', + field('value', repeat1($._literal)), + )), + $._terminator, + field('body', $.do_group), + ), + + c_style_for_statement: $ => seq( + 'for', + '((', + choice($._for_body), + '))', + optional(';'), + field('body', choice( + $.do_group, + $.compound_statement, + )), + ), + _for_body: $ => seq( + field('initializer', commaSep($._c_expression)), + $._c_terminator, + field('condition', commaSep($._c_expression)), + $._c_terminator, + field('update', commaSep($._c_expression)), + ), + + _c_expression: $ => choice( + $._c_expression_not_assignment, + alias($._c_variable_assignment, $.variable_assignment), + ), + _c_expression_not_assignment: $ => choice( + $._c_word, + $.simple_expansion, + $.expansion, + $.number, + 
$.string, + alias($._c_unary_expression, $.unary_expression), + alias($._c_binary_expression, $.binary_expression), + alias($._c_postfix_expression, $.postfix_expression), + alias($._c_parenthesized_expression, $.parenthesized_expression), + $.command_substitution, + ), + + _c_variable_assignment: $ => seq( + field('name', alias($._c_word, $.variable_name)), + '=', + field('value', $._c_expression), + ), + _c_unary_expression: $ => prec(PREC.PREFIX, seq( + field('operator', choice('++', '--')), + $._c_expression_not_assignment, + )), + _c_binary_expression: $ => { + const table = [ + [choice('+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '&=', '^=', '|='), PREC.UPDATE], + [choice('||', '-o'), PREC.LOGICAL_OR], + [choice('&&', '-a'), PREC.LOGICAL_AND], + ['|', PREC.BITWISE_OR], + ['^', PREC.BITWISE_XOR], + ['&', PREC.BITWISE_AND], + [choice('==', '!='), PREC.EQUALITY], + [choice('<', '>', '<=', '>='), PREC.COMPARE], + [choice('<<', '>>'), PREC.SHIFT], + [choice('+', '-'), PREC.ADD], + [choice('*', '/', '%'), PREC.MULTIPLY], + ['**', PREC.EXPONENT], + ]; + + return choice(...table.map(([operator, precedence]) => { + // @ts-ignore + return prec[operator === '**' ? 
'right' : 'left'](precedence, seq( + field('left', $._c_expression_not_assignment), + // @ts-ignore + field('operator', operator), + field('right', $._c_expression_not_assignment), + )); + })); + }, + _c_postfix_expression: $ => prec(PREC.POSTFIX, seq( + $._c_expression_not_assignment, + field('operator', choice('++', '--')), + )), + _c_parenthesized_expression: $ => seq( + '(', + commaSep1($._c_expression), + ')', + ), + _c_word: $ => alias(/[a-zA-Z_][a-zA-Z0-9_]*/, $.word), + + while_statement: $ => seq( + choice('while', 'until'), + field('condition', $._terminated_statement), + field('body', $.do_group), + ), + + do_group: $ => seq( + 'do', + optional($._terminated_statement), + 'done', + ), + + if_statement: $ => seq( + 'if', + field('condition', $._terminated_statement), + 'then', + optional($._terminated_statement), + repeat($.elif_clause), + optional($.else_clause), + 'fi', + ), + + elif_clause: $ => seq( + 'elif', + $._terminated_statement, + 'then', + optional($._terminated_statement), + ), + + else_clause: $ => seq( + 'else', + optional($._terminated_statement), + ), + + case_statement: $ => seq( + 'case', + field('value', $._literal), + optional($._terminator), + 'in', + optional($._terminator), + optional(seq( + repeat($.case_item), + alias($.last_case_item, $.case_item), + )), + 'esac', + ), + + case_item: $ => seq( + choice( + seq( + optional('('), + field('value', choice($._literal, $._extglob_blob)), + repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))), + ')', + ), + ), + optional($._statements), + prec(1, choice( + field('termination', ';;'), + field('fallthrough', choice(';&', ';;&')), + )), + ), + + last_case_item: $ => seq( + optional('('), + field('value', choice($._literal, $._extglob_blob)), + repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))), + ')', + optional($._statements), + optional(prec(1, ';;')), + ), + + function_definition: $ => prec.right(seq( + choice( + seq( + 'function', + field('name', 
$.word), + optional(seq('(', ')')), + ), + seq( + field('name', $.word), + '(', ')', + ), + ), + field( + 'body', + choice( + $.compound_statement, + $.subshell, + $.test_command), + ), + field('redirect', optional($.file_redirect)), + )), + + compound_statement: $ => seq( + '{', + optional($._terminated_statement), + token(prec(-1, '}')), + ), + + subshell: $ => seq( + '(', + $._statements, + ')', + ), + + pipeline: $ => prec.right(seq( + $._statement_not_pipeline, + repeat1(seq( + choice('|', '|&'), + $._statement_not_pipeline, + )), + )), + + list: $ => prec.left(-1, seq( + $._statement, + choice('&&', '||'), + $._statement, + )), + + // Commands + + negated_command: $ => seq( + '!', + choice( + prec(2, $.command), + prec(1, $.variable_assignment), + $.test_command, + $.subshell, + ), + ), + + test_command: $ => seq( + choice( + seq('[', optional(choice($._expression, $.redirected_statement)), ']'), + seq('[[', $._expression, ']]'), + seq('((', optional($._expression), '))'), + ), + ), + + declaration_command: $ => prec.left(seq( + choice('declare', 'typeset', 'export', 'readonly', 'local'), + repeat(choice( + $._literal, + $._simple_variable_name, + $.variable_assignment, + )), + )), + + unset_command: $ => prec.left(seq( + choice('unset', 'unsetenv'), + repeat(choice( + $._literal, + $._simple_variable_name, + )), + )), + + command: $ => prec.left(seq( + repeat(choice( + $.variable_assignment, + field('redirect', choice($.file_redirect, $.herestring_redirect)), + )), + field('name', $.command_name), + choice( + repeat(choice( + field('argument', $._literal), + field('argument', alias($._bare_dollar, '$')), + field('argument', seq( + choice('=~', '=='), + choice($._literal, $.regex), + )), + field('redirect', $.herestring_redirect), + )), + $.subshell, + ), + )), + + command_name: $ => $._literal, + + variable_assignment: $ => seq( + field('name', choice( + $.variable_name, + $.subscript, + )), + choice( + '=', + '+=', + ), + field('value', choice( + 
$._literal, + $.array, + $._empty_value, + alias($._comment_word, $.word), + )), + ), + + variable_assignments: $ => seq($.variable_assignment, repeat1($.variable_assignment)), + + subscript: $ => seq( + field('name', $.variable_name), + '[', + field('index', choice($._literal, $.binary_expression, $.unary_expression, $.parenthesized_expression)), + optional($._concat), + ']', + optional($._concat), + ), + + file_redirect: $ => prec.left(seq( + field('descriptor', optional($.file_descriptor)), + choice( + seq( + choice('<', '>', '>>', '&>', '&>>', '<&', '>&', '>|'), + field('destination', repeat1($._literal)), + ), + seq( + choice('<&-', '>&-'), // close file descriptor + optional(field('destination', $._literal)), + ), + ), + )), + + heredoc_redirect: $ => seq( + field('descriptor', optional($.file_descriptor)), + choice('<<', '<<-'), + $.heredoc_start, + optional(choice( + alias($._heredoc_pipeline, $.pipeline), + seq( + field('redirect', repeat1($.file_redirect)), + optional($._heredoc_expression), + ), + $._heredoc_expression, + $._heredoc_command, + )), + /\n/, + choice($._heredoc_body, $._simple_heredoc_body), + ), + + _heredoc_pipeline: $ => seq( + choice('|', '|&'), + $._statement, + ), + + _heredoc_expression: $ => seq( + field('operator', choice('||', '&&')), + field('right', $._statement), + ), + + _heredoc_command: $ => repeat1(field('argument', $._literal)), + + _heredoc_body: $ => seq( + $.heredoc_body, + $.heredoc_end, + ), + + heredoc_body: $ => seq( + $._heredoc_body_beginning, + repeat(choice( + $.expansion, + $.simple_expansion, + $.command_substitution, + $.heredoc_content, + )), + ), + + _simple_heredoc_body: $ => seq( + alias($.simple_heredoc_body, $.heredoc_body), + $.heredoc_end, + ), + + herestring_redirect: $ => prec.left(seq( + field('descriptor', optional($.file_descriptor)), + '<<<', + $._literal, + )), + + // Expressions + + _expression: $ => choice( + $._literal, + $.unary_expression, + $.ternary_expression, + $.binary_expression, + 
$.postfix_expression, + $.parenthesized_expression, + ), + + // https://tldp.org/LDP/abs/html/opprecedence.html + binary_expression: $ => { + const table = [ + [choice('+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '&=', '^=', '|='), PREC.UPDATE], + [choice('=', '=~'), PREC.ASSIGN], + ['||', PREC.LOGICAL_OR], + ['&&', PREC.LOGICAL_AND], + ['|', PREC.BITWISE_OR], + ['^', PREC.BITWISE_XOR], + ['&', PREC.BITWISE_AND], + [choice('==', '!='), PREC.EQUALITY], + [choice('<', '>', '<=', '>='), PREC.COMPARE], + [$.test_operator, PREC.TEST], + [choice('<<', '>>'), PREC.SHIFT], + [choice('+', '-'), PREC.ADD], + [choice('*', '/', '%'), PREC.MULTIPLY], + ['**', PREC.EXPONENT], + ]; + + return choice( + choice(...table.map(([operator, precedence]) => { + // @ts-ignore + return prec[operator === '**' ? 'right' : 'left'](precedence, seq( + field('left', $._expression), + // @ts-ignore + field('operator', operator), + field('right', $._expression), + )); + })), + prec(PREC.ASSIGN, seq( + field('left', $._expression), + field('operator', '=~'), + field('right', alias($._regex_no_space, $.regex)), + )), + prec(PREC.EQUALITY, seq( + field('left', $._expression), + field('operator', choice('==', '!=')), + field('right', $._extglob_blob), + )), + ); + }, + + ternary_expression: $ => prec.left(PREC.TERNARY, seq( + field('condition', $._expression), + '?', + field('consequence', $._expression), + ':', + field('alternative', $._expression), + )), + + unary_expression: $ => choice( + prec(PREC.PREFIX, seq( + field('operator', tokenLiterals(1, '++', '--')), + $._expression, + )), + prec(PREC.UNARY, seq( + field('operator', tokenLiterals(1, '-', '+', '~')), + $._expression, + )), + prec.right(PREC.UNARY, seq( + field('operator', '!'), + $._expression, + )), + prec.right(PREC.TEST, seq( + field('operator', $.test_operator), + $._expression, + )), + ), + + postfix_expression: $ => prec(PREC.POSTFIX, seq( + $._expression, + field('operator', choice('++', '--')), + )), + + 
parenthesized_expression: $ => seq( + '(', + $._expression, + ')', + ), + + // Literals + + _literal: $ => choice( + $.concatenation, + $._primary_expression, + alias(prec(-2, repeat1($._special_character)), $.word), + ), + + _primary_expression: $ => choice( + $.word, + alias($.test_operator, $.word), + $.string, + $.raw_string, + $.translated_string, + $.ansi_c_string, + $.number, + $.expansion, + $.simple_expansion, + $.command_substitution, + $.process_substitution, + $.arithmetic_expansion, + $.brace_expression, + ), + + arithmetic_expansion: $ => choice( + seq(choice('$((', '(('), commaSep1($._arithmetic_expression), '))'), + seq('$[', $._arithmetic_expression, ']'), + ), + + brace_expression: $ => seq( + alias($._brace_start, '{'), + alias(token.immediate(/\d+/), $.number), + token.immediate('..'), + alias(token.immediate(/\d+/), $.number), + token.immediate('}'), + ), + + _arithmetic_expression: $ => prec(1, choice( + $._arithmetic_literal, + alias($._arithmetic_unary_expression, $.unary_expression), + alias($._arithmetic_ternary_expression, $.ternary_expression), + alias($._arithmetic_binary_expression, $.binary_expression), + alias($._arithmetic_postfix_expression, $.postfix_expression), + alias($._arithmetic_parenthesized_expression, $.parenthesized_expression), + $.command_substitution, + )), + + _arithmetic_literal: $ => prec(1, choice( + $.number, + $.subscript, + $.simple_expansion, + $.expansion, + $._simple_variable_name, + $.variable_name, + $.string, + )), + + _arithmetic_binary_expression: $ => { + const table = [ + [choice('+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '&=', '^=', '|='), PREC.UPDATE], + [choice('=', '=~'), PREC.ASSIGN], + ['||', PREC.LOGICAL_OR], + ['&&', PREC.LOGICAL_AND], + ['|', PREC.BITWISE_OR], + ['^', PREC.BITWISE_XOR], + ['&', PREC.BITWISE_AND], + [choice('==', '!='), PREC.EQUALITY], + [choice('<', '>', '<=', '>='), PREC.COMPARE], + [choice('<<', '>>'), PREC.SHIFT], + [choice('+', '-'), PREC.ADD], + [choice('*', '/', 
'%'), PREC.MULTIPLY], + ['**', PREC.EXPONENT], + ]; + + return choice(...table.map(([operator, precedence]) => { + // @ts-ignore + return prec.left(precedence, seq( + field('left', $._arithmetic_expression), + // @ts-ignore + field('operator', operator), + field('right', $._arithmetic_expression), + )); + })); + }, + + _arithmetic_ternary_expression: $ => prec.left(PREC.TERNARY, seq( + field('condition', $._arithmetic_expression), + '?', + field('consequence', $._arithmetic_expression), + ':', + field('alternative', $._arithmetic_expression), + )), + + _arithmetic_unary_expression: $ => choice( + prec(PREC.PREFIX, seq( + field('operator', tokenLiterals(1, '++', '--')), + $._arithmetic_expression, + )), + prec(PREC.UNARY, seq( + field('operator', tokenLiterals(1, '-', '+', '~')), + $._arithmetic_expression, + )), + prec.right(PREC.UNARY, seq( + field('operator', '!'), + $._arithmetic_expression, + )), + ), + + _arithmetic_postfix_expression: $ => prec(PREC.POSTFIX, seq( + $._arithmetic_expression, + field('operator', choice('++', '--')), + )), + + _arithmetic_parenthesized_expression: $ => seq( + '(', + $._arithmetic_expression, + ')', + ), + + + concatenation: $ => prec(-1, seq( + choice( + $._primary_expression, + alias($._special_character, $.word), + ), + repeat1(seq( + choice($._concat, alias(/`\s*`/, '``')), + choice( + $._primary_expression, + alias($._special_character, $.word), + alias($._comment_word, $.word), + alias($._bare_dollar, '$'), + ), + )), + optional(seq($._concat, '$')), + )), + + _special_character: _ => token(prec(-1, choice('{', '}', '[', ']'))), + + string: $ => seq( + '"', + repeat(seq( + choice( + seq(optional('$'), $.string_content), + $.expansion, + $.simple_expansion, + $.command_substitution, + $.arithmetic_expansion, + ), + optional($._concat), + )), + optional('$'), + '"', + ), + + string_content: _ => token(prec(-1, /([^"`$\\\r\n]|\\(.|\r?\n))+/)), + + translated_string: $ => seq('$', $.string), + + array: $ => seq( + '(', + 
repeat($._literal), + ')', + ), + + raw_string: _ => /'[^']*'/, + + ansi_c_string: _ => /\$'([^']|\\')*'/, + + number: $ => choice( + /-?(0x)?[0-9]+(#[0-9A-Za-z@_]+)?/, + // the base can be an expansion + seq(/-?(0x)?[0-9]+#/, $.expansion), + ), + + simple_expansion: $ => seq( + '$', + choice( + $._simple_variable_name, + $._multiline_variable_name, + $._special_variable_name, + $.variable_name, + alias('!', $.special_variable_name), + alias('#', $.special_variable_name), + ), + ), + + string_expansion: $ => seq('$', $.string), + + expansion: $ => seq( + '${', + optional($._expansion_body), + '}', + ), + _expansion_body: $ => choice( + // ${!##} ${!#} + repeat1(field( + 'operator', + choice( + alias($._external_expansion_sym_hash, '#'), + alias($._external_expansion_sym_bang, '!'), + alias($._external_expansion_sym_equal, '='), + ), + )), + seq( + optional(field('operator', token.immediate('!'))), + choice($.variable_name, $._simple_variable_name, $._special_variable_name, $.subscript), + choice( + $._expansion_expression, + $._expansion_regex, + $._expansion_regex_replacement, + $._expansion_regex_removal, + $._expansion_max_length, + $._expansion_operator, + ), + ), + seq( + field('operator', token.immediate('!')), + choice($._simple_variable_name, $.variable_name), + optional(field('operator', choice( + token.immediate('@'), + token.immediate('*'), + ))), + ), + seq( + optional(field('operator', immediateLiterals('#', '!', '='))), + choice( + $.subscript, + $._simple_variable_name, + $._special_variable_name, + $.command_substitution, + ), + repeat(field( + 'operator', + choice( + alias($._external_expansion_sym_hash, '#'), + alias($._external_expansion_sym_bang, '!'), + alias($._external_expansion_sym_equal, '='), + ), + )), + ), + ), + + _expansion_expression: $ => prec(1, seq( + field('operator', immediateLiterals('=', ':=', '-', ':-', '+', ':+', '?', ':?')), + optional(seq( + choice( + alias($._concatenation_in_expansion, $.concatenation), + 
$.command_substitution, + $.word, + $.expansion, + $.simple_expansion, + $.array, + $.string, + $.raw_string, + $.ansi_c_string, + alias($._expansion_word, $.word), + ), + )), + )), + + _expansion_regex: $ => seq( + field('operator', choice('#', alias($._immediate_double_hash, '##'), '%', '%%')), + optional(choice($.regex, alias(')', $.regex), $.string, $.raw_string, alias(/\s+/, $.regex))), + ), + + _expansion_regex_replacement: $ => seq( + field('operator', choice('/', '//', '/#', '/%')), + optional(choice( + alias($._regex_no_slash, $.regex), + $.string, + $.command_substitution, + seq($.string, alias($._regex_no_slash, $.regex)), + )), + // This can be elided + optional(seq( + field('operator', '/'), + optional(seq( + choice( + $._primary_expression, + alias(prec(-2, repeat1($._special_character)), $.word), + seq($.command_substitution, alias($._expansion_word, $.word)), + alias($._expansion_word, $.word), + alias($._concatenation_in_expansion, $.concatenation), + $.array, + ), + field('operator', optional('/')), + )), + )), + ), + + _expansion_regex_removal: $ => seq( + field('operator', choice(',', ',,', '^', '^^')), + optional($.regex), + ), + + _expansion_max_length: $ => seq( + field('operator', ':'), + optional(choice( + $._simple_variable_name, + $.number, + $.arithmetic_expansion, + $.expansion, + $.parenthesized_expression, + $.command_substitution, + alias($._expansion_max_length_binary_expression, $.binary_expression), + /\n/, + )), + optional(seq( + field('operator', ':'), + optional(choice( + $._simple_variable_name, + $.number, + $.arithmetic_expansion, + $.expansion, + $.parenthesized_expression, + $.command_substitution, + alias($._expansion_max_length_binary_expression, $.binary_expression), + /\n/, + )), + )), + ), + + _expansion_max_length_expression: $ => choice( + $._simple_variable_name, + $.number, + $.expansion, + alias($._expansion_max_length_binary_expression, $.binary_expression), + ), + _expansion_max_length_binary_expression: $ => { 
+ const table = [ + [choice('+', '-'), PREC.ADD], + [choice('*', '/', '%'), PREC.MULTIPLY], + ]; + + return choice(...table.map(([operator, precedence]) => { + // @ts-ignore + return prec.left(precedence, seq( + $._expansion_max_length_expression, + // @ts-ignore + field('operator', operator), + $._expansion_max_length_expression, + )); + })); + }, + + _expansion_operator: _ => seq( + field('operator', token.immediate('@')), + field('operator', immediateLiterals('U', 'u', 'L', 'Q', 'E', 'P', 'A', 'K', 'a', 'k')), + ), + + _concatenation_in_expansion: $ => prec(-2, seq( + choice( + $.word, + $.variable_name, + $.simple_expansion, + $.expansion, + $.string, + $.raw_string, + $.ansi_c_string, + $.command_substitution, + alias($._expansion_word, $.word), + $.array, + ), + repeat1(seq( + choice($._concat, alias(/`\s*`/, '``')), + choice( + $.word, + $.variable_name, + $.simple_expansion, + $.expansion, + $.string, + $.raw_string, + $.ansi_c_string, + $.command_substitution, + alias($._expansion_word, $.word), + $.array, + ), + )), + )), + + command_substitution: $ => choice( + seq('$(', $._statements, ')'), + seq('$(', field('redirect', $.file_redirect), ')'), + prec(1, seq('`', $._statements, '`')), + seq('$`', $._statements, '`'), + ), + + process_substitution: $ => seq( + choice('<(', '>('), + $._statements, + ')', + ), + + _extglob_blob: $ => choice( + $.extglob_pattern, + seq( + $.extglob_pattern, + choice($.string, $.expansion, $.command_substitution), + optional($.extglob_pattern), + ), + ), + + comment: _ => token(prec(-10, /#.*/)), + + _comment_word: _ => token(prec(-8, seq( + choice( + noneOf(...SPECIAL_CHARACTERS), + seq('\\', noneOf('\\s')), + ), + repeat(choice( + noneOf(...SPECIAL_CHARACTERS), + seq('\\', noneOf('\\s')), + '\\ ', + )), + ))), + + _simple_variable_name: $ => alias(/\w+/, $.variable_name), + _multiline_variable_name: $ => alias( + token(prec(-1, /(\w|\\\r?\n)+/)), + $.variable_name, + ), + + _special_variable_name: $ => alias(choice('*', 
/**
 * Returns a regular expression that matches any single character except the
 * ones provided.
 *
 * Every argument is spliced verbatim into a negated character class, so
 * callers may pass pre-escaped sequences such as `'\\s'`. The only escaping
 * done here is for an argument that is exactly one backslash.
 *
 * @param {...string} characters
 *
 * @return {RegExp}
 *
 */
function noneOf(...characters) {
  const negatedString = characters.map(c => (c === '\\' ? '\\\\' : c)).join('');
  return new RegExp(`[^${negatedString}]`);
}

/**
 * Creates a rule to optionally match one or more of the rules separated by a comma
 *
 * @param {RuleOrLiteral} rule
 *
 * @return {ChoiceRule}
 *
 */
function commaSep(rule) {
  return optional(commaSep1(rule));
}

/**
 * Creates a rule to match one or more of the rules separated by a comma
 *
 * @param {RuleOrLiteral} rule
 *
 * @return {SeqRule}
 *
 */
function commaSep1(rule) {
  return seq(rule, repeat(seq(',', rule)));
}

/**
 *
 * Turns a list of literals into a choice of `token.immediate` rules
 *
 * @param {(RegExp|String)[]} literals
 *
 * @return {ChoiceRule}
 */
function immediateLiterals(...literals) {
  return choice(...literals.map(l => token.immediate(l)));
}

/**
 *
 * Turns a list of literals into a choice of token rules, each wrapped in
 * `prec(precedence, ...)` (no aliasing is applied)
 *
 * @param {number} precedence
 *
 * @param {(RegExp|String)[]} literals
 *
 * @return {ChoiceRule}
 */
function tokenLiterals(precedence, ...literals) {
  return choice(...literals.map(l => token(prec(precedence, l))));
}
<wctype.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +#define VEC_RESIZE(vec, _cap) \ + void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \ + assert(tmp != NULL); \ + (vec).data = tmp; \ + assert((vec).data != NULL); \ + (vec).cap = (_cap); + +#define VEC_PUSH(vec, el) \ + if ((vec).cap == (vec).len) { \ + VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \ + } \ + (vec).data[(vec).len++] = (el); + +#define VEC_POP(vec) \ + { (vec).len--; } + +#define VEC_BACK(vec) ((vec).data[(vec).len - 1]) + +#define VEC_FREE(vec) \ + { \ + if ((vec).data != NULL) \ + free((vec).data); \ + (vec).data = NULL; \ + } + +#define VEC_CLEAR(vec) \ + { \ + for (uint32_t i = 0; i < (vec).len; i++) { \ + STRING_FREE((vec).data[i].word); \ + } \ + (vec).len = 0; \ + } + +#define STRING_RESIZE(vec, _cap) \ + void *tmp = realloc((vec).data, ((_cap) + 1) * sizeof((vec).data[0])); \ + assert(tmp != NULL); \ + (vec).data = tmp; \ + memset((vec).data + (vec).len, 0, \ + (((_cap) + 1) - (vec).len) * sizeof((vec).data[0])); \ + (vec).cap = (_cap); + +#define STRING_GROW(vec, _cap) \ + if ((vec).cap < (_cap)) { \ + STRING_RESIZE((vec), (_cap)); \ + } + +#define STRING_PUSH(vec, el) \ + if ((vec).cap == (vec).len) { \ + STRING_RESIZE((vec), MAX(16, (vec).len * 2)); \ + } \ + (vec).data[(vec).len++] = (el); + +#define STRING_FREE(vec) \ + if ((vec).data != NULL) \ + free((vec).data); \ + (vec).data = NULL; + +#define STRING_CLEAR(vec) \ + { \ + (vec).len = 0; \ + memset((vec).data, 0, (vec).cap * sizeof(char)); \ + } + +enum TokenType { + HEREDOC_START, + SIMPLE_HEREDOC_BODY, + HEREDOC_BODY_BEGINNING, + HEREDOC_CONTENT, + HEREDOC_END, + FILE_DESCRIPTOR, + EMPTY_VALUE, + CONCAT, + VARIABLE_NAME, + TEST_OPERATOR, + REGEX, + REGEX_NO_SLASH, + REGEX_NO_SPACE, + EXPANSION_WORD, + EXTGLOB_PATTERN, + BARE_DOLLAR, + BRACE_START, + IMMEDIATE_DOUBLE_HASH, + EXTERNAL_EXPANSION_SYM_HASH, + EXTERNAL_EXPANSION_SYM_BANG, + EXTERNAL_EXPANSION_SYM_EQUAL, + 
CLOSING_BRACE, + CLOSING_BRACKET, + HEREDOC_ARROW, + HEREDOC_ARROW_DASH, + NEWLINE, + ERROR_RECOVERY, +}; + +typedef struct { + uint32_t cap; + uint32_t len; + char *data; +} String; + +static String string_new() { + return (String){.cap = 16, .len = 0, .data = calloc(1, sizeof(char) * 17)}; +} + +typedef struct { + bool is_raw; + bool started; + bool allows_indent; + String delimiter; + String current_leading_word; +} Heredoc; + +static Heredoc heredoc_new() { + Heredoc heredoc = { + .is_raw = false, + .started = false, + .allows_indent = false, + .delimiter = string_new(), + .current_leading_word = string_new(), + }; + return heredoc; +} + +typedef struct { + uint32_t len; + uint32_t cap; + Heredoc *data; +} heredoc_vec; + +static heredoc_vec vec_new() { + heredoc_vec vec = {0, 0, NULL}; + vec.data = calloc(1, sizeof(Heredoc)); + vec.cap = 1; + return vec; +} + +typedef struct { + uint8_t last_glob_paren_depth; + heredoc_vec heredocs; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool in_error_recovery(const bool *valid_symbols) { + return valid_symbols[ERROR_RECOVERY]; +} + +static inline void reset_heredoc(Heredoc *heredoc) { + heredoc->is_raw = false; + heredoc->started = false; + heredoc->allows_indent = false; + STRING_CLEAR(heredoc->delimiter); +} + +static inline void reset(Scanner *scanner) { + for (uint32_t i = 0; i < scanner->heredocs.len; i++) { + reset_heredoc(&scanner->heredocs.data[i]); + } +} + +static unsigned serialize(Scanner *scanner, char *buffer) { + uint32_t size = 0; + + buffer[size++] = (char)scanner->last_glob_paren_depth; + buffer[size++] = (char)scanner->heredocs.len; + + for (uint32_t i = 0; i < scanner->heredocs.len; i++) { + Heredoc heredoc = scanner->heredocs.data[i]; + if (heredoc.delimiter.len + 3 + size >= + TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + return 0; + } + + buffer[size++] = 
(char)heredoc.is_raw; + buffer[size++] = (char)heredoc.started; + buffer[size++] = (char)heredoc.allows_indent; + + memcpy(&buffer[size], &heredoc.delimiter.len, sizeof(uint32_t)); + size += sizeof(uint32_t); + memcpy(&buffer[size], heredoc.delimiter.data, heredoc.delimiter.len); + size += heredoc.delimiter.len; + } + return size; +} + +static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { + if (length == 0) { + reset(scanner); + } else { + uint32_t size = 0; + scanner->last_glob_paren_depth = buffer[size++]; + uint32_t heredoc_count = (unsigned char)buffer[size++]; + for (uint32_t i = 0; i < heredoc_count; i++) { + Heredoc *heredoc = NULL; + if (i < scanner->heredocs.len) { + heredoc = &scanner->heredocs.data[i]; + } else { + Heredoc new_heredoc = heredoc_new(); + VEC_PUSH(scanner->heredocs, new_heredoc); + heredoc = &VEC_BACK(scanner->heredocs); + } + + heredoc->is_raw = buffer[size++]; + heredoc->started = buffer[size++]; + heredoc->allows_indent = buffer[size++]; + + memcpy(&heredoc->delimiter.len, &buffer[size], sizeof(uint32_t)); + size += sizeof(uint32_t); + STRING_GROW(heredoc->delimiter, heredoc->delimiter.len); + + memcpy(heredoc->delimiter.data, &buffer[size], + heredoc->delimiter.len); + size += heredoc->delimiter.len; + } + assert(size == length); + } +} + +/** + * Consume a "word" in POSIX parlance, and returns it unquoted. + * + * This is an approximate implementation that doesn't deal with any + * POSIX-mandated substitution, and assumes the default value for + * IFS. + */ +static bool advance_word(TSLexer *lexer, String *unquoted_word) { + bool empty = true; + + int32_t quote = 0; + if (lexer->lookahead == '\'' || lexer->lookahead == '"') { + quote = lexer->lookahead; + advance(lexer); + } + + while (lexer->lookahead && + !(quote ? 
lexer->lookahead == quote || lexer->lookahead == '\r' || + lexer->lookahead == '\n' + : iswspace(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + advance(lexer); + if (!lexer->lookahead) { + return false; + } + } + empty = false; + STRING_PUSH(*unquoted_word, lexer->lookahead); + advance(lexer); + } + + if (quote && lexer->lookahead == quote) { + advance(lexer); + } + + return !empty; +} + +static inline bool scan_bare_dollar(TSLexer *lexer) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && + !lexer->eof(lexer)) { + skip(lexer); + } + + if (lexer->lookahead == '$') { + advance(lexer); + lexer->result_symbol = BARE_DOLLAR; + lexer->mark_end(lexer); + return iswspace(lexer->lookahead) || lexer->eof(lexer) || + lexer->lookahead == '\"'; + } + + return false; +} + +static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + lexer->result_symbol = HEREDOC_START; + heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || + lexer->lookahead == '\\'; + + bool found_delimiter = advance_word(lexer, &heredoc->delimiter); + if (!found_delimiter) + STRING_CLEAR(heredoc->delimiter); + return found_delimiter; +} + +static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) { + STRING_CLEAR(heredoc->current_leading_word); + // Scan the first 'n' characters on this line, to see if they match the + // heredoc delimiter + int32_t size = 0; + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && + ((int32_t)heredoc->delimiter.data[size++]) == lexer->lookahead && + heredoc->current_leading_word.len < heredoc->delimiter.len) { + STRING_PUSH(heredoc->current_leading_word, lexer->lookahead); + advance(lexer); + } + return strcmp(heredoc->current_leading_word.data, + heredoc->delimiter.data) == 0; +} + +static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, + enum TokenType middle_type, + enum TokenType end_type) { + bool did_advance = false; 
+ Heredoc *heredoc = &VEC_BACK(scanner->heredocs); + + for (;;) { + switch (lexer->lookahead) { + case '\0': { + if (lexer->eof(lexer) && did_advance) { + reset_heredoc(heredoc); + lexer->result_symbol = end_type; + return true; + } + return false; + } + + case '\\': { + did_advance = true; + advance(lexer); + advance(lexer); + break; + } + + case '$': { + if (heredoc->is_raw) { + did_advance = true; + advance(lexer); + break; + } + if (did_advance) { + lexer->mark_end(lexer); + lexer->result_symbol = middle_type; + heredoc->started = true; + advance(lexer); + if (isalpha(lexer->lookahead) || lexer->lookahead == '{' || + lexer->lookahead == '(') { + return true; + } + break; + } + if (middle_type == HEREDOC_BODY_BEGINNING && + lexer->get_column(lexer) == 0) { + lexer->result_symbol = middle_type; + heredoc->started = true; + return true; + } + return false; + } + + case '\n': { + if (!did_advance) { + skip(lexer); + } else { + advance(lexer); + } + did_advance = true; + if (heredoc->allows_indent) { + while (iswspace(lexer->lookahead)) { + advance(lexer); + } + } + lexer->result_symbol = + heredoc->started ? middle_type : end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + if (lexer->result_symbol == HEREDOC_END) { + VEC_POP(scanner->heredocs); + } + return true; + } + break; + } + + default: { + if (lexer->get_column(lexer) == 0) { + // an alternative is to check the starting column of the + // heredoc body and track that statefully + while (iswspace(lexer->lookahead)) { + /* did_advance ? 
advance(lexer) : skip(lexer); */ + if (did_advance) { + advance(lexer); + } else { + skip(lexer); + } + } + if (end_type != SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = middle_type; + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + if (end_type == SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + } + did_advance = true; + advance(lexer); + break; + } + } + } +} + +static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { + if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || + lexer->lookahead == '>' || lexer->lookahead == '<' || + lexer->lookahead == ')' || lexer->lookahead == '(' || + lexer->lookahead == ';' || lexer->lookahead == '&' || + lexer->lookahead == '|' || + (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) || + (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) { + lexer->result_symbol = CONCAT; + // So for a`b`, we want to return a concat. We check if the + // 2nd backtick has whitespace after it, and if it does we + // return concat. 
+ if (lexer->lookahead == '`') { + lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != '`' && !lexer->eof(lexer)) { + advance(lexer); + } + if (lexer->eof(lexer)) { + return false; + } + if (lexer->lookahead == '`') { + advance(lexer); + } + return iswspace(lexer->lookahead) || lexer->eof(lexer); + } + // strings w/ expansions that contains escaped quotes or + // backslashes need this to return a concat + if (lexer->lookahead == '\\') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '"' || lexer->lookahead == '\'' || + lexer->lookahead == '\\') { + return true; + } + if (lexer->eof(lexer)) { + return false; + } + } else { + return true; + } + } + if (iswspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] && + !valid_symbols[EXPANSION_WORD]) { + lexer->result_symbol = CONCAT; + return true; + } + } + + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && + !in_error_recovery(valid_symbols)) { + // advance two # and ensure not } after + if (lexer->lookahead == '#') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '#') { + advance(lexer); + if (lexer->lookahead != '}') { + lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->mark_end(lexer); + return true; + } + } + } + } + + if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] && + !in_error_recovery(valid_symbols)) { + if (lexer->lookahead == '#' || lexer->lookahead == '=' || + lexer->lookahead == '!') { + lexer->result_symbol = + lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH + : lexer->lookahead == '!' ? 
EXTERNAL_EXPANSION_SYM_BANG + : EXTERNAL_EXPANSION_SYM_EQUAL; + advance(lexer); + lexer->mark_end(lexer); + while (lexer->lookahead == '#' || lexer->lookahead == '=' || + lexer->lookahead == '!') { + advance(lexer); + } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '}') { + return true; + } + return false; + } + } + + if (valid_symbols[EMPTY_VALUE]) { + if (iswspace(lexer->lookahead) || lexer->eof(lexer) || + lexer->lookahead == ';' || lexer->lookahead == '&') { + lexer->result_symbol = EMPTY_VALUE; + return true; + } + } + + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || + valid_symbols[SIMPLE_HEREDOC_BODY]) && + scanner->heredocs.len > 0 && !VEC_BACK(scanner->heredocs).started && + !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, + SIMPLE_HEREDOC_BODY); + } + + if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0) { + Heredoc *heredoc = &VEC_BACK(scanner->heredocs); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + STRING_FREE(heredoc->current_leading_word); + STRING_FREE(heredoc->delimiter); + VEC_POP(scanner->heredocs); + lexer->result_symbol = HEREDOC_END; + return true; + } + } + + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && + VEC_BACK(scanner->heredocs).started && + !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, + HEREDOC_END); + } + + if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && + scanner->heredocs.len > 0) { + return scan_heredoc_start(&VEC_BACK(scanner->heredocs), lexer); + } + + if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { + skip(lexer); + } + + if (lexer->lookahead == '\\') { + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + if (valid_symbols[REGEX_NO_SPACE]) { + goto regex; + } + skip(lexer); + + if (lexer->eof(lexer)) { + return 
false; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + if (lexer->lookahead == '\n') { + skip(lexer); + } + } else if (lexer->lookahead == '\n') { + skip(lexer); + } else { + return false; + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) { + skip(lexer); + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if (lexer->lookahead == '-') { + advance(lexer); + + bool advanced_once = false; + while (isalpha(lexer->lookahead)) { + advanced_once = true; + advance(lexer); + } + + if (iswspace(lexer->lookahead) && advanced_once) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) { + if (valid_symbols[EXPANSION_WORD]) { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return true; + } + return false; + } + lexer->result_symbol = TEST_OPERATOR; + return true; + } + if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && + scan_bare_dollar(lexer)) { + return true; + } + } + + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || + valid_symbols[HEREDOC_ARROW]) && + !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) { + for (;;) { + if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || + lexer->lookahead == '\r' || + (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && + !valid_symbols[EXPANSION_WORD]) { + skip(lexer); + } else if (lexer->lookahead == '\\') { + skip(lexer); + + if (lexer->eof(lexer)) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + } + if (lexer->lookahead == '\n') { + skip(lexer); + } else { + if (lexer->lookahead == '\\' && + valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + return 
false; + } + } else { + break; + } + } + + // no '*', '@', '?', '-', '$', '0', '_' + if (!valid_symbols[EXPANSION_WORD] && + (lexer->lookahead == '*' || lexer->lookahead == '@' || + lexer->lookahead == '?' || lexer->lookahead == '-' || + lexer->lookahead == '0' || lexer->lookahead == '_')) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == '[' || + lexer->lookahead == ':' || lexer->lookahead == '-' || + lexer->lookahead == '%' || lexer->lookahead == '#' || + lexer->lookahead == '/') { + return false; + } + if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '-') { + advance(lexer); + Heredoc heredoc = heredoc_new(); + heredoc.allows_indent = true; + VEC_PUSH(scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW_DASH; + } else if (lexer->lookahead == '<' || lexer->lookahead == '=') { + return false; + } else { + Heredoc heredoc = heredoc_new(); + VEC_PUSH(scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW; + } + return true; + } + return false; + } + + bool is_number = true; + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + if (lexer->lookahead == '{') { + goto brace_start; + } + if (valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + return false; + } + + for (;;) { + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + break; + } + } + + if (is_number && valid_symbols[FILE_DESCRIPTOR] && + 
(lexer->lookahead == '>' || lexer->lookahead == '<')) { + lexer->result_symbol = FILE_DESCRIPTOR; + return true; + } + + if (valid_symbols[VARIABLE_NAME]) { + if (lexer->lookahead == '+') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == ':' || + valid_symbols[CLOSING_BRACE]) { + lexer->result_symbol = VARIABLE_NAME; + return true; + } + return false; + } + if (lexer->lookahead == '/') { + return false; + } + if (lexer->lookahead == '=' || lexer->lookahead == '[' || + (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE]) || + lexer->lookahead == '%' || + (lexer->lookahead == '#' && !is_number) || + lexer->lookahead == '@' || + (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '?') { + lexer->mark_end(lexer); + advance(lexer); + lexer->result_symbol = VARIABLE_NAME; + return isalpha(lexer->lookahead); + } + } + + return false; + } + + if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && + scan_bare_dollar(lexer)) { + return true; + } + +regex: + if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] || + valid_symbols[REGEX_NO_SPACE]) && + !in_error_recovery(valid_symbols)) { + if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || + (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH])) { + typedef struct { + bool done; + bool advanced_once; + bool found_non_alnumdollarunderdash; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '(') { + return false; + } + } + + lexer->mark_end(lexer); + + State state = {false, false, false, 0, 0, 0}; + while (!state.done) { + 
switch (lexer->lookahead) { + case '\0': + return false; + case '(': + state.paren_depth++; + break; + case '[': + state.bracket_depth++; + break; + case '{': + state.brace_depth++; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + break; + } + + if (!state.done) { + if (valid_symbols[REGEX]) { + bool was_space = iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space || state.paren_depth > 0) { + lexer->mark_end(lexer); + } + } else if (valid_symbols[REGEX_NO_SLASH]) { + if (lexer->lookahead == '/') { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SLASH; + return state.advanced_once; + } + if (lexer->lookahead == '\\') { + advance(lexer); + state.advanced_once = true; + if (!lexer->eof(lexer) && lexer->lookahead != '[' && + lexer->lookahead != '/') { + advance(lexer); + lexer->mark_end(lexer); + } + } else { + bool was_space = iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space) { + lexer->mark_end(lexer); + } + } + } else if (valid_symbols[REGEX_NO_SPACE]) { + if (lexer->lookahead == '\\') { + state.found_non_alnumdollarunderdash = true; + advance(lexer); + if (!lexer->eof(lexer)) { + advance(lexer); + } + } else if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + // do not parse a command + // substitution + if (lexer->lookahead == '(') { + return false; + } + // end $ always means regex, e.g. 
+ // 99999999$ + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = REGEX_NO_SPACE; + lexer->mark_end(lexer); + return true; + } + } else { + if (iswspace(lexer->lookahead) && + state.paren_depth == 0) { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SPACE; + return state.found_non_alnumdollarunderdash; + } + if (!iswalnum(lexer->lookahead) && + lexer->lookahead != '$' && + lexer->lookahead != '-' && + lexer->lookahead != '_') { + state.found_non_alnumdollarunderdash = true; + } + advance(lexer); + } + } + } + } + + lexer->result_symbol = + valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH + : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE + : REGEX; + if (valid_symbols[REGEX] && !state.advanced_once) { + return false; + } + return true; + } + } + +extglob_pattern: + if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) { + // first skip ws, then check for ? * + @ ! + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '?' || lexer->lookahead == '*' || + lexer->lookahead == '+' || lexer->lookahead == '@' || + lexer->lookahead == '!' 
|| lexer->lookahead == '-' || + lexer->lookahead == ')' || lexer->lookahead == '\\' || + lexer->lookahead == '.') { + if (lexer->lookahead == '\\') { + advance(lexer); + if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && + lexer->lookahead != '\r' && lexer->lookahead != '\n') { + advance(lexer); + } else { + return false; + } + } + + if (lexer->lookahead == ')' && + scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + + if (iswspace(lexer->lookahead)) { + return false; + } + } + + lexer->mark_end(lexer); + advance(lexer); + + // -\w is just a word, find something else special + if (lexer->lookahead == '-') { + lexer->mark_end(lexer); + advance(lexer); + while (isalnum(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead == ')' || lexer->lookahead == '\\' || + lexer->lookahead == '.') { + return false; + } + lexer->mark_end(lexer); + } + + // case item -) or *) + if (lexer->lookahead == ')' && + scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(') { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (lexer->lookahead == '|') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '\\' || lexer->lookahead == '\r' || + lexer->lookahead == '\n') { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (!isalnum(lexer->lookahead) && lexer->lookahead != '(' && + lexer->lookahead != '"' && lexer->lookahead != '[' && + lexer->lookahead != '?' 
&& lexer->lookahead != '/' && + lexer->lookahead != '\\' && lexer->lookahead != '_') { + return false; + } + + typedef struct { + bool done; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + State state = {false, scanner->last_glob_paren_depth, 0, 0}; + while (!state.done) { + switch (lexer->lookahead) { + case '\0': + return false; + case '(': + state.paren_depth++; + break; + case '[': + state.bracket_depth++; + break; + case '{': + state.brace_depth++; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + break; + } + + if (!state.done) { + bool was_space = iswspace(lexer->lookahead); + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '(' || + lexer->lookahead == '{') { + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = state.paren_depth; + return true; + } + } + if (was_space) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + if (lexer->lookahead == '"') { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + if (lexer->lookahead == '\\') { + advance(lexer); + if (iswspace(lexer->lookahead) || + lexer->lookahead == '"') { + advance(lexer); + } + } else { + advance(lexer); + } + if (!was_space) { + lexer->mark_end(lexer); + } + } + } + + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + scanner->last_glob_paren_depth = 0; + + return false; + } + +expansion_word: + if (valid_symbols[EXPANSION_WORD]) { + bool advanced_once = false; + bool advance_once_space = false; + for (;;) { + if (lexer->lookahead 
== '\"') { + return false; + } + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || + lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + + if (lexer->lookahead == '}') { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return advanced_once || advance_once_space; + } + + if (lexer->lookahead == '(' && + !(advanced_once || advance_once_space)) { + lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != ')' && !lexer->eof(lexer)) { + // if we find a $( or ${ assume this is valid and is + // a garbage concatenation of some weird word + an + // expansion + // I wonder where this can fail + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || + lexer->lookahead == '(' || + lexer->lookahead == '\'' || + iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } else { + advanced_once = + advanced_once || !iswspace(lexer->lookahead); + advance_once_space = + advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + lexer->mark_end(lexer); + if (lexer->lookahead == ')') { + advanced_once = true; + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '}') { + return false; + } + } else { + return false; + } + } + + if (lexer->lookahead == '\'') { + return false; + } + + if (lexer->eof(lexer)) { + return false; + } + advanced_once = advanced_once || !iswspace(lexer->lookahead); + advance_once_space = + advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + +brace_start: + if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead != '{') { + return false; + } + + advance(lexer); + 
lexer->mark_end(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '}') { + return false; + } + + lexer->result_symbol = BRACE_START; + return true; + } + + return false; +} + +void *tree_sitter_bash_external_scanner_create() { + Scanner *scanner = calloc(1, sizeof(Scanner)); + scanner->heredocs = vec_new(); + return scanner; +} + +bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + return scan(scanner, lexer, valid_symbols); +} + +unsigned tree_sitter_bash_external_scanner_serialize(void *payload, + char *state) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, state); +} + +void tree_sitter_bash_external_scanner_deserialize(void *payload, + const char *state, + unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, state, length); +} + +void tree_sitter_bash_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + for (size_t i = 0; i < scanner->heredocs.len; i++) { + Heredoc *heredoc = &scanner->heredocs.data[i]; + STRING_FREE(heredoc->current_leading_word); + STRING_FREE(heredoc->delimiter); + } + VEC_FREE(scanner->heredocs); + free(scanner); +} diff --git a/bash/test/corpus/commands.txt b/bash/test/corpus/commands.txt new file mode 100644 index 0000000..5b3a783 --- /dev/null +++ b/bash/test/corpus/commands.txt @@ -0,0 +1,708 @@ +=============================== +Commands +=============================== + +whoami + +--- + +(program + (command (command_name (word)))) + +=============================== +Commands with arguments +=============================== + +cat file1.txt +git diff --word-diff=color -- file1.txt file2.txt +echo $sing\ 
+levar + +--- + +(program + (command (command_name (word)) (word)) + (command (command_name (word)) (word) (word) (word) (word) (word)) + (command (command_name (word)) (simple_expansion (variable_name)) (word))) + +=============================== +Quoted command names +=============================== + +"$a/$b" c + +--- + +(program + (command + (command_name (string (simple_expansion (variable_name)) (string_content) (simple_expansion (variable_name)))) + (word))) + +=============================== +Commands with numeric arguments +=============================== + +exit 1 + +--- + +(program + (command (command_name (word)) (number))) + +=================================== +Commands with environment variables +=================================== + +VAR1=1 ./script/test +VAR1=a VAR2="ok" git diff --word-diff=color + +--- + +(program + (command + (variable_assignment (variable_name) (number)) + (command_name (word))) + (command + (variable_assignment (variable_name) (word)) + (variable_assignment (variable_name) (string (string_content))) + (command_name (word)) + (word) + (word))) + +=================================== +Empty environment variables +=================================== + +VAR1= +VAR2= echo + +--- + +(program + (variable_assignment (variable_name)) + (command (variable_assignment (variable_name)) (command_name (word)))) + +=============================== +File redirects +=============================== + +whoami > /dev/null +cat a b > /dev/null +2>&1 whoami +echo "foobar" >&2 +[ ! 
command -v go &>/dev/null ] && return + +if [ ]; then + >aa >bb +fi + +exec {VIRTWL[0]} {VIRTWL[1]} <&- >&- +exec {VIRTWL[0]}<&- {VIRTWL[1]}>&- + +grep 2>/dev/null -q "^/usr/bin/scponly$" /etc/shells + +--- + +(program + (redirected_statement + (command (command_name (word))) + (file_redirect (word))) + (redirected_statement + (command (command_name (word)) (word) (word)) + (file_redirect (word))) + (command + (file_redirect (file_descriptor) (number)) + (command_name (word))) + (redirected_statement + (command (command_name (word)) (string (string_content))) + (file_redirect (number))) + (list + (test_command + (redirected_statement + (negated_command + (command (command_name (word)) (word) (word))) + (file_redirect (word)))) + (command (command_name (word)))) + (if_statement + (test_command) + (redirected_statement + (file_redirect (word)) + (file_redirect (word)))) + (redirected_statement + (command + (command_name (word)) + (concatenation (word) (word) (word) (number) (word) (word)) + (concatenation (word) (word) (word) (number) (word) (word))) + (file_redirect) + (file_redirect)) + (redirected_statement + (command + (command_name (word)) + (concatenation (word) (word) (word) (number) (word) (word))) + (file_redirect + (concatenation (word) (word) (word) (number) (word) (word))) + (file_redirect)) + (redirected_statement + (command (command_name (word))) + (file_redirect (file_descriptor) (word) (word) (string (string_content)) (word)))) + +=============================== +File redirects (noclobber override) +=============================== + +whoami >| /dev/null +cat a b >| /dev/null + +--- + +(program + (redirected_statement + (command (command_name (word))) + (file_redirect (word))) + (redirected_statement + (command (command_name (word)) (word) (word)) + (file_redirect (word)))) + +=============================== +Heredoc redirects +=============================== + +node <<JS +console.log("hi") +JS + +bash -c <<JS +echo hi +JS + +newins <<-EOF - 
org.freedesktop.Notifications.service + [D-BUS Service] + Name=org.freedesktop.Notifications + Exec=/usr/libexec/notification-daemon +EOF + +--- + +(program + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end))) + (redirected_statement + (command (command_name (word)) (word)) + (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end))) + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (word) + (word) + (heredoc_body) + (heredoc_end)))) + +=============================== +Heredocs with variables +=============================== + +node <<JS +a $B ${C} +JS + +exit + +--- + +(program + (redirected_statement + (command + (command_name + (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body + (simple_expansion + (variable_name)) + (heredoc_content) + (expansion + (variable_name)) + (heredoc_content)) + (heredoc_end))) + (command + (command_name + (word)))) + +================================= +Heredocs with file redirects +================================= + +cat <<EOF > $tmpfile +a $B ${C} +EOF + +wc -l $tmpfile + +--- + +(program + (redirected_statement + (command + (command_name + (word))) + (heredoc_redirect + (heredoc_start) + (file_redirect + (simple_expansion + (variable_name))) + (heredoc_body + (simple_expansion + (variable_name)) + (heredoc_content) + (expansion + (variable_name)) + (heredoc_content)) + (heredoc_end))) + (command + (command_name + (word)) + (word) + (simple_expansion + (variable_name)))) + +================================= +Heredocs with many file redirects +================================= + +FOO=bar echo <<EOF 2> err.txt > hello.txt +hello +EOF + +--- + +(program + (redirected_statement + body: (command + (variable_assignment + name: (variable_name) + value: (word)) + name: (command_name + (word))) + redirect: (heredoc_redirect + (heredoc_start) + redirect: (file_redirect + descriptor: 
(file_descriptor) + destination: (word)) + redirect: (file_redirect + destination: (word)) + (heredoc_body) + (heredoc_end)))) + +================================= +Heredocs with pipes +================================= + +one <<EOF | grep two +three +EOF + +--- + +(program + (redirected_statement + (command + (command_name + (word))) + (heredoc_redirect + (heredoc_start) + (pipeline + (command + (command_name + (word)) + (word))) + (heredoc_body) + (heredoc_end)))) + +====================================== +Heredocs with escaped expansions +====================================== + +cat << EOF +DEV_NAME=\$(lsblk) +EOF + +--- + +(program (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end)))) + +====================================== +Quoted Heredocs +====================================== + +cat << 'EOF' +a=$b +EOF + +cat << "EOF" +a=$b +EOF + +cat <<"END OF FILE" +hello, +world +END OF FILE + +cat << \EOF +EOF + +--- + +(program + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end)))) + +========================================== +Heredocs with indented closing delimiters +========================================== + +usage() { + cat <<-EOF + Usage: ${0##*/} FOO BAR + EOF +} + +--- + +(program + (function_definition + (word) + (compound_statement + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body (expansion (special_variable_name) (regex)) (heredoc_content)) + (heredoc_end)))))) + 
+========================================== +Heredocs with empty bodies +========================================== + +node <<JS +JS + +node << 'SJ' +SJ + +usage() { + cat <<-EOF + EOF +} + +node << 'EOF' > temp +EOF + +--- + +(program + (redirected_statement + body: (command + name: (command_name + (word))) + redirect: (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end))) + (redirected_statement + body: (command + name: (command_name + (word))) + redirect: (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end))) + (function_definition + name: (word) + body: (compound_statement + (redirected_statement + body: (command + name: (command_name + (word))) + redirect: (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end))))) + (redirected_statement + body: (command + name: (command_name + (word))) + redirect: (heredoc_redirect + (heredoc_start) + redirect: (file_redirect + destination: (word)) + (heredoc_body) + (heredoc_end)))) + +========================================== +Heredocs with weird characters +========================================== + +node <<_DELIMITER_WITH_UNDERSCORES_ +Hello. +_DELIMITER_WITH_UNDERSCORES_ + +node <<'```' +Hello. +``` + +node <<!HEREDOC! +Hello. +!HEREDOC! + +node <<\' +Hello. +' + +node <<\\ +Hello. 
+\ + +--- + +(program + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end))) + (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start) (heredoc_body) (heredoc_end)))) + +========================================== +Heredocs with a rhs statement +========================================== + +cat <<-_EOF_ || die "cat EOF failed" + #!/bin/sh + echo hello +_EOF_ + +--- + +(program + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (command (command_name (word)) (string (string_content))) + (heredoc_body) + (heredoc_end)))) + +========================================== +Heredocs with a $ that is not an expansion +========================================== + +cat <<EOF +# check out this regex '^EOF$' +EOF + +--- + +(program + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end)))) + +========================================== +Nested Heredocs +========================================== + +cat <<OUTER +Outer Heredoc Start +$(cat <<INNER +Inner Heredoc Content +$(cat <<INNERMOST +Innermost Heredoc Content +INNERMOST +) +INNER) +Outer Heredoc End +OUTER + +--- + +(program + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body + (command_substitution + (redirected_statement + (command (command_name (word))) + (heredoc_redirect + (heredoc_start) + (heredoc_body + (command_substitution + (redirected_statement + (command (command_name (word))) + 
(heredoc_redirect + (heredoc_start) + (heredoc_body) + (heredoc_end)))) + (heredoc_content)) + (heredoc_end)))) + (heredoc_content)) + (heredoc_end)))) + +========================================== +Herestrings +========================================== + +node <<< foo + +while read -u 3 entry; do + echo $entry +done 3<<<"$ENTRIES" + +$(tc-getCC) -Werror -Wl,-l:libobjc.so.${ver} -x objective-c \ + - <<<$'int main() {}' -o /dev/null 2> /dev/null; + +<<<string cmd arg + +cmd arg <<<string + +cmd <<<string arg + +<<<string + +--- + +(program + (command (command_name (word)) (herestring_redirect (word))) + (redirected_statement + (while_statement + (command (command_name (word)) (word) (number) (word)) + (do_group + (command (command_name (word)) (simple_expansion (variable_name))))) + (herestring_redirect + (file_descriptor) + (string (simple_expansion (variable_name))))) + (redirected_statement + (command + (command_name (command_substitution (command (command_name (word))))) + (word) + (concatenation (word) (expansion (variable_name))) + (word) + (word) + (word) + (herestring_redirect (ansi_c_string)) + (word) + (word)) + (file_redirect (file_descriptor) (word))) + (command (herestring_redirect (word)) (command_name (word)) (word)) + (command (command_name (word)) (word) (herestring_redirect (word))) + (command (command_name (word)) (herestring_redirect (word)) (word)) + (redirected_statement (herestring_redirect (word)))) + +========================================== +Subscripts +========================================== + +echo ${a[1 + 2]} + +echo ${b[1234 % 2]} + +${words[++counter]} + +${array[(($number+1))]} + +${array[((number+1))]} + +--- + +(program + (command + (command_name (word)) + (expansion + (subscript (variable_name) (binary_expression (number) (number))))) + (command + (command_name (word)) + (expansion + (subscript (variable_name) (binary_expression (number) (number))))) + (command + (command_name + (expansion + (subscript (variable_name) 
(unary_expression (word)))))) + (command + (command_name + (expansion + (subscript + (variable_name) + (arithmetic_expansion (binary_expression (simple_expansion (variable_name)) (number))))))) + (command + (command_name + (expansion + (subscript + (variable_name) + (arithmetic_expansion (binary_expression (variable_name) (number)))))))) + +========================================== +Bare $ +========================================== + +echo $ +echo "${module}"$ +echo $$ + +--- + +(program + (command (command_name (word))) + (command + (command_name (word)) + (concatenation (string (expansion (variable_name))))) + (command + (command_name (word)) + (simple_expansion (special_variable_name)))) + +========================================== +Arithmetic with command substitution +========================================== + +$(( $( ver_cut 2 ) - 1 )) + +--- + +(program + (command + (command_name + (arithmetic_expansion + (binary_expression + (command_substitution + (command + (command_name + (word)) + (number))) + (number)))))) + + +========================================== +Ralative path without dots +========================================== + +bin/ls /usr/bin + +--- + +(program + (command + (command_name + (word)) + (word))) diff --git a/bash/test/corpus/crlf.txt b/bash/test/corpus/crlf.txt new file mode 100644 index 0000000..b3d6cde --- /dev/null +++ b/bash/test/corpus/crlf.txt @@ -0,0 +1,13 @@ +================================ +Variables with CRLF line endings +================================ + +A=one + +B=two + +--- + +(program + (variable_assignment (variable_name) (word)) + (variable_assignment (variable_name) (word))) diff --git a/bash/test/corpus/literals.txt b/bash/test/corpus/literals.txt new file mode 100644 index 0000000..b753958 --- /dev/null +++ b/bash/test/corpus/literals.txt @@ -0,0 +1,1336 @@ +Literal words +================================================================================ + +echo a +echo a b + 
+-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word) + (word))) + +================================================================================ +Words with special characters +================================================================================ + +echo {o[k]} +echo }}} +echo ]]] === + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (concatenation + (word) + (word) + (word) + (word) + (word) + (word))) + (command + (command_name + (word)) + (concatenation + (word) + (word) + (word))) + (command + (command_name + (word)) + (concatenation + (word) + (word) + (word)) + (word))) + +================================================================================ +Simple variable expansions +================================================================================ + +echo $abc + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (simple_expansion + (variable_name)))) + +================================================================================ +Special variable expansions +================================================================================ + +echo $# $* $@ $! 
+ +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (simple_expansion + (special_variable_name)) + (simple_expansion + (special_variable_name)) + (simple_expansion + (special_variable_name)) + (simple_expansion + (special_variable_name)))) + +================================================================================ +Variable expansions +================================================================================ + +echo ${} +echo ${#} +echo ${var1#*#} +echo ${!abc} +echo ${abc} +echo ${abc:-def} +echo ${abc:+ghi} +echo ${abc:- } +echo ${abc: +} +echo ${abc,?} +echo ${abc^^b} +echo ${abc@U} +echo ${abc:- -quiet} + +-------------------------------------------------------------------------------- + +(program + (command (command_name (word)) (expansion)) + (command (command_name (word)) (expansion)) + (command (command_name (word)) (expansion (variable_name) (regex))) + (command (command_name (word)) (expansion (variable_name))) + (command (command_name (word)) (expansion (variable_name))) + (command (command_name (word)) (expansion (variable_name) (word))) + (command (command_name (word)) (expansion (variable_name) (word))) + (command (command_name (word)) (expansion (variable_name) (word))) + (command (command_name (word)) (expansion (variable_name))) + (command (command_name (word)) (expansion (variable_name) (regex))) + (command (command_name (word)) (expansion (variable_name) (regex))) + (command (command_name (word)) (expansion (variable_name))) + (command (command_name (word)) (expansion (variable_name) (word)))) + +================================================================================ +Variable expansions with operators +================================================================================ + +A="${B[0]# }" +C="${D/#* -E /}" +F="${G%% *}" +H="${I#*;}" +J="${K##*;}" +L="${M%;*}" +N="${O%%;*}" +P="${Q%|*}" +R="${S%()}" +T="${U%(}" +V="${W%)}" 
+X="${Y%<}" +Z="${A#*<B>}" +C="${D%</E>*}" +F="${#!}" +G=${H,,[I]} +J=${K^^[L]} + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name) + (string + (expansion + (subscript + (variable_name) + (number)) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex)))) + (variable_assignment + (variable_name) + (string + (expansion))) + (variable_assignment + (variable_name) + (expansion + (variable_name) + (regex))) + (variable_assignment + (variable_name) + (expansion + (variable_name) + (regex)))) + +================================================================================ +More Variable expansions with operators +================================================================================ + +${parameter-default} 
+${parameter:-default} +${parameter=default} +${parameter:=default} +${parameter+alt_value} +${parameter:+alt_value} +${parameter?err_msg} +${parameter:?err_msg} +${var%Pattern} +${var%%Pattern} +${var:pos} +${var:pos:len} +${MATRIX:$(($RANDOM%${#MATRIX})):1} +${PKG_CONFIG_LIBDIR:-${ESYSROOT}/usr/$(get_libdir)/pkgconfig} +${ver_str::${#ver_str}-${#not_match}} + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (word)))) + (command + (command_name + (expansion + (variable_name) + (regex)))) + (command + (command_name + (expansion + (variable_name) + (regex)))) + (command + (command_name + (expansion + (variable_name) + (variable_name)))) + (command + (command_name + (expansion + (variable_name) + (variable_name) + (variable_name)))) + (command + (command_name + (expansion + (variable_name) + (arithmetic_expansion + (binary_expression + (simple_expansion + (variable_name)) + (expansion + (variable_name)))) + (number)))) + (command + (command_name + (expansion + (variable_name) + (concatenation + (expansion (variable_name)) + (word) + (command_substitution + (command (command_name (word)))) + (word))))) + (command + (command_name + (expansion + (variable_name) + (binary_expression + (expansion (variable_name)) + (expansion (variable_name))))))) + +================================================================================ +Variable expansions in strings 
+================================================================================ + +A="${A:-$B/c}" +A="${b=$c/$d}" +MY_PV="${PV/_pre/$'\x7e'pre}" + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (concatenation + (simple_expansion + (variable_name)) + (word))))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (concatenation + (simple_expansion + (variable_name)) + (word) + (simple_expansion + (variable_name)))))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex) + (concatenation (ansi_c_string) (word)))))) + +================================================================================ +Variable expansions with regexes +================================================================================ + +A=${B//:;;/$'\n'} + +# escaped space +C=${D/;\ *;|} +MOFILES=${LINGUAS// /.po }.po +MY_P="${PN/aspell/aspell"${ASPELL_VERSION}"}" +pyc=${pyc//*\/} +${pv/\.} +${new_test_cp//"${old_ver_cp}"/} +${tests_to_run//"${classes}"\/} +${allarchives// /\\|} + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name) + (expansion + (variable_name) + (regex) + (ansi_c_string))) + (comment) + (variable_assignment + (variable_name) + (expansion (variable_name) (regex))) + (variable_assignment + (variable_name) + (concatenation + (expansion (variable_name) (regex) (word)) + (word))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name) + (regex) + (concatenation + (word) + (string (expansion (variable_name))))))) + (variable_assignment + (variable_name) + (expansion (variable_name) (regex))) + (command (command_name (expansion (variable_name) (regex)))) + (command + (command_name + (expansion (variable_name) (string (expansion (variable_name)))))) + (command + 
(command_name + (expansion (variable_name) (string (expansion (variable_name))) (regex)))) + (command (command_name (expansion (variable_name) (regex) (word))))) + +================================================================================ +Other variable expansion operators +================================================================================ + +cat ${BAR} ${ABC=def} ${GHI:?jkl} +[ "$a" != "${a#[Bc]}" ] + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (expansion + (variable_name)) + (expansion + (variable_name) + (word)) + (expansion + (variable_name) + (word))) + (test_command + (binary_expression + (string + (simple_expansion + (variable_name))) + (string + (expansion + (variable_name) + (regex)))))) + +================================================================================ +Variable Expansions: Length +================================================================================ + +${parameter:-1} + +${parameter: -1} + +${parameter:(-1)} + +${matrix:$(($random%${#matrix})):1} + +"${_component_to_single:${len}:2}" + +"${PN::-1}" + +${trarr:$(ver_cut 2):1} + +${comp[@]:start:end*2-start} + +-------------------------------------------------------------------------------- + +(program + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (variable_name) (number)))) + (command + (command_name + (expansion (variable_name) (parenthesized_expression (unary_expression (number)))))) + (command + (command_name + (expansion + (variable_name) + (arithmetic_expansion + (binary_expression (simple_expansion (variable_name)) (expansion (variable_name)))) + (number)))) + (command (command_name (string (expansion (variable_name) (expansion (variable_name)) (number))))) + (command (command_name (string (expansion (variable_name) (number))))) + (command + (command_name + (expansion (variable_name) (command_substitution 
(command (command_name (word)) (number))) (number)))) + (command + (command_name + (expansion + (subscript (variable_name) (word)) + (variable_name) + (binary_expression (binary_expression (variable_name) (number)) (variable_name)))))) + +================================================================================ +Variable Expansions with operators +================================================================================ + +${parameter-default} +${parameter- default} +${!varprefix*} +${!varprefix@} +${parameter@U} + +-------------------------------------------------------------------------------- + +(program + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (variable_name)))) + (command (command_name (expansion (variable_name)))) + (command (command_name (expansion (variable_name))))) + +================================================================================ +Variable Expansions: Bizarre Cases +================================================================================ + +${!#} +${!# } +${!##} +${!## } +${!##/} +# here be dragons +echo "${kw}? ( ${cond:+${cond}? 
(} ${baseuri}-${ver}-${kw}.${suff} ${cond:+) })" + +-------------------------------------------------------------------------------- + +(program + (command (command_name (expansion))) + (command (command_name (expansion))) + (command (command_name (expansion))) + (command (command_name (expansion))) + (command (command_name (expansion (special_variable_name) (regex)))) + (comment) + (command + (command_name (word)) + (string + (expansion (variable_name)) + (string_content) + (expansion + (variable_name) + (concatenation (expansion (variable_name)) (word))) + (expansion (variable_name)) + (string_content) + (expansion (variable_name)) + (string_content) + (expansion (variable_name)) + (string_content) + (expansion (variable_name)) + (expansion (variable_name) (word)) + (string_content)))) + +================================================================================ +Variable Expansions: Weird Cases +================================================================================ + +${completions[*]} +${=1} +${2?} +${p_key#*=} +${abc:- } +${B[0]# } +${to_enables[0]##*/} +exec "${0#-}" --rcfile "${BASH_IT_BASHRC:-${HOME?}/.bashrc}" +recho "TDEFAULTS = ${selvecs:+-DSELECT_VECS=\"$selvecs\"}" +local msg="${2:-command '$1' does not exist}" +${cdir:+#} +${dict_langs:+;} +${UTIL_LINUX_LIBC[@]/%/? ( sys-apps/util-linux )} +${id}${2+ ${2}} +${BRANDING_GCC_PKGVERSION/(/(Gentoo ${PVR}${extvers}, } # look at that parenthesis! 
+ +-------------------------------------------------------------------------------- + +(program + (command (command_name (expansion (subscript (variable_name) (word))))) + (command (command_name (expansion (variable_name)))) + (command (command_name (expansion (variable_name)))) + (command (command_name (expansion (variable_name) (regex)))) + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (subscript (variable_name) (number)) (regex)))) + (command (command_name (expansion (subscript (variable_name) (number)) (regex)))) + (command + (command_name (word)) + (string (expansion (special_variable_name) (regex))) + (word) + (string (expansion (variable_name) (concatenation (expansion (variable_name)) (word))))) + (command + (command_name (word)) + (string + (string_content) + (expansion (variable_name) (concatenation (word) (simple_expansion (variable_name)) (word))))) + (declaration_command + (variable_assignment + (variable_name) + (string (expansion (variable_name) (concatenation (word) (raw_string) (word)))))) + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (variable_name) (word)))) + (command (command_name (expansion (subscript (variable_name) (word)) (word)))) + (command + (command_name + (concatenation (expansion (variable_name)) (expansion (variable_name) (expansion (variable_name)))))) + (command + (command_name + (expansion + (variable_name) + (regex) + (concatenation (word) (expansion (variable_name)) (expansion (variable_name)) (word))))) + (comment)) + +================================================================================ +Variable Expansions: Regex +================================================================================ + +A=${B//:;;/$'\n'} +C="${D/#* -E /}" +BASH_IT_GIT_URL="${BASH_IT_GIT_URL/git@/https://}" +10#${command_start##*.} +echo ${LIB_DEPEND//\[static-libs(+)]} +${ALL_LLVM_TARGETS[@]/%/(-)?} +filterdiff -p1 ${paths[@]/#/-i } 
+${cflags//-O? /$(get-flag O) } + +-------------------------------------------------------------------------------- + +(program + (variable_assignment (variable_name) (expansion (variable_name) (regex) (ansi_c_string))) + (variable_assignment (variable_name) (string (expansion (variable_name) (regex)))) + (variable_assignment (variable_name) (string (expansion (variable_name) (regex) (word)))) + (command (command_name (number (expansion (variable_name) (regex))))) + (command (command_name (word)) (expansion (variable_name) (regex))) + (command (command_name (expansion (subscript (variable_name) (word)) (word)))) + (command (command_name (word)) (word) (expansion (subscript (variable_name) (word)) (word))) + (command + (command_name + (expansion (variable_name) (regex) (command_substitution (command (command_name (word)) (word))) (word))))) + +================================================================================ +Words ending with '$' +================================================================================ + +grep ^${var}$ + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (concatenation + (word) + (expansion + (variable_name))))) + +================================================================================ +Command substitutions +================================================================================ + +echo `echo hi` +echo `echo hi; echo there` +echo $(echo $(echo hi)) +echo $(< some-file) + +# both of these are concatenations! 
+echo `echo otherword`word +echo word`echo otherword` + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (command_substitution + (command + (command_name + (word)) + (word)))) + (command + (command_name + (word)) + (command_substitution + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word)))) + (command + (command_name + (word)) + (command_substitution + (command + (command_name + (word)) + (command_substitution + (command + (command_name + (word)) + (word)))))) + (command + (command_name + (word)) + (command_substitution + (file_redirect + (word)))) + (comment) + (command + (command_name + (word)) + (concatenation + (command_substitution + (command + (command_name + (word)) + (word))) + (word))) + (command + (command_name + (word)) + (concatenation + (word) + (command_substitution + (command + (command_name + (word)) + (word)))))) + +================================================================================ +Process substitutions +================================================================================ + +wc -c <(echo abc && echo def) +wc -c <(echo abc; echo def) +echo abc > >(wc -c) + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (word) + (process_substitution + (list + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word))))) + (command + (command_name + (word)) + (word) + (process_substitution + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word)))) + (redirected_statement + (command + (command_name + (word)) + (word)) + (file_redirect + (process_substitution + (command + (command_name + (word)) + (word)))))) + +================================================================================ +Single quoted strings 
+================================================================================ + +echo 'a b' 'c d' + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (raw_string) + (raw_string))) + +================================================================================ +Double quoted strings +================================================================================ + +echo "a" "b" +echo "a ${b} c" "d $e" + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (string (string_content)) + (string (string_content))) + (command + (command_name + (word)) + (string + (string_content) + (expansion + (variable_name)) + (string_content)) + (string + (string_content) + (simple_expansion + (variable_name))))) + +================================================================================ +Strings containing command substitutions +================================================================================ + +find "`dirname $file`" -name "$base"'*' + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (string + (command_substitution + (command + (command_name + (word)) + (simple_expansion + (variable_name))))) + (word) + (concatenation + (string + (simple_expansion + (variable_name))) + (raw_string)))) + +================================================================================ +Strings containing escape sequence +================================================================================ + +echo "\"The great escape\`\${var}" + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (string (string_content)))) + +================================================================================ +Strings containing special characters 
+================================================================================ + +echo "s/$/'/" +echo "#" +echo "s$" + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (string (string_content) (string_content))) + (command + (command_name + (word)) + (string (string_content))) + (command + (command_name + (word)) + (string (string_content)))) + +================================================================================ +Strings with ANSI-C quoting +================================================================================ + +echo $'Here\'s Johnny!\r\n' + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (ansi_c_string))) + +================================================================================ +Arrays and array expansions +================================================================================ + +a=() +b=(1 2 3) + +echo ${a[@]} +echo ${#b[@]} + +a[$i]=50 +a+=(foo "bar" $(baz)) + +printf " %-9s" "${seq0:-(default)}" + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name) + (array)) + (variable_assignment + (variable_name) + (array + (number) + (number) + (number))) + (command + (command_name + (word)) + (expansion + (subscript + (variable_name) + (word)))) + (command + (command_name + (word)) + (expansion + (subscript + (variable_name) + (word)))) + (variable_assignment + (subscript + (variable_name) + (simple_expansion + (variable_name))) + (number)) + (variable_assignment + (variable_name) + (array + (word) + (string (string_content)) + (command_substitution + (command + (command_name + (word)))))) + (command + (command_name + (word)) + (string (string_content)) + (string + (expansion + (variable_name) + (array + (word)))))) + 
+================================================================================ +Escaped characters in strings +================================================================================ + +echo -ne "\033k$1\033\\" > /dev/stderr + +-------------------------------------------------------------------------------- + +(program + (redirected_statement + (command + (command_name + (word)) + (word) + (string + (string_content) + (simple_expansion + (variable_name)) + (string_content))) + (file_redirect + (word)))) + +================================================================================ +Words containing bare '#' +================================================================================ + +curl -# localhost #comment without space +nix build nixpkgs#hello -v # comment with space + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (word) + (word)) + (comment) + (command + (command_name + (word)) + (word) + (word) + (word)) + (comment)) + +================================================================================ +Words containing # that are not comments +================================================================================ + +echo 'word'#not-comment # a legit comment +echo $(uname -a)#not-comment # a legit comment +echo `uname -a`#not-comment # a legit comment +echo $hey#not-comment # a legit comment +var=#not-comment # a legit comment +echo "'$var'" # -> '#not-comment' + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (word)) + (concatenation + (raw_string) + (word))) + (comment) + (command + (command_name + (word)) + (concatenation + (command_substitution + (command + (command_name + (word)) + (word))) + (word))) + (comment) + (command + (command_name + (word)) + (concatenation + (command_substitution + (command + (command_name + (word)) + (word))) + (word))) + (comment) + 
(command + (command_name + (word)) + (concatenation + (simple_expansion + (variable_name)) + (word))) + (comment) + (variable_assignment + (variable_name) + (word)) + (comment) + (command + (command_name + (word)) + (string + (string_content) + (simple_expansion + (variable_name)) + (string_content))) + (comment)) + +================================================================================ +Variable assignments immediately followed by a terminator +================================================================================ + +loop=; variables=& here=;; + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name)) + (variable_assignment + (variable_name)) + (variable_assignment + (variable_name))) + +================================================================================ +Multiple variable assignments +================================================================================ + +component_type="${1}" item_name="${2?}" + +-------------------------------------------------------------------------------- + +(program + (variable_assignments + (variable_assignment + (variable_name) + (string + (expansion + (variable_name)))) + (variable_assignment + (variable_name) + (string + (expansion + (variable_name)))))) + +================================================================================ +Arithmetic expansions +================================================================================ + +echo $((1 + 2 - 3 * 4 / 5)) +a=$((6 % 7 ** 8 << 9 >> 10 & 11 | 12 ^ 13)) +$(((${1:-${SECONDS}} % 12) + 144)) +((foo=0)) +echo $((bar=1)) +echo $((-1, 1)) +echo $((! 
-a || ~ +b || ++c || --d)) +echo $((foo-- || bar++)) +(("${MULTIBUILD_VARIANTS}" > 1)) +$(("$(stat --printf '%05a' "${save_file}")" & 07177)) +soft_errors_count=$[soft_errors_count + 1] + +-------------------------------------------------------------------------------- + +(program + (command + (command_name (word)) + (arithmetic_expansion + (binary_expression + (binary_expression (number) (number)) + (binary_expression + (binary_expression (number) (number)) + (number))))) + (variable_assignment + (variable_name) + (arithmetic_expansion + (binary_expression + (binary_expression + (binary_expression + (binary_expression + (binary_expression + (number) + (binary_expression (number) (number))) + (number)) + (number)) + (number)) + (binary_expression (number) (number))))) + (command + (command_name + (arithmetic_expansion + (binary_expression + (parenthesized_expression + (binary_expression + (expansion + (variable_name) + (expansion (variable_name))) + (number))) + (number))))) + (command + (command_name + (arithmetic_expansion + (binary_expression (variable_name) (number))))) + (command + (command_name (word)) + (arithmetic_expansion + (binary_expression (variable_name) (number)))) + (command + (command_name (word)) + (arithmetic_expansion (unary_expression (number)) (number))) + (command + (command_name (word)) + (arithmetic_expansion + (binary_expression + (binary_expression + (binary_expression + (unary_expression (unary_expression (variable_name))) + (unary_expression (unary_expression (variable_name)))) + (unary_expression (variable_name))) + (unary_expression (variable_name))))) + (command + (command_name (word)) + (arithmetic_expansion + (binary_expression + (postfix_expression + (variable_name)) + (postfix_expression + (variable_name))))) + (command + (command_name + (arithmetic_expansion + (binary_expression + (string + (expansion + (variable_name))) + (number))))) + (command + (command_name + (arithmetic_expansion + (binary_expression + (string + 
(command_substitution + (command + (command_name + (word)) + (word) + (raw_string) + (string + (expansion + (variable_name)))))) + (number))))) + (variable_assignment + (variable_name) + (arithmetic_expansion + (binary_expression + (variable_name) + (number))))) + +================================================================================ +Concatenation with double backticks +================================================================================ + +main() { + local foo="asd"` + `"fgh" +} + +--- + +(program + (function_definition + (word) + (compound_statement + (declaration_command + (variable_assignment + (variable_name) + (concatenation + (string (string_content)) + (string (string_content)))))))) + +================================================================================ +Brace expressions and lookalikes +================================================================================ + +echo {1..2} +echo {0..5} +echo {0..2 # not a brace expression +echo }{0..2} +echo {0..n} # not a brace expression +echo {0..n..2} # not a brace expression +echo {0..2}{1..2} + +--- + +(program + (command + (command_name (word)) + (brace_expression (number) (number))) + (command + (command_name (word)) + (brace_expression (number) (number))) + (command + (command_name (word)) + (concatenation (word) (word))) + (comment) + (command + (command_name (word)) + (concatenation + (word) + (brace_expression (number) (number)))) + (command + (command_name (word)) + (concatenation (word) (word) (word))) + (comment) + (command + (command_name (word)) + (concatenation (word) (word) (word))) + (comment) + (command + (command_name (word)) + (concatenation + (brace_expression (number) (number)) + (brace_expression (number) (number))))) diff --git a/bash/test/corpus/programs.txt b/bash/test/corpus/programs.txt new file mode 100644 index 0000000..cae772e --- /dev/null +++ b/bash/test/corpus/programs.txt @@ -0,0 +1,108 @@ +=============================== +Comments 
+=============================== + +#!/bin/bash +# hi + +--- + +(program + (comment) + (comment)) + +=============================== +Escaped newlines +=============================== + +abc \ + d \ + e + +f=g \ + h=i \ + j \ + --k + +--- + +(program + (command + (command_name + (word)) + (word) + (word)) + (command + (variable_assignment + (variable_name) + (word)) + (variable_assignment + (variable_name) + (word)) + (command_name + (word)) + (word))) + +============================= +escaped newline immediately after a char +============================= + +echo a \ + b + +echo a\ + b + +echo a\ + b\ + c + + +----------------------------- + +(program + (command + (command_name + (word)) + (word) + (word)) + (command + (command_name + (word)) + (word) + (word)) + (command + (command_name + (word)) + (word) + (word) + (word))) + +============================= +Escaped whitespace +============================= + +echo 1 \ 2 \ 3 + +--- + +(program + (command + (command_name + (word)) + (number) + (number) + (number))) + +==================================== +Files without trailing terminators +==================================== + +echo hi +--- + +(program + (command + (command_name + (word)) + (word))) diff --git a/bash/test/corpus/statements.txt b/bash/test/corpus/statements.txt new file mode 100644 index 0000000..98913ce --- /dev/null +++ b/bash/test/corpus/statements.txt @@ -0,0 +1,1579 @@ +================================================================================ +Pipelines +================================================================================ + +whoami | cat +cat foo | grep -v bar +cat baz | head -n 1 + +-------------------------------------------------------------------------------- + +(program + (pipeline + (command + name: (command_name + (word))) + (command + name: (command_name + (word)))) + (pipeline + (command + name: (command_name + (word)) + argument: (word)) + (command + name: (command_name + (word)) + argument: (word) + argument: 
(word))) + (pipeline + (command + name: (command_name + (word)) + argument: (word)) + (command + name: (command_name + (word)) + argument: (word) + argument: (number)))) + + +================================================================================ +Lists +================================================================================ + +a | b && c && d; d e f || e g + +-------------------------------------------------------------------------------- + +(program + (list + (list + (pipeline + (command + (command_name + (word))) + (command + (command_name + (word)))) + (command + (command_name + (word)))) + (command + (command_name + (word)))) + (list + (command + (command_name + (word)) + (word) + (word)) + (command + (command_name + (word)) + (word)))) + +================================================================================ +While statements +================================================================================ + +while something happens; do + echo a + echo b +done + +while local name="$1" val="$2"; shift 2; do + printf "%s (%s)\n" "$val" "$name" +done + +-------------------------------------------------------------------------------- + +(program + (while_statement + condition: (command + name: (command_name + (word)) + argument: (word)) + body: (do_group + (command + name: (command_name + (word)) + argument: (word)) + (command + name: (command_name + (word)) + argument: (word)))) + (while_statement + condition: (declaration_command + (variable_assignment + name: (variable_name) + value: (string + (simple_expansion + (variable_name)))) + (variable_assignment + name: (variable_name) + value: (string + (simple_expansion + (variable_name))))) + condition: (command + name: (command_name + (word)) + argument: (number)) + body: (do_group + (command + name: (command_name + (word)) + argument: (string (string_content)) + argument: (string + (simple_expansion + (variable_name))) + argument: (string + (simple_expansion + (variable_name))))))) + 
+================================================================================ +Until statements +================================================================================ + +until something happens; do + echo a + echo b +done + +-------------------------------------------------------------------------------- + +(program + (while_statement + condition: (command + name: (command_name + (word)) + argument: (word)) + body: (do_group + (command + name: (command_name + (word)) + argument: (word)) + (command + name: (command_name + (word)) + argument: (word))))) + +================================================================================ +While statements with IO redirects +================================================================================ + +while read line; do + echo $line +done < <(cat file) + +-------------------------------------------------------------------------------- + +(program + (redirected_statement + body: (while_statement + condition: (command + name: (command_name + (word)) + argument: (word)) + body: (do_group + (command + name: (command_name + (word)) + argument: (simple_expansion + (variable_name))))) + redirect: (file_redirect + destination: (process_substitution + (command + name: (command_name + (word)) + argument: (word)))))) + +================================================================================ +For statements +================================================================================ + +for a in 1 2 $(seq 5 10); do + echo $a +done + +for ARG; do + echo $ARG + ARG='' +done + +for c in ${=1}; do + echo c +done + +-------------------------------------------------------------------------------- + +(program + (for_statement + variable: (variable_name) + value: (number) + value: (number) + value: (command_substitution + (command + name: (command_name + (word)) + argument: (number) + argument: (number))) + body: (do_group + (command + name: (command_name + (word)) + argument: (simple_expansion + 
(variable_name))))) + (for_statement + variable: (variable_name) + body: (do_group + (command + name: (command_name + (word)) + argument: (simple_expansion + (variable_name))) + (variable_assignment + name: (variable_name) + value: (raw_string)))) + (for_statement + variable: (variable_name) + value: (expansion + (variable_name)) + body: (do_group + (command + name: (command_name + (word)) + argument: (word))))) + +================================================================================ +Select statements +================================================================================ + +select choice in X Y $(ls); do + echo $choice + break +done + +select ARG; do + echo $ARG + ARG='' +done + +-------------------------------------------------------------------------------- + +(program + (for_statement + (variable_name) + (word) + (word) + (command_substitution + (command + (command_name + (word)))) + (do_group + (command + (command_name + (word)) + (simple_expansion + (variable_name))) + (command + (command_name + (word))))) + (for_statement + (variable_name) + (do_group + (command + (command_name + (word)) + (simple_expansion + (variable_name))) + (variable_assignment + (variable_name) + (raw_string))))) + +================================================================================ +C-style for statements +================================================================================ + +for (( c=1; c<=5; c++ )) +do + echo $c +done + +for (( c=1; c<=5; c++ )) { + echo $c +} + +for (( ; ; )) +do + echo 'forever' +done + +for ((cx = 0; c = $cx / $pf, c < $wc - $k; )); do + echo "$cx" +done + +for (( i = 4;;i--)) ; do echo $i; if (( $i == 0 )); then break; fi; done + +# added post-bash-4.2 +for (( i = j = k = 1; i % 9 || (j *= -1, $( ((i%9)) || printf " " >&2; echo 0), k++ <= 10); i += j )) +do +printf "$i" +done + +echo + +( for (( i = j = k = 1; i % 9 || (j *= -1, $( ((i%9)) || printf " " >&2; echo 0), k++ <= 10); i += j )) +do +printf "$i" +done ) + 
+-------------------------------------------------------------------------------- + +(program + (c_style_for_statement + (variable_assignment + (variable_name) + (number)) + (binary_expression + (word) + (number)) + (postfix_expression + (word)) + (do_group + (command + (command_name + (word)) + (simple_expansion + (variable_name))))) + (c_style_for_statement + (variable_assignment + (variable_name) + (number)) + (binary_expression + (word) + (number)) + (postfix_expression + (word)) + (compound_statement + (command + (command_name + (word)) + (simple_expansion + (variable_name))))) + (c_style_for_statement + (do_group + (command + (command_name + (word)) + (raw_string)))) + (c_style_for_statement + (variable_assignment + (variable_name) + (number)) + (variable_assignment + (variable_name) + (binary_expression + (simple_expansion + (variable_name)) + (simple_expansion + (variable_name)))) + (binary_expression + (word) + (binary_expression + (simple_expansion + (variable_name)) + (simple_expansion + (variable_name)))) + (do_group + (command + (command_name + (word)) + (string + (simple_expansion + (variable_name)))))) + (c_style_for_statement + (variable_assignment + (variable_name) + (number)) + (postfix_expression + (word)) + (do_group + (command + (command_name + (word)) + (simple_expansion + (variable_name))) + (if_statement + (command + (command_name + (arithmetic_expansion + (binary_expression + (simple_expansion + (variable_name)) + (number))))) + (command + (command_name + (word)))))) + (comment) + (c_style_for_statement + (variable_assignment + (variable_name) + (variable_assignment + (variable_name) + (variable_assignment + (variable_name) + (number)))) + (binary_expression + (binary_expression + (word) + (number)) + (parenthesized_expression + (binary_expression + (word) + (number)) + (command_substitution + (redirected_statement + (list + (command + (command_name + (arithmetic_expansion + (binary_expression + (variable_name) + (number))))) + (command + 
(command_name + (word)) + (string))) + (file_redirect + (number))) + (command + (command_name + (word)) + (number))) + (binary_expression + (postfix_expression + (word)) + (number)))) + (binary_expression + (word) + (word)) + (do_group + (command + (command_name + (word)) + (string + (simple_expansion + (variable_name)))))) + (command + (command_name + (word))) + (subshell + (c_style_for_statement + (variable_assignment + (variable_name) + (variable_assignment + (variable_name) + (variable_assignment + (variable_name) + (number)))) + (binary_expression + (binary_expression + (word) + (number)) + (parenthesized_expression + (binary_expression + (word) + (number)) + (command_substitution + (redirected_statement + (list + (command + (command_name + (arithmetic_expansion + (binary_expression + (variable_name) + (number))))) + (command + (command_name + (word)) + (string))) + (file_redirect + (number))) + (command + (command_name + (word)) + (number))) + (binary_expression + (postfix_expression + (word)) + (number)))) + (binary_expression + (word) + (word)) + (do_group + (command + (command_name + (word)) + (string + (simple_expansion + (variable_name)))))))) + +================================================================================ +If statements +================================================================================ + +if cat some_file | grep -v ok; then + echo one +elif cat other_file | grep -v ok; then + echo two +else + exit +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + (pipeline + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word) + (word))) + (command + (command_name + (word)) + (word)) + (elif_clause + (pipeline + (command + (command_name + (word)) + (word)) + (command + (command_name + (word)) + (word) + (word))) + (command + (command_name + (word)) + (word))) + (else_clause + (command + (command_name + (word)))))) + 
+================================================================================ +If statements with conditional expressions +================================================================================ + +if [ "$(uname)" == 'Darwin' ]; then + echo one +fi + +if [ a = -d ]; then + echo two +fi + +[[ abc == +(a|b|c) ]] && echo 1 +[[ abc != +(a|b|c) ]] && echo 2 + +-------------------------------------------------------------------------------- + +(program + (if_statement + (test_command + (binary_expression + (string + (command_substitution + (command + (command_name + (word))))) + (raw_string))) + (command + (command_name + (word)) + (word))) + (if_statement + (test_command + (binary_expression + (word) + (word))) + (command + (command_name + (word)) + (word))) + (list + (test_command + (binary_expression + (word) + (extglob_pattern))) + (command + (command_name + (word)) + (number))) + (list + (test_command + (binary_expression + (word) + (extglob_pattern))) + (command + (command_name + (word)) + (number)))) + +================================================================================ +If statements with negated command +================================================================================ + +if ! 
command -v echo; then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (negated_command + (command + name: (command_name + (word)) + argument: (word) + argument: (word))) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +If statements with command +================================================================================ + +if command -v echo; then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (command + name: (command_name + (word)) + argument: (word) + argument: (word)) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +If statements with variable assignment by command substitution +================================================================================ + +if result=$(echo 'hello'); then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (variable_assignment + name: (variable_name) + value: (command_substitution + (command + name: (command_name + (word)) + argument: (raw_string)))) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +If statements with negated variable assignment by command substitution +================================================================================ + +if ! 
result=$(echo 'hello'); then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (negated_command + (variable_assignment + name: (variable_name) + value: (command_substitution + (command + name: (command_name + (word)) + argument: (raw_string))))) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +If statements with variable assignment +================================================================================ + +if foo=1; then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (variable_assignment + name: (variable_name) + value: (number)) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +If statements with negated variable assignment +================================================================================ + +if ! foo=1; then + echo 'hello' +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + condition: (negated_command + (variable_assignment + name: (variable_name) + value: (number))) + (command + name: (command_name + (word)) + argument: (raw_string)))) + +================================================================================ +Case statements +================================================================================ + +case "opt" in + a) + echo a + ;; + + b) + echo b + ;& + + c) + echo c;; +esac + +case "opt" in + (a) + echo a + ;; + + (b) + echo b + ;& + + (c) + echo c;; +esac + +case "$Z" in + ab*|cd*) ef +esac + +case $dest in + *.[1357]) + exit $? 
+ ;; +esac + +case x in x) echo meow ;; esac + +case foo in + bar\ baz) : ;; +esac + +case "$arg" in + *([0-9])([0-9])) echo "$arg" +esac + +case ${lang} in +CMakeLists.txt | \ + cmake_modules | \ + ${PN}.pot) ;; +*) rm -r ${lang} || die ;; +esac + +-------------------------------------------------------------------------------- + +(program + (case_statement + (string (string_content)) + (case_item (word) (command (command_name (word)) (word))) + (case_item (word) (command (command_name (word)) (word))) + (case_item (word) (command (command_name (word)) (word)))) + (case_statement + (string (string_content)) + (case_item (word) (command (command_name (word)) (word))) + (case_item (word) (command (command_name (word)) (word))) + (case_item (word) (command (command_name (word)) (word)))) + (case_statement + (string (simple_expansion (variable_name))) + (case_item (word) (word) (command (command_name (word))))) + (case_statement + (simple_expansion (variable_name)) + (case_item + (concatenation (word) (word) (number) (word)) + (command (command_name (word)) (simple_expansion (special_variable_name))))) + (case_statement (word) (case_item (word) (command (command_name (word)) (word)))) + (case_statement (word) (case_item (word) (command (command_name (word))))) + (case_statement + (string (simple_expansion (variable_name))) + (case_item + (extglob_pattern) + (command (command_name (word)) (string (simple_expansion (variable_name)))))) + (case_statement + (expansion (variable_name)) + (case_item (word) (word) (concatenation (expansion (variable_name)) (word))) + (case_item + (extglob_pattern) + (list + (command (command_name (word)) (word) (expansion (variable_name))) + (command (command_name (word))))))) + +================================================================================ +Test commands +================================================================================ + +if [[ "$lsb_dist" != 'Ubuntu' || $(ver_to_int "$lsb_release") < $(ver_to_int '14.04') 
]]; then + return 1 +fi + +[[ ${PV} != $(sed -n -e 's/^Version: //p' "${ED}/usr/$(get_libdir)/pkgconfig/tss2-tcti-tabrmd.pc" || die) ]] + +[[ ${f} != */@(default).vim ]] + +[[ "${MY_LOCALES}" != *en_US* || a != 2 ]] + +[[ $(LC_ALL=C $(tc-getCC) ${LDFLAGS} -Wl,--version 2>/dev/null) != @(LLD|GNU\ ld)* ]] + +[[ -f "${EROOT}/usr/share/php/.packagexml/${MY_P}.xml" && \ + -x "${EROOT}/usr/bin/peardev" ]] + +[[ ${test} == @($(IFS='|'; echo "${skip[*]}")) ]] + +[[ ${SRC_URI} == */${a}* ]] + +[[ a == *_@(LIB|SYMLINK) ]] + +[[ ${1} =~ \.(lisp|lsp|cl)$ ]] + +[[ a == - ]] + +-------------------------------------------------------------------------------- + +(program + (if_statement + (test_command + (binary_expression + (binary_expression (string (simple_expansion (variable_name))) (raw_string)) + (binary_expression + (command_substitution + (command (command_name (word)) (string (simple_expansion (variable_name))))) + (command_substitution (command (command_name (word)) (raw_string)))))) + (command (command_name (word)) (number))) + (test_command + (binary_expression + (expansion (variable_name)) + (command_substitution + (list + (command + (command_name (word)) + (word) + (word) + (raw_string) + (string + (expansion (variable_name)) + (string_content) + (command_substitution (command (command_name (word)))) + (string_content))) + (command (command_name (word))))))) + (test_command (binary_expression (expansion (variable_name)) (extglob_pattern))) + (test_command + (binary_expression + (binary_expression (string (expansion (variable_name))) (extglob_pattern)) + (binary_expression (word) (number)))) + (test_command + (binary_expression + (command_substitution + (redirected_statement + (command + (variable_assignment (variable_name) (word)) + (command_name (command_substitution (command (command_name (word))))) + (expansion (variable_name)) + (word)) + (file_redirect (file_descriptor) (word)))) + (extglob_pattern))) + (test_command + (binary_expression + (unary_expression + 
(test_operator) + (string + (expansion (variable_name)) + (string_content) + (expansion (variable_name)) + (string_content))) + (unary_expression + (test_operator) + (string (expansion (variable_name)) (string_content))))) + (test_command + (binary_expression + (expansion (variable_name)) + (extglob_pattern) + (command_substitution + (variable_assignment (variable_name) (raw_string)) + (command + (command_name (word)) + (string (expansion (subscript (variable_name) (word)))))) + (extglob_pattern))) + (test_command + (binary_expression + (expansion (variable_name)) + (extglob_pattern) + (expansion (variable_name)) + (extglob_pattern))) + (test_command (binary_expression (word) (extglob_pattern))) + (test_command (binary_expression (expansion (variable_name)) (regex))) + (test_command (binary_expression (word) (extglob_pattern)))) + +================================================================================ +Test commands with ternary +================================================================================ + +if (( 1 < 2 ? 1 : 2 )); then + return 1 +fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + (command + (command_name + (arithmetic_expansion + (ternary_expression + (binary_expression + (number) + (number)) + (number) + (number))))) + (command + (command_name + (word)) + (number)))) + +================================================================================ +Ternary expressions +================================================================================ + +$((n < 10 ? n : 10)) +$(($n < 10 ? $n : 10)) +$((${n} < 10 ? 
${n} : 10)) + +-------------------------------------------------------------------------------- + +(program + (command + (command_name + (arithmetic_expansion + (ternary_expression + (binary_expression + (variable_name) + (number)) + (variable_name) + (number))))) + (command + (command_name + (arithmetic_expansion + (ternary_expression + (binary_expression + (simple_expansion + (variable_name)) + (number)) + (simple_expansion + (variable_name)) + (number))))) + (command + (command_name + (arithmetic_expansion + (ternary_expression + (binary_expression + (expansion + (variable_name)) + (number)) + (expansion + (variable_name)) + (number)))))) + +================================================================================ +Test commands with regexes +================================================================================ + +[[ "35d8b" =~ ^[0-9a-fA-F] ]] +[[ $CMD =~ (^|;)update_terminal_cwd($|;) ]] +[[ ! " ${completions[*]} " =~ " $alias_cmd " ]] +! [[ "$a" =~ ^a|b\ *c|d$ ]] +[[ "$1" =~ ^${var}${var}*=..* ]] +[[ "$1" =~ ^\-${var}+ ]] +[[ ${var1} == *${var2}* ]] +[[ "$server" =~ [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} ]] +[[ "$primary_wins" =~ ([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}) ]] +[[ -f ${x} && $(od -t x1 -N 4 "${x}") == *"7f 45 4c 46"* ]] +[[ ${PV} =~ 99999999$ ]] +[[ $1 == -- ]] +[[ " ${REPLACING_VERSIONS} " != *\ ${PVR}\ * ]] +[[ ${file} == @(*${GENTOO_PATCH_NAME}.tar.xz|*.asc|*.sig) ]] +[[ $RUBY_TARGETS != *$( eselect ruby show | awk 'NR==2' | tr -d ' ' )* ]] +[[ " ${m[0]##*/}" =~ ^(\ ${skip_files[*]/%/.*|\\} ) ]] + +-------------------------------------------------------------------------------- + +(program + (test_command (binary_expression (string (string_content)) (regex))) + (test_command (binary_expression (simple_expansion (variable_name)) (regex))) + (test_command + (binary_expression + (unary_expression + (string (expansion (subscript (variable_name) (word))))) + (string (simple_expansion (variable_name))))) + 
(negated_command (test_command (binary_expression (string (simple_expansion (variable_name))) (regex)))) + (test_command (binary_expression (string (simple_expansion (variable_name))) (regex))) + (test_command (binary_expression (string (simple_expansion (variable_name))) (regex))) + (test_command + (binary_expression + (expansion (variable_name)) + (extglob_pattern) + (expansion (variable_name)) + (extglob_pattern))) + (test_command (binary_expression (string (simple_expansion (variable_name))) (regex))) + (test_command (binary_expression (string (simple_expansion (variable_name))) (regex))) + (test_command + (binary_expression + (unary_expression (test_operator) (expansion (variable_name))) + (binary_expression + (command_substitution + (command + (command_name (word)) + (word) + (word) + (word) + (number) + (string (expansion (variable_name))))) + (extglob_pattern) + (string (string_content)) + (extglob_pattern)))) + (test_command (binary_expression (expansion (variable_name)) (regex))) + (test_command (binary_expression (simple_expansion (variable_name)) (extglob_pattern))) + (test_command + (binary_expression + (string (expansion (variable_name))) + (extglob_pattern) + (expansion (variable_name)) + (extglob_pattern))) + (test_command + (binary_expression + (expansion (variable_name)) + (extglob_pattern) + (expansion (variable_name)) + (extglob_pattern))) + (test_command + (binary_expression + (simple_expansion (variable_name)) + (extglob_pattern) + (command_substitution + (pipeline + (command (command_name (word)) (word) (word)) + (command (command_name (word)) (raw_string)) + (command (command_name (word)) (word) (raw_string)))) + (extglob_pattern))) + (test_command + (binary_expression (string (expansion (subscript (variable_name) (number)) (regex))) (regex)))) + +================================================================================ +Test command paren statefulness with a case glob 
+================================================================================ + +[[ ${test} == @($(IFS='|'; echo "${skip[*]}")) ]] + +case ${out} in +*"not supported"*|\ +*"operation not supported"*) + ;; +esac + +case $1 in +-o) + owner=$2 + shift + ;; +-g) ;; +esac + +[[ a == \"+(?)\" ]] + +--- + +(program + (test_command + (binary_expression + (expansion (variable_name)) + (extglob_pattern) + (command_substitution + (variable_assignment (variable_name) (raw_string)) + (command + (command_name (word)) + (string (expansion (subscript (variable_name) (word)))))) + (extglob_pattern))) + (case_statement + (expansion (variable_name)) + (case_item + (extglob_pattern) + (string (string_content)) + (extglob_pattern) + (concatenation (word) (string (string_content)) (word)))) + (case_statement + (simple_expansion (variable_name)) + (case_item + (extglob_pattern) + (variable_assignment (variable_name) (simple_expansion (variable_name))) + (command (command_name (word)))) + (case_item (word))) + (test_command (binary_expression (word) (extglob_pattern)))) + +================================================================================ +Subshells +================================================================================ + +( + ./start-server --port=80 +) & + +time ( cd tests && sh run-tests.sh ) + +-------------------------------------------------------------------------------- + +(program + (subshell (command (command_name (word)) (word))) + (command + (command_name (word)) + (subshell + (list + (command (command_name (word)) (word)) + (command (command_name (word)) (word)))))) + +================================================================================ +Function definitions +================================================================================ + +do_something() { + echo ok +} + +run_subshell_command() ( + true +) + +run_test_command() [[ -e foo ]] + +function do_something_else() { + a | xargs -I{} find xml/{} -type f +} + +function 
do_yet_another_thing { + echo ok +} 2>&1 + +do_nothing() { return 0; } + +-------------------------------------------------------------------------------- + +(program + (function_definition + (word) + (compound_statement + (command + (command_name + (word)) + (word)))) + (function_definition + (word) + (subshell + (command + (command_name + (word))))) + (function_definition + (word) + (test_command + (unary_expression + (test_operator) + (word)))) + (function_definition + (word) + (compound_statement + (pipeline + (command + (command_name + (word))) + (command + (command_name + (word)) + (concatenation + (word) + (word) + (word)) + (word) + (concatenation + (word) + (word) + (word)) + (word) + (word))))) + (function_definition + (word) + (compound_statement + (command + (command_name + (word)) + (word))) + (file_redirect + (file_descriptor) + (number))) + (function_definition + (word) + (compound_statement + (command + (command_name + (word)) + (number))))) + +================================================================================ +Variable declaration: declare & typeset +================================================================================ + +declare var1 +typeset -i -r var2=42 var3=10 + +-------------------------------------------------------------------------------- + +(program + (declaration_command + (variable_name)) + (declaration_command + (word) + (word) + (variable_assignment + (variable_name) + (number)) + (variable_assignment + (variable_name) + (number)))) + +================================================================================ +Variable declaration: readonly +================================================================================ + +readonly var1 +readonly var2=42 + +-------------------------------------------------------------------------------- + +(program + (declaration_command + (variable_name)) + (declaration_command + (variable_assignment + (variable_name) + (number)))) + 
+================================================================================ +Variable declaration: local +================================================================================ + +local a=42 b +local -r c +local var=word1\ word2 + +-------------------------------------------------------------------------------- + +(program + (declaration_command + (variable_assignment + (variable_name) + (number)) + (variable_name)) + (declaration_command + (word) + (variable_name)) + (declaration_command + (variable_assignment + (variable_name) + (word)))) + +================================================================================ +Variable declaration: export +================================================================================ + +export PATH +export FOOBAR PATH="$PATH:/usr/foobar/bin" +export $FOO:$BAR + +-------------------------------------------------------------------------------- + +(program + (declaration_command + (variable_name)) + (declaration_command + (variable_name) + (variable_assignment + (variable_name) + (string + (simple_expansion + (variable_name)) + (string_content)))) + (declaration_command + (concatenation + (simple_expansion + (variable_name)) + (word) + (simple_expansion + (variable_name))))) + +================================================================================ +Variable declaration: command substitution with semi-colon +================================================================================ + +_path=$( + while statement; do + cd ".." 
+ done; + echo $PWD +) + +-------------------------------------------------------------------------------- + +(program + (variable_assignment + (variable_name) + (command_substitution + (while_statement + (command + (command_name + (word))) + (do_group + (command + (command_name + (word)) + (string (string_content))))) + (command + (command_name + (word)) + (simple_expansion + (variable_name)))))) + +=========================================================== +Command substution with $ and backticks +=========================================================== + +$(eval echo $`echo ${foo}`) + +--- + +(program + (command + (command_name + (command_substitution + (command + (command_name + (word)) + (word) + (command_substitution + (command + (command_name + (word)) + (expansion + (variable_name))))))))) + +================================================================================ +Expressions passed to declaration commands +================================================================================ + +export "$(echo ${key} | tr [:lower:] [:upper:])=${p_key#*=}" + +-------------------------------------------------------------------------------- + +(program + (declaration_command + (string + (command_substitution + (pipeline + (command + (command_name + (word)) + (expansion + (variable_name))) + (command + (command_name + (word)) + (concatenation + (word) + (word) + (word)) + (concatenation + (word) + (word) + (word))))) + (string_content) + (expansion + (variable_name) + (regex))))) + +================================================================================ +Unset commands +================================================================================ + +unset A +unset "$variable_name" +unsetenv -f ONE TWO + +-------------------------------------------------------------------------------- + +(program + (unset_command + (variable_name)) + (unset_command + (string + (simple_expansion + (variable_name)))) + (unset_command + (word) + (variable_name) + 
(variable_name))) + +================================================================================ +Compound statements +================================================================================ + +a () { + ls || { echo "b"; return 0; } + echo c +} + +{ echo "a" + echo "b" +} >&2 + +-------------------------------------------------------------------------------- + +(program + (function_definition + (word) + (compound_statement + (list + (command + (command_name + (word))) + (compound_statement + (command + (command_name + (word)) + (string (string_content))) + (command + (command_name + (word)) + (number)))) + (command + (command_name + (word)) + (word)))) + (redirected_statement + (compound_statement + (command + (command_name + (word)) + (string (string_content))) + (command + (command_name + (word)) + (string (string_content)))) + (file_redirect + (number)))) + +================================================================================ +If condition with subshell +================================================================================ + +if (echo $BASHPID; true); then echo $BASHPID; fi + +-------------------------------------------------------------------------------- + +(program + (if_statement + (subshell + (command + (command_name + (word)) + (simple_expansion + (variable_name))) + (command + (command_name + (word)))) + (command + (command_name + (word)) + (simple_expansion + (variable_name))))) + +================================================================================ +While condition with subshell +================================================================================ + +while (echo $BASHPID; true); do echo $BASHPID; break; done + +-------------------------------------------------------------------------------- + +(program + (while_statement + (subshell + (command + (command_name + (word)) + (simple_expansion + (variable_name))) + (command + (command_name + (word)))) + (do_group + (command + (command_name + 
(word)) + (simple_expansion + (variable_name))) + (command + (command_name + (word)))))) diff --git a/c/LICENSE b/c/LICENSE new file mode 100644 index 0000000..4b52d19 --- /dev/null +++ b/c/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/c/examples/cluster.c b/c/examples/cluster.c new file mode 100644 index 0000000..77ec2f1 --- /dev/null +++ b/c/examples/cluster.c @@ -0,0 +1,5446 @@ +/* Redis Cluster implementation. + * + * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "cluster.h" +#include "endianconv.h" + +#include <sys/types.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/file.h> +#include <math.h> + +/* A global reference to myself is handy to make code more clear. + * Myself always points to server.cluster->myself, that is, the clusterNode + * that represents this node. 
*/ +clusterNode *myself = NULL; + +clusterNode *createClusterNode(char *nodename, int flags); +int clusterAddNode(clusterNode *node); +void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); +void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask); +void clusterSendPing(clusterLink *link, int type); +void clusterSendFail(char *nodename); +void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request); +void clusterUpdateState(void); +int clusterNodeGetSlotBit(clusterNode *n, int slot); +sds clusterGenNodesDescription(int filter); +clusterNode *clusterLookupNode(char *name); +int clusterNodeAddSlave(clusterNode *master, clusterNode *slave); +int clusterAddSlot(clusterNode *n, int slot); +int clusterDelSlot(int slot); +int clusterDelNodeSlots(clusterNode *node); +int clusterNodeSetSlotBit(clusterNode *n, int slot); +void clusterSetMaster(clusterNode *n); +void clusterHandleSlaveFailover(void); +void clusterHandleSlaveMigration(int max_slaves); +int bitmapTestBit(unsigned char *bitmap, int pos); +void clusterDoBeforeSleep(int flags); +void clusterSendUpdate(clusterLink *link, clusterNode *node); +void resetManualFailover(void); +void clusterCloseAllSlots(void); +void clusterSetNodeAsMaster(clusterNode *n); +void clusterDelNode(clusterNode *delnode); +sds representClusterNodeFlags(sds ci, uint16_t flags); +uint64_t clusterGetMaxEpoch(void); +int clusterBumpConfigEpochWithoutConsensus(void); + +/* ----------------------------------------------------------------------------- + * Initialization + * -------------------------------------------------------------------------- */ + +/* Load the cluster config from 'filename'. + * + * If the file does not exist or is zero-length (this may happen because + * when we lock the nodes.conf file, we create a zero-length one for the + * sake of locking if it does not already exist), C_ERR is returned. + * If the configuration was loaded from the file, C_OK is returned. 
*/ +int clusterLoadConfig(char *filename) { + FILE *fp = fopen(filename,"r"); + struct stat sb; + char *line; + int maxline, j; + + if (fp == NULL) { + if (errno == ENOENT) { + return C_ERR; + } else { + serverLog(LL_WARNING, + "Loading the cluster node config from %s: %s", + filename, strerror(errno)); + exit(1); + } + } + + /* Check if the file is zero-length: if so return C_ERR to signal + * we have to write the config. */ + if (fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { + fclose(fp); + return C_ERR; + } + + /* Parse the file. Note that single lines of the cluster config file can + * be really long as they include all the hash slots of the node. + * This means in the worst possible case, half of the Redis slots will be + * present in a single line, possibly in importing or migrating state, so + * together with the node ID of the sender/receiver. + * + * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */ + maxline = 1024+CLUSTER_SLOTS*128; + line = zmalloc(maxline); + while(fgets(line,maxline,fp) != NULL) { + int argc; + sds *argv; + clusterNode *n, *master; + char *p, *s; + + /* Skip blank lines, they can be created either by users manually + * editing nodes.conf or by the config writing process if stopped + * before the truncate() call. */ + if (line[0] == '\n' || line[0] == '\0') continue; + + /* Split the line into arguments for processing. */ + argv = sdssplitargs(line,&argc); + if (argv == NULL) goto fmterr; + + /* Handle the special "vars" line. Don't pretend it is the last + * line even if it actually is when generated by Redis. 
*/ + if (strcasecmp(argv[0],"vars") == 0) { + for (j = 1; j < argc; j += 2) { + if (strcasecmp(argv[j],"currentEpoch") == 0) { + server.cluster->currentEpoch = + strtoull(argv[j+1],NULL,10); + } else if (strcasecmp(argv[j],"lastVoteEpoch") == 0) { + server.cluster->lastVoteEpoch = + strtoull(argv[j+1],NULL,10); + } else { + serverLog(LL_WARNING, + "Skipping unknown cluster config variable '%s'", + argv[j]); + } + } + sdsfreesplitres(argv,argc); + continue; + } + + /* Regular config lines have at least eight fields */ + if (argc < 8) goto fmterr; + + /* Create this node if it does not exist */ + n = clusterLookupNode(argv[0]); + if (!n) { + n = createClusterNode(argv[0],0); + clusterAddNode(n); + } + /* Address and port */ + if ((p = strrchr(argv[1],':')) == NULL) goto fmterr; + *p = '\0'; + memcpy(n->ip,argv[1],strlen(argv[1])+1); + char *port = p+1; + char *busp = strchr(port,'@'); + if (busp) { + *busp = '\0'; + busp++; + } + n->port = atoi(port); + /* In older versions of nodes.conf the "@busport" part is missing. + * In this case we set it to the default offset of 10000 from the + * base port. */ + n->cport = busp ? 
atoi(busp) : n->port + CLUSTER_PORT_INCR; + + /* Parse flags */ + p = s = argv[2]; + while(p) { + p = strchr(s,','); + if (p) *p = '\0'; + if (!strcasecmp(s,"myself")) { + serverAssert(server.cluster->myself == NULL); + myself = server.cluster->myself = n; + n->flags |= CLUSTER_NODE_MYSELF; + } else if (!strcasecmp(s,"master")) { + n->flags |= CLUSTER_NODE_MASTER; + } else if (!strcasecmp(s,"slave")) { + n->flags |= CLUSTER_NODE_SLAVE; + } else if (!strcasecmp(s,"fail?")) { + n->flags |= CLUSTER_NODE_PFAIL; + } else if (!strcasecmp(s,"fail")) { + n->flags |= CLUSTER_NODE_FAIL; + n->fail_time = mstime(); + } else if (!strcasecmp(s,"handshake")) { + n->flags |= CLUSTER_NODE_HANDSHAKE; + } else if (!strcasecmp(s,"noaddr")) { + n->flags |= CLUSTER_NODE_NOADDR; + } else if (!strcasecmp(s,"noflags")) { + /* nothing to do */ + } else { + serverPanic("Unknown flag in redis cluster config file"); + } + if (p) s = p+1; + } + + /* Get master if any. Set the master and populate master's + * slave list. */ + if (argv[3][0] != '-') { + master = clusterLookupNode(argv[3]); + if (!master) { + master = createClusterNode(argv[3],0); + clusterAddNode(master); + } + n->slaveof = master; + clusterNodeAddSlave(master,n); + } + + /* Set ping sent / pong received timestamps */ + if (atoi(argv[4])) n->ping_sent = mstime(); + if (atoi(argv[5])) n->pong_received = mstime(); + + /* Set configEpoch for this node. */ + n->configEpoch = strtoull(argv[6],NULL,10); + + /* Populate hash slots served by this instance. 
*/ + for (j = 8; j < argc; j++) { + int start, stop; + + if (argv[j][0] == '[') { + /* Here we handle migrating / importing slots */ + int slot; + char direction; + clusterNode *cn; + + p = strchr(argv[j],'-'); + serverAssert(p != NULL); + *p = '\0'; + direction = p[1]; /* Either '>' or '<' */ + slot = atoi(argv[j]+1); + p += 3; + cn = clusterLookupNode(p); + if (!cn) { + cn = createClusterNode(p,0); + clusterAddNode(cn); + } + if (direction == '>') { + server.cluster->migrating_slots_to[slot] = cn; + } else { + server.cluster->importing_slots_from[slot] = cn; + } + continue; + } else if ((p = strchr(argv[j],'-')) != NULL) { + *p = '\0'; + start = atoi(argv[j]); + stop = atoi(p+1); + } else { + start = stop = atoi(argv[j]); + } + while(start <= stop) clusterAddSlot(n, start++); + } + + sdsfreesplitres(argv,argc); + } + /* Config sanity check */ + if (server.cluster->myself == NULL) goto fmterr; + + zfree(line); + fclose(fp); + + serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name); + + /* Something that should never happen: currentEpoch smaller than + * the max epoch found in the nodes configuration. However we handle this + * as some form of protection against manual editing of critical files. */ + if (clusterGetMaxEpoch() > server.cluster->currentEpoch) { + server.cluster->currentEpoch = clusterGetMaxEpoch(); + } + return C_OK; + +fmterr: + serverLog(LL_WARNING, + "Unrecoverable error: corrupted cluster config file."); + zfree(line); + if (fp) fclose(fp); + exit(1); +} + +/* Cluster node configuration is exactly the same as CLUSTER NODES output. + * + * This function writes the node config and returns 0, on error -1 + * is returned. + * + * Note: we need to write the file in an atomic way from the point of view + * of the POSIX filesystem semantics, so that if the server is stopped + * or crashes during the write, we'll end with either the old file or the + * new one. 
Since we have the full payload to write available we can use + * a single write to write the whole file. If the pre-existing file was + * bigger we pad our payload with newlines that are anyway ignored and truncate + * the file afterward. */ +int clusterSaveConfig(int do_fsync) { + sds ci; + size_t content_size; + struct stat sb; + int fd; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG; + + /* Get the nodes description and concatenate our "vars" directive to + * save currentEpoch and lastVoteEpoch. */ + ci = clusterGenNodesDescription(CLUSTER_NODE_HANDSHAKE); + ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n", + (unsigned long long) server.cluster->currentEpoch, + (unsigned long long) server.cluster->lastVoteEpoch); + content_size = sdslen(ci); + + if ((fd = open(server.cluster_configfile,O_WRONLY|O_CREAT,0644)) + == -1) goto err; + + /* Pad the new payload if the existing file length is greater. */ + if (fstat(fd,&sb) != -1) { + if (sb.st_size > (off_t)content_size) { + ci = sdsgrowzero(ci,sb.st_size); + memset(ci+content_size,'\n',sb.st_size-content_size); + } + } + if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err; + if (do_fsync) { + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG; + fsync(fd); + } + + /* Truncate the file if needed to remove the final \n padding that + * is just garbage. */ + if (content_size != sdslen(ci) && ftruncate(fd,content_size) == -1) { + /* ftruncate() failing is not a critical error. */ + } + close(fd); + sdsfree(ci); + return 0; + +err: + if (fd != -1) close(fd); + sdsfree(ci); + return -1; +} + +void clusterSaveConfigOrDie(int do_fsync) { + if (clusterSaveConfig(do_fsync) == -1) { + serverLog(LL_WARNING,"Fatal: can't update cluster config file."); + exit(1); + } +} + +/* Lock the cluster config using flock(), and leaks the file descritor used to + * acquire the lock so that the file will be locked forever. 
+ * + * This works because we always update nodes.conf with a new version + * in-place, reopening the file, and writing to it in place (later adjusting + * the length with ftruncate()). + * + * On success C_OK is returned, otherwise an error is logged and + * the function returns C_ERR to signal a lock was not acquired. */ +int clusterLockConfig(char *filename) { +/* flock() does not exist on Solaris + * and a fcntl-based solution won't help, as we constantly re-open that file, + * which will release _all_ locks anyway + */ +#if !defined(__sun) + /* To lock it, we need to open the file in a way it is created if + * it does not exist, otherwise there is a race condition with other + * processes. */ + int fd = open(filename,O_WRONLY|O_CREAT,0644); + if (fd == -1) { + serverLog(LL_WARNING, + "Can't open %s in order to acquire a lock: %s", + filename, strerror(errno)); + return C_ERR; + } + + if (flock(fd,LOCK_EX|LOCK_NB) == -1) { + if (errno == EWOULDBLOCK) { + serverLog(LL_WARNING, + "Sorry, the cluster configuration file %s is already used " + "by a different Redis Cluster node. Please make sure that " + "different nodes use different cluster configuration " + "files.", filename); + } else { + serverLog(LL_WARNING, + "Impossible to lock %s: %s", filename, strerror(errno)); + } + close(fd); + return C_ERR; + } + /* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the + * lock to the file as long as the process exists. 
*/ +#endif /* __sun */ + + return C_OK; +} + +void clusterInit(void) { + int saveconf = 0; + + server.cluster = zmalloc(sizeof(clusterState)); + server.cluster->myself = NULL; + server.cluster->currentEpoch = 0; + server.cluster->state = CLUSTER_FAIL; + server.cluster->size = 1; + server.cluster->todo_before_sleep = 0; + server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL); + server.cluster->nodes_black_list = + dictCreate(&clusterNodesBlackListDictType,NULL); + server.cluster->failover_auth_time = 0; + server.cluster->failover_auth_count = 0; + server.cluster->failover_auth_rank = 0; + server.cluster->failover_auth_epoch = 0; + server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; + server.cluster->lastVoteEpoch = 0; + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + server.cluster->stats_bus_messages_sent[i] = 0; + server.cluster->stats_bus_messages_received[i] = 0; + } + server.cluster->stats_pfail_nodes = 0; + memset(server.cluster->slots,0, sizeof(server.cluster->slots)); + clusterCloseAllSlots(); + + /* Lock the cluster config file to make sure every node uses + * its own nodes.conf. */ + if (clusterLockConfig(server.cluster_configfile) == C_ERR) + exit(1); + + /* Load or create a new nodes configuration. */ + if (clusterLoadConfig(server.cluster_configfile) == C_ERR) { + /* No configuration found. We will just use the random name provided + * by the createClusterNode() function. */ + myself = server.cluster->myself = + createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER); + serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s", + myself->name); + clusterAddNode(myself); + saveconf = 1; + } + if (saveconf) clusterSaveConfigOrDie(1); + + /* We need a listening TCP port for our cluster messaging needs. */ + server.cfd_count = 0; + + /* Port sanity check II + * The other handshake port check is triggered too late to stop + * us from trying to use a too-high cluster port number. 
*/ + if (server.port > (65535-CLUSTER_PORT_INCR)) { + serverLog(LL_WARNING, "Redis port number too high. " + "Cluster communication port is 10,000 port " + "numbers higher than your Redis port. " + "Your Redis port number must be " + "lower than 55535."); + exit(1); + } + + if (listenToPort(server.port+CLUSTER_PORT_INCR, + server.cfd,&server.cfd_count) == C_ERR) + { + exit(1); + } else { + int j; + + for (j = 0; j < server.cfd_count; j++) { + if (aeCreateFileEvent(server.el, server.cfd[j], AE_READABLE, + clusterAcceptHandler, NULL) == AE_ERR) + serverPanic("Unrecoverable error creating Redis Cluster " + "file event."); + } + } + + /* The slots -> keys map is a radix tree. Initialize it here. */ + server.cluster->slots_to_keys = raxNew(); + memset(server.cluster->slots_keys_count,0, + sizeof(server.cluster->slots_keys_count)); + + /* Set myself->port / cport to my listening ports, we'll just need to + * discover the IP address via MEET messages. */ + myself->port = server.port; + myself->cport = server.port+CLUSTER_PORT_INCR; + if (server.cluster_announce_port) + myself->port = server.cluster_announce_port; + if (server.cluster_announce_bus_port) + myself->cport = server.cluster_announce_bus_port; + + server.cluster->mf_end = 0; + resetManualFailover(); +} + +/* Reset a node performing a soft or hard reset: + * + * 1) All other nodes are forget. + * 2) All the assigned / open slots are released. + * 3) If the node is a slave, it turns into a master. + * 5) Only for hard reset: a new Node ID is generated. + * 6) Only for hard reset: currentEpoch and configEpoch are set to 0. + * 7) The new configuration is saved and the cluster state updated. + * 8) If the node was a slave, the whole data set is flushed away. */ +void clusterReset(int hard) { + dictIterator *di; + dictEntry *de; + int j; + + /* Turn into master. 
*/ + if (nodeIsSlave(myself)) { + clusterSetNodeAsMaster(myself); + replicationUnsetMaster(); + emptyDb(-1,EMPTYDB_NO_FLAGS,NULL); + } + + /* Close slots, reset manual failover state. */ + clusterCloseAllSlots(); + resetManualFailover(); + + /* Unassign all the slots. */ + for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j); + + /* Forget all the nodes, but myself. */ + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node == myself) continue; + clusterDelNode(node); + } + dictReleaseIterator(di); + + /* Hard reset only: set epochs to 0, change node ID. */ + if (hard) { + sds oldname; + + server.cluster->currentEpoch = 0; + server.cluster->lastVoteEpoch = 0; + myself->configEpoch = 0; + serverLog(LL_WARNING, "configEpoch set to 0 via CLUSTER RESET HARD"); + + /* To change the Node ID we need to remove the old name from the + * nodes table, change the ID, and re-add back with new name. */ + oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN); + dictDelete(server.cluster->nodes,oldname); + sdsfree(oldname); + getRandomHexChars(myself->name, CLUSTER_NAMELEN); + clusterAddNode(myself); + serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name); + } + + /* Make sure to persist the new config and update the state. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER communication link + * -------------------------------------------------------------------------- */ + +clusterLink *createClusterLink(clusterNode *node) { + clusterLink *link = zmalloc(sizeof(*link)); + link->ctime = mstime(); + link->sndbuf = sdsempty(); + link->rcvbuf = sdsempty(); + link->node = node; + link->fd = -1; + return link; +} + +/* Free a cluster link, but does not free the associated node of course. 
+ * This function will just make sure that the original node associated + * with this link will have the 'link' field set to NULL. */ +void freeClusterLink(clusterLink *link) { + if (link->fd != -1) { + aeDeleteFileEvent(server.el, link->fd, AE_WRITABLE); + aeDeleteFileEvent(server.el, link->fd, AE_READABLE); + } + sdsfree(link->sndbuf); + sdsfree(link->rcvbuf); + if (link->node) + link->node->link = NULL; + close(link->fd); + zfree(link); +} + +#define MAX_CLUSTER_ACCEPTS_PER_CALL 1000 +void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cport, cfd; + int max = MAX_CLUSTER_ACCEPTS_PER_CALL; + char cip[NET_IP_STR_LEN]; + clusterLink *link; + UNUSED(el); + UNUSED(mask); + UNUSED(privdata); + + /* If the server is starting up, don't accept cluster connections: + * UPDATE messages may interact with the database content. */ + if (server.masterhost == NULL && server.loading) return; + + while(max--) { + cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport); + if (cfd == ANET_ERR) { + if (errno != EWOULDBLOCK) + serverLog(LL_VERBOSE, + "Error accepting cluster node: %s", server.neterr); + return; + } + anetNonBlock(NULL,cfd); + anetEnableTcpNoDelay(NULL,cfd); + + /* Use non-blocking I/O for cluster messages. */ + serverLog(LL_VERBOSE,"Accepted cluster node %s:%d", cip, cport); + /* Create a link object we use to handle the connection. + * It gets passed to the readable handler when data is available. + * Initiallly the link->node pointer is set to NULL as we don't know + * which node is, but the right node is references once we know the + * node identity. */ + link = createClusterLink(NULL); + link->fd = cfd; + aeCreateFileEvent(server.el,cfd,AE_READABLE,clusterReadHandler,link); + } +} + +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. 
The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing betweeen {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER node API + * -------------------------------------------------------------------------- */ + +/* Create a new cluster node, with the specified flags. + * If "nodename" is NULL this is considered a first handshake and a random + * node name is assigned to this node (it will be fixed later when we'll + * receive the first pong). + * + * The node is created and returned to the user, but it is not automatically + * added to the nodes hash table. 
*/ +clusterNode *createClusterNode(char *nodename, int flags) { + clusterNode *node = zmalloc(sizeof(*node)); + + if (nodename) + memcpy(node->name, nodename, CLUSTER_NAMELEN); + else + getRandomHexChars(node->name, CLUSTER_NAMELEN); + node->ctime = mstime(); + node->configEpoch = 0; + node->flags = flags; + memset(node->slots,0,sizeof(node->slots)); + node->numslots = 0; + node->numslaves = 0; + node->slaves = NULL; + node->slaveof = NULL; + node->ping_sent = node->pong_received = 0; + node->fail_time = 0; + node->link = NULL; + memset(node->ip,0,sizeof(node->ip)); + node->port = 0; + node->cport = 0; + node->fail_reports = listCreate(); + node->voted_time = 0; + node->orphaned_time = 0; + node->repl_offset_time = 0; + node->repl_offset = 0; + listSetFreeMethod(node->fail_reports,zfree); + return node; +} + +/* This function is called every time we get a failure report from a node. + * The side effect is to populate the fail_reports list (or to update + * the timestamp of an existing report). + * + * 'failing' is the node that is in failure state according to the + * 'sender' node. + * + * The function returns 0 if it just updates a timestamp of an existing + * failure report from the same sender. 1 is returned if a new failure + * report is created. */ +int clusterNodeAddFailureReport(clusterNode *failing, clusterNode *sender) { + list *l = failing->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + + /* If a failure report from the same sender already exists, just update + * the timestamp. */ + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (fr->node == sender) { + fr->time = mstime(); + return 0; + } + } + + /* Otherwise create a new report. */ + fr = zmalloc(sizeof(*fr)); + fr->node = sender; + fr->time = mstime(); + listAddNodeTail(l,fr); + return 1; +} + +/* Remove failure reports that are too old, where too old means reasonably + * older than the global node timeout. 
Note that anyway for a node to be + * flagged as FAIL we need to have a local PFAIL state that is at least + * older than the global node timeout, so we don't just trust the number + * of failure reports from other nodes. */ +void clusterNodeCleanupFailureReports(clusterNode *node) { + list *l = node->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + mstime_t maxtime = server.cluster_node_timeout * + CLUSTER_FAIL_REPORT_VALIDITY_MULT; + mstime_t now = mstime(); + + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (now - fr->time > maxtime) listDelNode(l,ln); + } +} + +/* Remove the failing report for 'node' if it was previously considered + * failing by 'sender'. This function is called when a node informs us via + * gossip that a node is OK from its point of view (no FAIL or PFAIL flags). + * + * Note that this function is called relatively often as it gets called even + * when there are no nodes failing, and is O(N), however when the cluster is + * fine the failure reports list is empty so the function runs in constant + * time. + * + * The function returns 1 if the failure report was found and removed. + * Otherwise 0 is returned. */ +int clusterNodeDelFailureReport(clusterNode *node, clusterNode *sender) { + list *l = node->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + + /* Search for a failure report from this sender. */ + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (fr->node == sender) break; + } + if (!ln) return 0; /* No failure report from this sender. */ + + /* Remove the failure report. */ + listDelNode(l,ln); + clusterNodeCleanupFailureReports(node); + return 1; +} + +/* Return the number of external nodes that believe 'node' is failing, + * not including this node, that may have a PFAIL or FAIL state for this + * node as well. 
*/ +int clusterNodeFailureReportsCount(clusterNode *node) { + clusterNodeCleanupFailureReports(node); + return listLength(node->fail_reports); +} + +int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) { + int j; + + for (j = 0; j < master->numslaves; j++) { + if (master->slaves[j] == slave) { + if ((j+1) < master->numslaves) { + int remaining_slaves = (master->numslaves - j) - 1; + memmove(master->slaves+j,master->slaves+(j+1), + (sizeof(*master->slaves) * remaining_slaves)); + } + master->numslaves--; + if (master->numslaves == 0) + master->flags &= ~CLUSTER_NODE_MIGRATE_TO; + return C_OK; + } + } + return C_ERR; +} + +int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) { + int j; + + /* If it's already a slave, don't add it again. */ + for (j = 0; j < master->numslaves; j++) + if (master->slaves[j] == slave) return C_ERR; + master->slaves = zrealloc(master->slaves, + sizeof(clusterNode*)*(master->numslaves+1)); + master->slaves[master->numslaves] = slave; + master->numslaves++; + master->flags |= CLUSTER_NODE_MIGRATE_TO; + return C_OK; +} + +int clusterCountNonFailingSlaves(clusterNode *n) { + int j, okslaves = 0; + + for (j = 0; j < n->numslaves; j++) + if (!nodeFailed(n->slaves[j])) okslaves++; + return okslaves; +} + +/* Low level cleanup of the node structure. Only called by clusterDelNode(). */ +void freeClusterNode(clusterNode *n) { + sds nodename; + int j; + + /* If the node has associated slaves, we have to set + * all the slaves->slaveof fields to NULL (unknown). */ + for (j = 0; j < n->numslaves; j++) + n->slaves[j]->slaveof = NULL; + + /* Remove this node from the list of slaves of its master. */ + if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n); + + /* Unlink from the set of nodes. */ + nodename = sdsnewlen(n->name, CLUSTER_NAMELEN); + serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK); + sdsfree(nodename); + + /* Release link and associated data structures. 
*/ + if (n->link) freeClusterLink(n->link); + listRelease(n->fail_reports); + zfree(n->slaves); + zfree(n); +} + +/* Add a node to the nodes hash table */ +int clusterAddNode(clusterNode *node) { + int retval; + + retval = dictAdd(server.cluster->nodes, + sdsnewlen(node->name,CLUSTER_NAMELEN), node); + return (retval == DICT_OK) ? C_OK : C_ERR; +} + +/* Remove a node from the cluster. The functio performs the high level + * cleanup, calling freeClusterNode() for the low level cleanup. + * Here we do the following: + * + * 1) Mark all the slots handled by it as unassigned. + * 2) Remove all the failure reports sent by this node and referenced by + * other nodes. + * 3) Free the node with freeClusterNode() that will in turn remove it + * from the hash table and from the list of slaves of its master, if + * it is a slave node. + */ +void clusterDelNode(clusterNode *delnode) { + int j; + dictIterator *di; + dictEntry *de; + + /* 1) Mark slots as unassigned. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->importing_slots_from[j] == delnode) + server.cluster->importing_slots_from[j] = NULL; + if (server.cluster->migrating_slots_to[j] == delnode) + server.cluster->migrating_slots_to[j] = NULL; + if (server.cluster->slots[j] == delnode) + clusterDelSlot(j); + } + + /* 2) Remove failure reports. */ + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node == delnode) continue; + clusterNodeDelFailureReport(node,delnode); + } + dictReleaseIterator(di); + + /* 3) Free the node, unlinking it from the cluster. */ + freeClusterNode(delnode); +} + +/* Node lookup by name */ +clusterNode *clusterLookupNode(char *name) { + sds s = sdsnewlen(name, CLUSTER_NAMELEN); + dictEntry *de; + + de = dictFind(server.cluster->nodes,s); + sdsfree(s); + if (de == NULL) return NULL; + return dictGetVal(de); +} + +/* This is only used after the handshake. 
When we connect a given IP/PORT + * as a result of CLUSTER MEET we don't have the node name yet, so we + * pick a random one, and will fix it when we receive the PONG request using + * this function. */ +void clusterRenameNode(clusterNode *node, char *newname) { + int retval; + sds s = sdsnewlen(node->name, CLUSTER_NAMELEN); + + serverLog(LL_DEBUG,"Renaming node %.40s into %.40s", + node->name, newname); + retval = dictDelete(server.cluster->nodes, s); + sdsfree(s); + serverAssert(retval == DICT_OK); + memcpy(node->name, newname, CLUSTER_NAMELEN); + clusterAddNode(node); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER config epoch handling + * -------------------------------------------------------------------------- */ + +/* Return the greatest configEpoch found in the cluster, or the current + * epoch if greater than any node configEpoch. */ +uint64_t clusterGetMaxEpoch(void) { + uint64_t max = 0; + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + if (node->configEpoch > max) max = node->configEpoch; + } + dictReleaseIterator(di); + if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch; + return max; +} + +/* If this node epoch is zero or is not already the greatest across the + * cluster (from the POV of the local configuration), this function will: + * + * 1) Generate a new config epoch, incrementing the current epoch. + * 2) Assign the new epoch to this node, WITHOUT any consensus. + * 3) Persist the configuration on disk before sending packets with the + * new configuration. + * + * If the new config epoch is generated and assigend, C_OK is returned, + * otherwise C_ERR is returned (since the node has already the greatest + * configuration around) and no operation is performed. 
+ * + * Important note: this function violates the principle that config epochs + * should be generated with consensus and should be unique across the cluster. + * However Redis Cluster uses this auto-generated new config epochs in two + * cases: + * + * 1) When slots are closed after importing. Otherwise resharding would be + * too expensive. + * 2) When CLUSTER FAILOVER is called with options that force a slave to + * failover its master even if there is not master majority able to + * create a new configuration epoch. + * + * Redis Cluster will not explode using this function, even in the case of + * a collision between this node and another node, generating the same + * configuration epoch unilaterally, because the config epoch conflict + * resolution algorithm will eventually move colliding nodes to different + * config epochs. However using this function may violate the "last failover + * wins" rule, so should only be used with care. */ +int clusterBumpConfigEpochWithoutConsensus(void) { + uint64_t maxEpoch = clusterGetMaxEpoch(); + + if (myself->configEpoch == 0 || + myself->configEpoch != maxEpoch) + { + server.cluster->currentEpoch++; + myself->configEpoch = server.cluster->currentEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + serverLog(LL_WARNING, + "New configEpoch set to %llu", + (unsigned long long) myself->configEpoch); + return C_OK; + } else { + return C_ERR; + } +} + +/* This function is called when this node is a master, and we receive from + * another master a configuration epoch that is equal to our configuration + * epoch. + * + * BACKGROUND + * + * It is not possible that different slaves get the same config + * epoch during a failover election, because the slaves need to get voted + * by a majority. However when we perform a manual resharding of the cluster + * the node will assign a configuration epoch to itself without to ask + * for agreement. 
Usually resharding happens when the cluster is working well + * and is supervised by the sysadmin, however it is possible for a failover + * to happen exactly while the node we are resharding a slot to assigns itself + * a new configuration epoch, but before it is able to propagate it. + * + * So technically it is possible in this condition that two nodes end with + * the same configuration epoch. + * + * Another possibility is that there are bugs in the implementation causing + * this to happen. + * + * Moreover when a new cluster is created, all the nodes start with the same + * configEpoch. This collision resolution code allows nodes to automatically + * end with a different configEpoch at startup automatically. + * + * In all the cases, we want a mechanism that resolves this issue automatically + * as a safeguard. The same configuration epoch for masters serving different + * set of slots is not harmful, but it is if the nodes end serving the same + * slots for some reason (manual errors or software bugs) without a proper + * failover procedure. + * + * In general we want a system that eventually always ends with different + * masters having different configuration epochs whatever happened, since + * nothign is worse than a split-brain condition in a distributed system. + * + * BEHAVIOR + * + * When this function gets called, what happens is that if this node + * has the lexicographically smaller Node ID compared to the other node + * with the conflicting epoch (the 'sender' node), it will assign itself + * the greatest configuration epoch currently detected among nodes plus 1. + * + * This means that even if there are multiple nodes colliding, the node + * with the greatest Node ID never moves forward, so eventually all the nodes + * end with a different configuration epoch. + */ +void clusterHandleConfigEpochCollision(clusterNode *sender) { + /* Prerequisites: nodes have the same configEpoch and are both masters. 
*/ + if (sender->configEpoch != myself->configEpoch || + !nodeIsMaster(sender) || !nodeIsMaster(myself)) return; + /* Don't act if the colliding node has a smaller Node ID. */ + if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return; + /* Get the next ID available at the best of this node knowledge. */ + server.cluster->currentEpoch++; + myself->configEpoch = server.cluster->currentEpoch; + clusterSaveConfigOrDie(1); + serverLog(LL_VERBOSE, + "WARNING: configEpoch collision with node %.40s." + " configEpoch set to %llu", + sender->name, + (unsigned long long) myself->configEpoch); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER nodes blacklist + * + * The nodes blacklist is just a way to ensure that a given node with a given + * Node ID is not readded before some time elapsed (this time is specified + * in seconds in CLUSTER_BLACKLIST_TTL). + * + * This is useful when we want to remove a node from the cluster completely: + * when CLUSTER FORGET is called, it also puts the node into the blacklist so + * that even if we receive gossip messages from other nodes that still remember + * about the node we want to remove, we don't re-add it before some time. + * + * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means + * that redis-trib has 60 seconds to send CLUSTER FORGET messages to nodes + * in the cluster without dealing with the problem of other nodes re-adding + * back the node to nodes we already sent the FORGET command to. + * + * The data structure used is a hash table with an sds string representing + * the node ID as key, and the time when it is ok to re-add the node as + * value. + * -------------------------------------------------------------------------- */ + +#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */ + + +/* Before of the addNode() or Exists() operations we always remove expired + * entries from the black list. 
This is an O(N) operation but it is not a + * problem since add / exists operations are called very infrequently and + * the hash table is supposed to contain very little elements at max. + * However without the cleanup during long uptimes and with some automated + * node add/removal procedures, entries could accumulate. */ +void clusterBlacklistCleanup(void) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes_black_list); + while((de = dictNext(di)) != NULL) { + int64_t expire = dictGetUnsignedIntegerVal(de); + + if (expire < server.unixtime) + dictDelete(server.cluster->nodes_black_list,dictGetKey(de)); + } + dictReleaseIterator(di); +} + +/* Cleanup the blacklist and add a new node ID to the black list. */ +void clusterBlacklistAddNode(clusterNode *node) { + dictEntry *de; + sds id = sdsnewlen(node->name,CLUSTER_NAMELEN); + + clusterBlacklistCleanup(); + if (dictAdd(server.cluster->nodes_black_list,id,NULL) == DICT_OK) { + /* If the key was added, duplicate the sds string representation of + * the key for the next lookup. We'll free it at the end. */ + id = sdsdup(id); + } + de = dictFind(server.cluster->nodes_black_list,id); + dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL); + sdsfree(id); +} + +/* Return non-zero if the specified node ID exists in the blacklist. + * You don't need to pass an sds string here, any pointer to 40 bytes + * will work. */ +int clusterBlacklistExists(char *nodeid) { + sds id = sdsnewlen(nodeid,CLUSTER_NAMELEN); + int retval; + + clusterBlacklistCleanup(); + retval = dictFind(server.cluster->nodes_black_list,id) != NULL; + sdsfree(id); + return retval; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER messages exchange - PING/PONG and gossip + * -------------------------------------------------------------------------- */ + +/* This function checks if a given node should be marked as FAIL. 
+ * It happens if the following conditions are met: + * + * 1) We received enough failure reports from other master nodes via gossip. + * Enough means that the majority of the masters signaled the node is + * down recently. + * 2) We believe this node is in PFAIL state. + * + * If a failure is detected we also inform the whole cluster about this + * event trying to force every other node to set the FAIL flag for the node. + * + * Note that the form of agreement used here is weak, as we collect the majority + * of masters state during some time, and even if we force agreement by + * propagating the FAIL message, because of partitions we may not reach every + * node. However: + * + * 1) Either we reach the majority and eventually the FAIL state will propagate + * to all the cluster. + * 2) Or there is no majority so no slave promotion will be authorized and the + * FAIL flag will be cleared after some time. + */ +void markNodeAsFailingIfNeeded(clusterNode *node) { + int failures; + int needed_quorum = (server.cluster->size / 2) + 1; + + if (!nodeTimedOut(node)) return; /* We can reach it. */ + if (nodeFailed(node)) return; /* Already FAILing. */ + + failures = clusterNodeFailureReportsCount(node); + /* Also count myself as a voter if I'm a master. */ + if (nodeIsMaster(myself)) failures++; + if (failures < needed_quorum) return; /* No weak agreement from masters. */ + + serverLog(LL_NOTICE, + "Marking node %.40s as failing (quorum reached).", node->name); + + /* Mark the node as failing. */ + node->flags &= ~CLUSTER_NODE_PFAIL; + node->flags |= CLUSTER_NODE_FAIL; + node->fail_time = mstime(); + + /* Broadcast the failing node name to everybody, forcing all the other + * reachable nodes to flag the node as FAIL. */ + if (nodeIsMaster(myself)) clusterSendFail(node->name); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); +} + +/* This function is called only if a node is marked as FAIL, but we are able + * to reach it again. 
It checks if there are the conditions to undo the FAIL + * state. */ +void clearNodeFailureIfNeeded(clusterNode *node) { + mstime_t now = mstime(); + + serverAssert(nodeFailed(node)); + + /* For slaves we always clear the FAIL flag if we can contact the + * node again. */ + if (nodeIsSlave(node) || node->numslots == 0) { + serverLog(LL_NOTICE, + "Clear FAIL state for node %.40s: %s is reachable again.", + node->name, + nodeIsSlave(node) ? "slave" : "master without slots"); + node->flags &= ~CLUSTER_NODE_FAIL; + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + } + + /* If it is a master and... + * 1) The FAIL state is old enough. + * 2) It is yet serving slots from our point of view (not failed over). + * Apparently no one is going to fix these slots, clear the FAIL flag. */ + if (nodeIsMaster(node) && node->numslots > 0 && + (now - node->fail_time) > + (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) + { + serverLog(LL_NOTICE, + "Clear FAIL state for node %.40s: is reachable again and nobody is serving its slots after some time.", + node->name); + node->flags &= ~CLUSTER_NODE_FAIL; + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + } +} + +/* Return true if we already have a node in HANDSHAKE state matching the + * specified ip address and port number. This function is used in order to + * avoid adding a new handshake node for the same address multiple times. */ +int clusterHandshakeInProgress(char *ip, int port, int cport) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (!nodeInHandshake(node)) continue; + if (!strcasecmp(node->ip,ip) && + node->port == port && + node->cport == cport) break; + } + dictReleaseIterator(di); + return de != NULL; +} + +/* Start an handshake with the specified address if there is not one + * already in progress. 
Returns non-zero if the handshake was actually
 * started. On error zero is returned and errno is set to one of the
 * following values:
 *
 * EAGAIN - There is already an handshake in progress for this address.
 * EINVAL - IP or port are not valid. */
int clusterStartHandshake(char *ip, int port, int cport) {
    clusterNode *n;
    char norm_ip[NET_IP_STR_LEN];
    struct sockaddr_storage sa;
    struct sockaddr_in *sa4 = (struct sockaddr_in *)&sa;
    struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&sa;
    void *addr;

    /* IP sanity check: try to parse it as IPv4 first, then as IPv6. */
    if (inet_pton(AF_INET,ip,&(sa4->sin_addr))) {
        sa.ss_family = AF_INET;
    } else if (inet_pton(AF_INET6,ip,&(sa6->sin6_addr))) {
        sa.ss_family = AF_INET6;
    } else {
        errno = EINVAL;
        return 0;
    }

    /* Port sanity check */
    if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) {
        errno = EINVAL;
        return 0;
    }

    /* Set norm_ip as the normalized string representation of the node
     * IP address. */
    memset(norm_ip,0,NET_IP_STR_LEN);
    if (sa.ss_family == AF_INET)
        addr = (void*)&(sa4->sin_addr);
    else
        addr = (void*)&(sa6->sin6_addr);
    inet_ntop(sa.ss_family,addr,norm_ip,NET_IP_STR_LEN);

    if (clusterHandshakeInProgress(norm_ip,port,cport)) {
        errno = EAGAIN;
        return 0;
    }

    /* Add the node with a random name (NULL as first argument to
     * createClusterNode()). Everything will be fixed during the
     * handshake. */
    n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET);
    memcpy(n->ip,norm_ip,sizeof(n->ip));
    n->port = port;
    n->cport = cport;
    clusterAddNode(n);
    return 1;
}

/* Process the gossip section of PING or PONG packets.
 * Note that this function assumes that the packet is already sanity-checked
 * by the caller, not in the content of the gossip section, but in the
 * length.
*/ +void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { + uint16_t count = ntohs(hdr->count); + clusterMsgDataGossip *g = (clusterMsgDataGossip*) hdr->data.ping.gossip; + clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender); + + while(count--) { + uint16_t flags = ntohs(g->flags); + clusterNode *node; + sds ci; + + ci = representClusterNodeFlags(sdsempty(), flags); + serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s", + g->nodename, + g->ip, + ntohs(g->port), + ntohs(g->cport), + ci); + sdsfree(ci); + + /* Update our state accordingly to the gossip sections */ + node = clusterLookupNode(g->nodename); + if (node) { + /* We already know this node. + Handle failure reports, only when the sender is a master. */ + if (sender && nodeIsMaster(sender) && node != myself) { + if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) { + if (clusterNodeAddFailureReport(node,sender)) { + serverLog(LL_VERBOSE, + "Node %.40s reported node %.40s as not reachable.", + sender->name, node->name); + } + markNodeAsFailingIfNeeded(node); + } else { + if (clusterNodeDelFailureReport(node,sender)) { + serverLog(LL_VERBOSE, + "Node %.40s reported node %.40s is back online.", + sender->name, node->name); + } + } + } + + /* If from our POV the node is up (no failure flags are set), + * we have no pending ping for the node, nor we have failure + * reports for this node, update the last pong time with the + * one we see from the other nodes. */ + if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) && + node->ping_sent == 0 && + clusterNodeFailureReportsCount(node) == 0) + { + mstime_t pongtime = ntohl(g->pong_received); + pongtime *= 1000; /* Convert back to milliseconds. */ + + /* Replace the pong time with the received one only if + * it's greater than our view but is not in the future + * (with 500 milliseconds tolerance) from the POV of our + * clock. 
*/ + if (pongtime <= (server.mstime+500) && + pongtime > node->pong_received) + { + node->pong_received = pongtime; + } + } + + /* If we already know this node, but it is not reachable, and + * we see a different address in the gossip section of a node that + * can talk with this other node, update the address, disconnect + * the old link if any, so that we'll attempt to connect with the + * new address. */ + if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) && + !(flags & CLUSTER_NODE_NOADDR) && + !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) && + (strcasecmp(node->ip,g->ip) || + node->port != ntohs(g->port) || + node->cport != ntohs(g->cport))) + { + if (node->link) freeClusterLink(node->link); + memcpy(node->ip,g->ip,NET_IP_STR_LEN); + node->port = ntohs(g->port); + node->cport = ntohs(g->cport); + node->flags &= ~CLUSTER_NODE_NOADDR; + } + } else { + /* If it's not in NOADDR state and we don't have it, we + * start a handshake process against this IP/PORT pairs. + * + * Note that we require that the sender of this gossip message + * is a well known node in our cluster, otherwise we risk + * joining another cluster. */ + if (sender && + !(flags & CLUSTER_NODE_NOADDR) && + !clusterBlacklistExists(g->nodename)) + { + clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport)); + } + } + + /* Next node */ + g++; + } +} + +/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes. + * If 'announced_ip' length is non-zero, it is used instead of extracting + * the IP from the socket peer address. */ +void nodeIp2String(char *buf, clusterLink *link, char *announced_ip) { + if (announced_ip[0] != '\0') { + memcpy(buf,announced_ip,NET_IP_STR_LEN); + buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. 
*/ + } else { + anetPeerToString(link->fd, buf, NET_IP_STR_LEN, NULL); + } +} + +/* Update the node address to the IP address that can be extracted + * from link->fd, or if hdr->myip is non empty, to the address the node + * is announcing us. The port is taken from the packet header as well. + * + * If the address or port changed, disconnect the node link so that we'll + * connect again to the new address. + * + * If the ip/port pair are already correct no operation is performed at + * all. + * + * The function returns 0 if the node address is still the same, + * otherwise 1 is returned. */ +int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, + clusterMsg *hdr) +{ + char ip[NET_IP_STR_LEN] = {0}; + int port = ntohs(hdr->port); + int cport = ntohs(hdr->cport); + + /* We don't proceed if the link is the same as the sender link, as this + * function is designed to see if the node link is consistent with the + * symmetric link that is used to receive PINGs from the node. + * + * As a side effect this function never frees the passed 'link', so + * it is safe to call during packet processing. */ + if (link == node->link) return 0; + + nodeIp2String(ip,link,hdr->myip); + if (node->port == port && node->cport == cport && + strcmp(ip,node->ip) == 0) return 0; + + /* IP / port is different, update it. */ + memcpy(node->ip,ip,sizeof(ip)); + node->port = port; + node->cport = cport; + if (node->link) freeClusterLink(node->link); + node->flags &= ~CLUSTER_NODE_NOADDR; + serverLog(LL_WARNING,"Address updated for node %.40s, now %s:%d", + node->name, node->ip, node->port); + + /* Check if this is our master and we have to change the + * replication target as well. */ + if (nodeIsSlave(myself) && myself->slaveof == node) + replicationSetMaster(node->ip, node->port); + return 1; +} + +/* Reconfigure the specified node 'n' as a master. 
This function is called when + * a node that we believed to be a slave is now acting as master in order to + * update the state of the node. */ +void clusterSetNodeAsMaster(clusterNode *n) { + if (nodeIsMaster(n)) return; + + if (n->slaveof) { + clusterNodeRemoveSlave(n->slaveof,n); + if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO; + } + n->flags &= ~CLUSTER_NODE_SLAVE; + n->flags |= CLUSTER_NODE_MASTER; + n->slaveof = NULL; + + /* Update config and state. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); +} + +/* This function is called when we receive a master configuration via a + * PING, PONG or UPDATE packet. What we receive is a node, a configEpoch of the + * node, and the set of slots claimed under this configEpoch. + * + * What we do is to rebind the slots with newer configuration compared to our + * local configuration, and if needed, we turn ourself into a replica of the + * node (see the function comments for more info). + * + * The 'sender' is the node for which we received a configuration update. + * Sometimes it is not actually the "Sender" of the information, like in the + * case we receive the info via an UPDATE packet. */ +void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) { + int j; + clusterNode *curmaster, *newmaster = NULL; + /* The dirty slots list is a list of slots for which we lose the ownership + * while having still keys inside. This usually happens after a failover + * or after a manual cluster reconfiguration operated by the admin. + * + * If the update message is not able to demote a master to slave (in this + * case we'll resync with the master updating the whole key space), we + * need to delete all the keys in the slots we lost ownership. */ + uint16_t dirty_slots[CLUSTER_SLOTS]; + int dirty_slots_count = 0; + + /* Here we set curmaster to this node or the node this node + * replicates to if it's a slave. 
In the for loop we are + * interested to check if slots are taken away from curmaster. */ + curmaster = nodeIsMaster(myself) ? myself : myself->slaveof; + + if (sender == myself) { + serverLog(LL_WARNING,"Discarding UPDATE message about myself."); + return; + } + + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (bitmapTestBit(slots,j)) { + /* The slot is already bound to the sender of this message. */ + if (server.cluster->slots[j] == sender) continue; + + /* The slot is in importing state, it should be modified only + * manually via redis-trib (example: a resharding is in progress + * and the migrating side slot was already closed and is advertising + * a new config. We still want the slot to be closed manually). */ + if (server.cluster->importing_slots_from[j]) continue; + + /* We rebind the slot to the new node claiming it if: + * 1) The slot was unassigned or the new node claims it with a + * greater configEpoch. + * 2) We are not currently importing the slot. */ + if (server.cluster->slots[j] == NULL || + server.cluster->slots[j]->configEpoch < senderConfigEpoch) + { + /* Was this slot mine, and still contains keys? Mark it as + * a dirty slot. */ + if (server.cluster->slots[j] == myself && + countKeysInSlot(j) && + sender != myself) + { + dirty_slots[dirty_slots_count] = j; + dirty_slots_count++; + } + + if (server.cluster->slots[j] == curmaster) + newmaster = sender; + clusterDelSlot(j); + clusterAddSlot(sender,j); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); + } + } + } + + /* If at least one slot was reassigned from a node to another node + * with a greater configEpoch, it is possible that: + * 1) We are a master left without slots. This means that we were + * failed over and we should turn into a replica of the new + * master. + * 2) We are a slave and our master is left without slots. We need + * to replicate to the new slots owner. 
*/ + if (newmaster && curmaster->numslots == 0) { + serverLog(LL_WARNING, + "Configuration change detected. Reconfiguring myself " + "as a replica of %.40s", sender->name); + clusterSetMaster(sender); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); + } else if (dirty_slots_count) { + /* If we are here, we received an update message which removed + * ownership for certain slots we still have keys about, but still + * we are serving some slots, so this master node was not demoted to + * a slave. + * + * In order to maintain a consistent state between keys and slots + * we need to remove all the keys from the slots we lost. */ + for (j = 0; j < dirty_slots_count; j++) + delKeysInSlot(dirty_slots[j]); + } +} + +/* When this function is called, there is a packet to process starting + * at node->rcvbuf. Releasing the buffer is up to the caller, so this + * function should just handle the higher level stuff of processing the + * packet, modifying the cluster state if needed. + * + * The function returns 1 if the link is still valid after the packet + * was processed, otherwise 0 if the link was freed since the packet + * processing lead to some inconsistency error (for instance a PONG + * received from the wrong sender ID). */ +int clusterProcessPacket(clusterLink *link) { + clusterMsg *hdr = (clusterMsg*) link->rcvbuf; + uint32_t totlen = ntohl(hdr->totlen); + uint16_t type = ntohs(hdr->type); + + if (type < CLUSTERMSG_TYPE_COUNT) + server.cluster->stats_bus_messages_received[type]++; + serverLog(LL_DEBUG,"--- Processing packet of type %d, %lu bytes", + type, (unsigned long) totlen); + + /* Perform sanity checks */ + if (totlen < 16) return 1; /* At least signature, version, totlen, count. */ + if (totlen > sdslen(link->rcvbuf)) return 1; + + if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) { + /* Can't handle messages of different versions. 
*/ + return 1; + } + + uint16_t flags = ntohs(hdr->flags); + uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; + clusterNode *sender; + + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || + type == CLUSTERMSG_TYPE_MEET) + { + uint16_t count = ntohs(hdr->count); + uint32_t explen; /* expected length of this packet */ + + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += (sizeof(clusterMsgDataGossip)*count); + if (totlen != explen) return 1; + } else if (type == CLUSTERMSG_TYPE_FAIL) { + uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + + explen += sizeof(clusterMsgDataFail); + if (totlen != explen) return 1; + } else if (type == CLUSTERMSG_TYPE_PUBLISH) { + uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + + explen += sizeof(clusterMsgDataPublish) - + 8 + + ntohl(hdr->data.publish.msg.channel_len) + + ntohl(hdr->data.publish.msg.message_len); + if (totlen != explen) return 1; + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || + type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || + type == CLUSTERMSG_TYPE_MFSTART) + { + uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + + if (totlen != explen) return 1; + } else if (type == CLUSTERMSG_TYPE_UPDATE) { + uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + + explen += sizeof(clusterMsgDataUpdate); + if (totlen != explen) return 1; + } + + /* Check if the sender is a known node. */ + sender = clusterLookupNode(hdr->sender); + if (sender && !nodeInHandshake(sender)) { + /* Update our curretEpoch if we see a newer epoch in the cluster. */ + senderCurrentEpoch = ntohu64(hdr->currentEpoch); + senderConfigEpoch = ntohu64(hdr->configEpoch); + if (senderCurrentEpoch > server.cluster->currentEpoch) + server.cluster->currentEpoch = senderCurrentEpoch; + /* Update the sender configEpoch if it is publishing a newer one. 
*/ + if (senderConfigEpoch > sender->configEpoch) { + sender->configEpoch = senderConfigEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + } + /* Update the replication offset info for this node. */ + sender->repl_offset = ntohu64(hdr->offset); + sender->repl_offset_time = mstime(); + /* If we are a slave performing a manual failover and our master + * sent its offset while already paused, populate the MF state. */ + if (server.cluster->mf_end && + nodeIsSlave(myself) && + myself->slaveof == sender && + hdr->mflags[0] & CLUSTERMSG_FLAG0_PAUSED && + server.cluster->mf_master_offset == 0) + { + server.cluster->mf_master_offset = sender->repl_offset; + serverLog(LL_WARNING, + "Received replication offset for paused " + "master manual failover: %lld", + server.cluster->mf_master_offset); + } + } + + /* Initial processing of PING and MEET requests replying with a PONG. */ + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) { + serverLog(LL_DEBUG,"Ping packet received: %p", (void*)link->node); + + /* We use incoming MEET messages in order to set the address + * for 'myself', since only other cluster nodes will send us + * MEET messages on handshakes, when the cluster joins, or + * later if we changed address, and those nodes will use our + * official address to connect to us. So by obtaining this address + * from the socket is a simple way to discover / update our own + * address in the cluster without it being hardcoded in the config. + * + * However if we don't have an address at all, we update the address + * even with a normal PING packet. If it's wrong it will be fixed + * by MEET later. 
*/ + if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') && + server.cluster_announce_ip == NULL) + { + char ip[NET_IP_STR_LEN]; + + if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 && + strcmp(ip,myself->ip)) + { + memcpy(myself->ip,ip,NET_IP_STR_LEN); + serverLog(LL_WARNING,"IP address for this node updated to %s", + myself->ip); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + } + + /* Add this node if it is new for us and the msg type is MEET. + * In this stage we don't try to add the node with the right + * flags, slaveof pointer, and so forth, as this details will be + * resolved when we'll receive PONGs from the node. */ + if (!sender && type == CLUSTERMSG_TYPE_MEET) { + clusterNode *node; + + node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE); + nodeIp2String(node->ip,link,hdr->myip); + node->port = ntohs(hdr->port); + node->cport = ntohs(hdr->cport); + clusterAddNode(node); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + + /* If this is a MEET packet from an unknown node, we still process + * the gossip section here since we have to trust the sender because + * of the message type. */ + if (!sender && type == CLUSTERMSG_TYPE_MEET) + clusterProcessGossipSection(hdr,link); + + /* Anyway reply with a PONG */ + clusterSendPing(link,CLUSTERMSG_TYPE_PONG); + } + + /* PING, PONG, MEET: process config information. */ + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || + type == CLUSTERMSG_TYPE_MEET) + { + serverLog(LL_DEBUG,"%s packet received: %p", + type == CLUSTERMSG_TYPE_PING ? "ping" : "pong", + (void*)link->node); + if (link->node) { + if (nodeInHandshake(link->node)) { + /* If we already have this node, try to change the + * IP/port of the node with the new one. 
*/ + if (sender) { + serverLog(LL_VERBOSE, + "Handshake: we already know node %.40s, " + "updating the address if needed.", sender->name); + if (nodeUpdateAddressIfNeeded(sender,link,hdr)) + { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + /* Free this node as we already have it. This will + * cause the link to be freed as well. */ + clusterDelNode(link->node); + return 0; + } + + /* First thing to do is replacing the random name with the + * right node name if this was a handshake stage. */ + clusterRenameNode(link->node, hdr->sender); + serverLog(LL_DEBUG,"Handshake with node %.40s completed.", + link->node->name); + link->node->flags &= ~CLUSTER_NODE_HANDSHAKE; + link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } else if (memcmp(link->node->name,hdr->sender, + CLUSTER_NAMELEN) != 0) + { + /* If the reply has a non matching node ID we + * disconnect this node and set it as not having an associated + * address. */ + serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d", + link->node->name, + (int)(mstime()-(link->node->ctime)), + link->node->flags); + link->node->flags |= CLUSTER_NODE_NOADDR; + link->node->ip[0] = '\0'; + link->node->port = 0; + link->node->cport = 0; + freeClusterLink(link); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + return 0; + } + } + + /* Update the node address if it changed. 
*/ + if (sender && type == CLUSTERMSG_TYPE_PING && + !nodeInHandshake(sender) && + nodeUpdateAddressIfNeeded(sender,link,hdr)) + { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + + /* Update our info about the node */ + if (link->node && type == CLUSTERMSG_TYPE_PONG) { + link->node->pong_received = mstime(); + link->node->ping_sent = 0; + + /* The PFAIL condition can be reversed without external + * help if it is momentary (that is, if it does not + * turn into a FAIL state). + * + * The FAIL condition is also reversible under specific + * conditions detected by clearNodeFailureIfNeeded(). */ + if (nodeTimedOut(link->node)) { + link->node->flags &= ~CLUSTER_NODE_PFAIL; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } else if (nodeFailed(link->node)) { + clearNodeFailureIfNeeded(link->node); + } + } + + /* Check for role switch: slave -> master or master -> slave. */ + if (sender) { + if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME, + sizeof(hdr->slaveof))) + { + /* Node is a master. */ + clusterSetNodeAsMaster(sender); + } else { + /* Node is a slave. */ + clusterNode *master = clusterLookupNode(hdr->slaveof); + + if (nodeIsMaster(sender)) { + /* Master turned into a slave! Reconfigure the node. */ + clusterDelNodeSlots(sender); + sender->flags &= ~(CLUSTER_NODE_MASTER| + CLUSTER_NODE_MIGRATE_TO); + sender->flags |= CLUSTER_NODE_SLAVE; + + /* Update config and state. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + + /* Master node changed for this slave? */ + if (master && sender->slaveof != master) { + if (sender->slaveof) + clusterNodeRemoveSlave(sender->slaveof,sender); + clusterNodeAddSlave(master,sender); + sender->slaveof = master; + + /* Update config. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + } + } + + /* Update our info about served slots. 
+ * + * Note: this MUST happen after we update the master/slave state + * so that CLUSTER_NODE_MASTER flag will be set. */ + + /* Many checks are only needed if the set of served slots this + * instance claims is different compared to the set of slots we have + * for it. Check this ASAP to avoid other computational expansive + * checks later. */ + clusterNode *sender_master = NULL; /* Sender or its master if slave. */ + int dirty_slots = 0; /* Sender claimed slots don't match my view? */ + + if (sender) { + sender_master = nodeIsMaster(sender) ? sender : sender->slaveof; + if (sender_master) { + dirty_slots = memcmp(sender_master->slots, + hdr->myslots,sizeof(hdr->myslots)) != 0; + } + } + + /* 1) If the sender of the message is a master, and we detected that + * the set of slots it claims changed, scan the slots to see if we + * need to update our configuration. */ + if (sender && nodeIsMaster(sender) && dirty_slots) + clusterUpdateSlotsConfigWith(sender,senderConfigEpoch,hdr->myslots); + + /* 2) We also check for the reverse condition, that is, the sender + * claims to serve slots we know are served by a master with a + * greater configEpoch. If this happens we inform the sender. + * + * This is useful because sometimes after a partition heals, a + * reappearing master may be the last one to claim a given set of + * hash slots, but with a configuration that other instances know to + * be deprecated. Example: + * + * A and B are master and slave for slots 1,2,3. + * A is partitioned away, B gets promoted. + * B is partitioned away, and A returns available. + * + * Usually B would PING A publishing its set of served slots and its + * configEpoch, but because of the partition B can't inform A of the + * new configuration, so other nodes that have an updated table must + * do it. In this way A will stop to act as a master (or can try to + * failover if there are the conditions to win the election). 
*/ + if (sender && dirty_slots) { + int j; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (bitmapTestBit(hdr->myslots,j)) { + if (server.cluster->slots[j] == sender || + server.cluster->slots[j] == NULL) continue; + if (server.cluster->slots[j]->configEpoch > + senderConfigEpoch) + { + serverLog(LL_VERBOSE, + "Node %.40s has old slots configuration, sending " + "an UPDATE message about %.40s", + sender->name, server.cluster->slots[j]->name); + clusterSendUpdate(sender->link, + server.cluster->slots[j]); + + /* TODO: instead of exiting the loop send every other + * UPDATE packet for other nodes that are the new owner + * of sender's slots. */ + break; + } + } + } + } + + /* If our config epoch collides with the sender's try to fix + * the problem. */ + if (sender && + nodeIsMaster(myself) && nodeIsMaster(sender) && + senderConfigEpoch == myself->configEpoch) + { + clusterHandleConfigEpochCollision(sender); + } + + /* Get info from the gossip section */ + if (sender) clusterProcessGossipSection(hdr,link); + } else if (type == CLUSTERMSG_TYPE_FAIL) { + clusterNode *failing; + + if (sender) { + failing = clusterLookupNode(hdr->data.fail.about.nodename); + if (failing && + !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF))) + { + serverLog(LL_NOTICE, + "FAIL message received from %.40s about %.40s", + hdr->sender, hdr->data.fail.about.nodename); + failing->flags |= CLUSTER_NODE_FAIL; + failing->fail_time = mstime(); + failing->flags &= ~CLUSTER_NODE_PFAIL; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + } else { + serverLog(LL_NOTICE, + "Ignoring FAIL message from unknown node %.40s about %.40s", + hdr->sender, hdr->data.fail.about.nodename); + } + } else if (type == CLUSTERMSG_TYPE_PUBLISH) { + robj *channel, *message; + uint32_t channel_len, message_len; + + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. 
*/ + if (dictSize(server.pubsub_channels) || + listLength(server.pubsub_patterns)) + { + channel_len = ntohl(hdr->data.publish.msg.channel_len); + message_len = ntohl(hdr->data.publish.msg.message_len); + channel = createStringObject( + (char*)hdr->data.publish.msg.bulk_data,channel_len); + message = createStringObject( + (char*)hdr->data.publish.msg.bulk_data+channel_len, + message_len); + pubsubPublishMessage(channel,message); + decrRefCount(channel); + decrRefCount(message); + } + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { + if (!sender) return 1; /* We don't know that node. */ + clusterSendFailoverAuthIfNeeded(sender,hdr); + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK) { + if (!sender) return 1; /* We don't know that node. */ + /* We consider this vote only if the sender is a master serving + * a non zero number of slots, and its currentEpoch is greater or + * equal to epoch where this node started the election. */ + if (nodeIsMaster(sender) && sender->numslots > 0 && + senderCurrentEpoch >= server.cluster->failover_auth_epoch) + { + server.cluster->failover_auth_count++; + /* Maybe we reached a quorum here, set a flag to make sure + * we check ASAP. */ + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); + } + } else if (type == CLUSTERMSG_TYPE_MFSTART) { + /* This message is acceptable only if I'm a master and the sender + * is one of my slaves. */ + if (!sender || sender->slaveof != myself) return 1; + /* Manual failover requested from slaves. Initialize the state + * accordingly. */ + resetManualFailover(); + server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; + server.cluster->mf_slave = sender; + pauseClients(mstime()+(CLUSTER_MF_TIMEOUT*2)); + serverLog(LL_WARNING,"Manual failover requested by slave %.40s.", + sender->name); + } else if (type == CLUSTERMSG_TYPE_UPDATE) { + clusterNode *n; /* The node the update is about. 
*/ + uint64_t reportedConfigEpoch = + ntohu64(hdr->data.update.nodecfg.configEpoch); + + if (!sender) return 1; /* We don't know the sender. */ + n = clusterLookupNode(hdr->data.update.nodecfg.nodename); + if (!n) return 1; /* We don't know the reported node. */ + if (n->configEpoch >= reportedConfigEpoch) return 1; /* Nothing new. */ + + /* If in our current config the node is a slave, set it as a master. */ + if (nodeIsSlave(n)) clusterSetNodeAsMaster(n); + + /* Update the node's configEpoch. */ + n->configEpoch = reportedConfigEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + + /* Check the bitmap of served slots and update our + * config accordingly. */ + clusterUpdateSlotsConfigWith(n,reportedConfigEpoch, + hdr->data.update.nodecfg.slots); + } else { + serverLog(LL_WARNING,"Received unknown packet type: %d", type); + } + return 1; +} + +/* This function is called when we detect the link with this node is lost. + We set the node as no longer connected. The Cluster Cron will detect + this connection and will try to get it connected again. + + Instead if the node is a temporary node used to accept a query, we + completely free the node on error. */ +void handleLinkIOError(clusterLink *link) { + freeClusterLink(link); +} + +/* Send data. This is handled using a trivial send buffer that gets + * consumed by write(). We don't try to optimize this for speed too much + * as this is a very low traffic channel. */ +void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + clusterLink *link = (clusterLink*) privdata; + ssize_t nwritten; + UNUSED(el); + UNUSED(mask); + + nwritten = write(fd, link->sndbuf, sdslen(link->sndbuf)); + if (nwritten <= 0) { + serverLog(LL_DEBUG,"I/O error writing to node link: %s", + strerror(errno)); + handleLinkIOError(link); + return; + } + sdsrange(link->sndbuf,nwritten,-1); + if (sdslen(link->sndbuf) == 0) + aeDeleteFileEvent(server.el, link->fd, AE_WRITABLE); +} + +/* Read data. 
Try to read the first field of the header first to check the + * full length of the packet. When a whole packet is in memory this function + * will call the function to process the packet. And so forth. */ +void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + char buf[sizeof(clusterMsg)]; + ssize_t nread; + clusterMsg *hdr; + clusterLink *link = (clusterLink*) privdata; + unsigned int readlen, rcvbuflen; + UNUSED(el); + UNUSED(mask); + + while(1) { /* Read as long as there is data to read. */ + rcvbuflen = sdslen(link->rcvbuf); + if (rcvbuflen < 8) { + /* First, obtain the first 8 bytes to get the full message + * length. */ + readlen = 8 - rcvbuflen; + } else { + /* Finally read the full message. */ + hdr = (clusterMsg*) link->rcvbuf; + if (rcvbuflen == 8) { + /* Perform some sanity check on the message signature + * and length. */ + if (memcmp(hdr->sig,"RCmb",4) != 0 || + ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) + { + serverLog(LL_WARNING, + "Bad message length or signature received " + "from Cluster bus."); + handleLinkIOError(link); + return; + } + } + readlen = ntohl(hdr->totlen) - rcvbuflen; + if (readlen > sizeof(buf)) readlen = sizeof(buf); + } + + nread = read(fd,buf,readlen); + if (nread == -1 && errno == EAGAIN) return; /* No more data ready. */ + + if (nread <= 0) { + /* I/O error... */ + serverLog(LL_DEBUG,"I/O error reading from node link: %s", + (nread == 0) ? "connection closed" : strerror(errno)); + handleLinkIOError(link); + return; + } else { + /* Read data and recast the pointer to the new buffer. */ + link->rcvbuf = sdscatlen(link->rcvbuf,buf,nread); + hdr = (clusterMsg*) link->rcvbuf; + rcvbuflen += nread; + } + + /* Total length obtained? Process this packet. */ + if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { + if (clusterProcessPacket(link)) { + sdsfree(link->rcvbuf); + link->rcvbuf = sdsempty(); + } else { + return; /* Link no longer valid. */ + } + } + } +} + +/* Put stuff into the send buffer. 
+ * + * It is guaranteed that this function will never have as a side effect + * the link to be invalidated, so it is safe to call this function + * from event handlers that will do stuff with the same link later. */ +void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) { + if (sdslen(link->sndbuf) == 0 && msglen != 0) + aeCreateFileEvent(server.el,link->fd,AE_WRITABLE, + clusterWriteHandler,link); + + link->sndbuf = sdscatlen(link->sndbuf, msg, msglen); + + /* Populate sent messages stats. */ + clusterMsg *hdr = (clusterMsg*) msg; + uint16_t type = ntohs(hdr->type); + if (type < CLUSTERMSG_TYPE_COUNT) + server.cluster->stats_bus_messages_sent[type]++; +} + +/* Send a message to all the nodes that are part of the cluster having + * a connected link. + * + * It is guaranteed that this function will never have as a side effect + * some node->link to be invalidated, so it is safe to call this function + * from event handlers that will do stuff with node links later. */ +void clusterBroadcastMessage(void *buf, size_t len) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (!node->link) continue; + if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) + continue; + clusterSendMessage(node->link,buf,len); + } + dictReleaseIterator(di); +} + +/* Build the message header. hdr must point to a buffer at least + * sizeof(clusterMsg) in bytes. */ +void clusterBuildMessageHdr(clusterMsg *hdr, int type) { + int totlen = 0; + uint64_t offset; + clusterNode *master; + + /* If this node is a master, we send its slots bitmap and configEpoch. + * If this node is a slave we send the master's information instead (the + * node is flagged as slave so the receiver knows that it is NOT really + * in charge for this slots. */ + master = (nodeIsSlave(myself) && myself->slaveof) ? 
+ myself->slaveof : myself; + + memset(hdr,0,sizeof(*hdr)); + hdr->ver = htons(CLUSTER_PROTO_VER); + hdr->sig[0] = 'R'; + hdr->sig[1] = 'C'; + hdr->sig[2] = 'm'; + hdr->sig[3] = 'b'; + hdr->type = htons(type); + memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN); + + /* If cluster-announce-ip option is enabled, force the receivers of our + * packets to use the specified address for this node. Otherwise if the + * first byte is zero, they'll do auto discovery. */ + memset(hdr->myip,0,NET_IP_STR_LEN); + if (server.cluster_announce_ip) { + strncpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN); + hdr->myip[NET_IP_STR_LEN-1] = '\0'; + } + + /* Handle cluster-announce-port as well. */ + int announced_port = server.cluster_announce_port ? + server.cluster_announce_port : server.port; + int announced_cport = server.cluster_announce_bus_port ? + server.cluster_announce_bus_port : + (server.port + CLUSTER_PORT_INCR); + + memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots)); + memset(hdr->slaveof,0,CLUSTER_NAMELEN); + if (myself->slaveof != NULL) + memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN); + hdr->port = htons(announced_port); + hdr->cport = htons(announced_cport); + hdr->flags = htons(myself->flags); + hdr->state = server.cluster->state; + + /* Set the currentEpoch and configEpochs. */ + hdr->currentEpoch = htonu64(server.cluster->currentEpoch); + hdr->configEpoch = htonu64(master->configEpoch); + + /* Set the replication offset. */ + if (nodeIsSlave(myself)) + offset = replicationGetSlaveOffset(); + else + offset = server.master_repl_offset; + hdr->offset = htonu64(offset); + + /* Set the message flags. */ + if (nodeIsMaster(myself) && server.cluster->mf_end) + hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED; + + /* Compute the message length for certain messages. For other messages + * this is up to the caller. 
*/ + if (type == CLUSTERMSG_TYPE_FAIL) { + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += sizeof(clusterMsgDataFail); + } else if (type == CLUSTERMSG_TYPE_UPDATE) { + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += sizeof(clusterMsgDataUpdate); + } + hdr->totlen = htonl(totlen); + /* For PING, PONG, and MEET, fixing the totlen field is up to the caller. */ +} + +/* Return non zero if the node is already present in the gossip section of the + * message pointed by 'hdr' and having 'count' gossip entries. Otherwise + * zero is returned. Helper for clusterSendPing(). */ +int clusterNodeIsInGossipSection(clusterMsg *hdr, int count, clusterNode *n) { + int j; + for (j = 0; j < count; j++) { + if (memcmp(hdr->data.ping.gossip[j].nodename,n->name, + CLUSTER_NAMELEN) == 0) break; + } + return j != count; +} + +/* Set the i-th entry of the gossip section in the message pointed by 'hdr' + * to the info of the specified node 'n'. */ +void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { + clusterMsgDataGossip *gossip; + gossip = &(hdr->data.ping.gossip[i]); + memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN); + gossip->ping_sent = htonl(n->ping_sent/1000); + gossip->pong_received = htonl(n->pong_received/1000); + memcpy(gossip->ip,n->ip,sizeof(n->ip)); + gossip->port = htons(n->port); + gossip->cport = htons(n->cport); + gossip->flags = htons(n->flags); + gossip->notused1 = 0; +} + +/* Send a PING or PONG packet to the specified node, making sure to add enough + * gossip informations. */ +void clusterSendPing(clusterLink *link, int type) { + unsigned char *buf; + clusterMsg *hdr; + int gossipcount = 0; /* Number of gossip sections added so far. */ + int wanted; /* Number of gossip sections we want to append if possible. */ + int totlen; /* Total packet length. 
*/ + /* freshnodes is the max number of nodes we can hope to append at all: + * nodes available minus two (ourself and the node we are sending the + * message to). However practically there may be less valid nodes since + * nodes in handshake state, disconnected, are not considered. */ + int freshnodes = dictSize(server.cluster->nodes)-2; + + /* How many gossip sections we want to add? 1/10 of the number of nodes + * and anyway at least 3. Why 1/10? + * + * If we have N masters, with N/10 entries, and we consider that in + * node_timeout we exchange with each other node at least 4 packets + * (we ping in the worst case in node_timeout/2 time, and we also + * receive two pings from the host), we have a total of 8 packets + * in the node_timeout*2 falure reports validity time. So we have + * that, for a single PFAIL node, we can expect to receive the following + * number of failure reports (in the specified window of time): + * + * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS: + * + * PROB = probability of being featured in a single gossip entry, + * which is 1 / NUM_OF_NODES. + * ENTRIES = 10. + * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS. + * + * If we assume we have just masters (so num of nodes and num of masters + * is the same), with 1/10 we always get over the majority, and specifically + * 80% of the number of nodes, to account for many masters failing at the + * same time. + * + * Since we have non-voting slaves that lower the probability of an entry + * to feature our node, we set the number of entires per packet as + * 10% of the total nodes we have. */ + wanted = floor(dictSize(server.cluster->nodes)/10); + if (wanted < 3) wanted = 3; + if (wanted > freshnodes) wanted = freshnodes; + + /* Include all the nodes in PFAIL state, so that failure reports are + * faster to propagate to go from PFAIL to FAIL state. */ + int pfail_wanted = server.cluster->stats_pfail_nodes; + + /* Compute the maxium totlen to allocate our buffer. 
We'll fix the totlen + * later according to the number of gossip sections we really were able + * to put inside the packet. */ + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += (sizeof(clusterMsgDataGossip)*(wanted+pfail_wanted)); + /* Note: clusterBuildMessageHdr() expects the buffer to be always at least + * sizeof(clusterMsg) or more. */ + if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg); + buf = zcalloc(totlen); + hdr = (clusterMsg*) buf; + + /* Populate the header. */ + if (link->node && type == CLUSTERMSG_TYPE_PING) + link->node->ping_sent = mstime(); + clusterBuildMessageHdr(hdr,type); + + /* Populate the gossip fields */ + int maxiterations = wanted*3; + while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { + dictEntry *de = dictGetRandomKey(server.cluster->nodes); + clusterNode *this = dictGetVal(de); + + /* Don't include this node: the whole packet header is about us + * already, so we just gossip about other nodes. */ + if (this == myself) continue; + + /* PFAIL nodes will be added later. */ + if (this->flags & CLUSTER_NODE_PFAIL) continue; + + /* In the gossip section don't include: + * 1) Nodes in HANDSHAKE state. + * 3) Nodes with the NOADDR flag set. + * 4) Disconnected nodes if they don't have configured slots. + */ + if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) || + (this->link == NULL && this->numslots == 0)) + { + freshnodes--; /* Tecnically not correct, but saves CPU. */ + continue; + } + + /* Do not add a node we already have. */ + if (clusterNodeIsInGossipSection(hdr,gossipcount,this)) continue; + + /* Add it */ + clusterSetGossipEntry(hdr,gossipcount,this); + freshnodes--; + gossipcount++; + } + + /* If there are PFAIL nodes, add them at the end. 
*/ + if (pfail_wanted) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL && pfail_wanted > 0) { + clusterNode *node = dictGetVal(de); + if (node->flags & CLUSTER_NODE_HANDSHAKE) continue; + if (node->flags & CLUSTER_NODE_NOADDR) continue; + if (!(node->flags & CLUSTER_NODE_PFAIL)) continue; + clusterSetGossipEntry(hdr,gossipcount,node); + freshnodes--; + gossipcount++; + /* We take the count of the slots we allocated, since the + * PFAIL stats may not match perfectly with the current number + * of PFAIL nodes. */ + pfail_wanted--; + } + dictReleaseIterator(di); + } + + /* Ready to send... fix the totlen fiend and queue the message in the + * output buffer. */ + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += (sizeof(clusterMsgDataGossip)*gossipcount); + hdr->count = htons(gossipcount); + hdr->totlen = htonl(totlen); + clusterSendMessage(link,buf,totlen); + zfree(buf); +} + +/* Send a PONG packet to every connected node that's not in handshake state + * and for which we have a valid link. + * + * In Redis Cluster pongs are not used just for failure detection, but also + * to carry important configuration information. So broadcasting a pong is + * useful when something changes in the configuration and we want to make + * the cluster aware ASAP (for instance after a slave promotion). + * + * The 'target' argument specifies the receiving instances using the + * defines below: + * + * CLUSTER_BROADCAST_ALL -> All known instances. + * CLUSTER_BROADCAST_LOCAL_SLAVES -> All slaves in my master-slaves ring. 
+ */ +#define CLUSTER_BROADCAST_ALL 0 +#define CLUSTER_BROADCAST_LOCAL_SLAVES 1 +void clusterBroadcastPong(int target) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (!node->link) continue; + if (node == myself || nodeInHandshake(node)) continue; + if (target == CLUSTER_BROADCAST_LOCAL_SLAVES) { + int local_slave = + nodeIsSlave(node) && node->slaveof && + (node->slaveof == myself || node->slaveof == myself->slaveof); + if (!local_slave) continue; + } + clusterSendPing(node->link,CLUSTERMSG_TYPE_PONG); + } + dictReleaseIterator(di); +} + +/* Send a PUBLISH message. + * + * If link is NULL, then the message is broadcasted to the whole cluster. */ +void clusterSendPublish(clusterLink *link, robj *channel, robj *message) { + unsigned char buf[sizeof(clusterMsg)], *payload; + clusterMsg *hdr = (clusterMsg*) buf; + uint32_t totlen; + uint32_t channel_len, message_len; + + channel = getDecodedObject(channel); + message = getDecodedObject(message); + channel_len = sdslen(channel->ptr); + message_len = sdslen(message->ptr); + + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_PUBLISH); + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; + + hdr->data.publish.msg.channel_len = htonl(channel_len); + hdr->data.publish.msg.message_len = htonl(message_len); + hdr->totlen = htonl(totlen); + + /* Try to use the local buffer if possible */ + if (totlen < sizeof(buf)) { + payload = buf; + } else { + payload = zmalloc(totlen); + memcpy(payload,hdr,sizeof(*hdr)); + hdr = (clusterMsg*) payload; + } + memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr)); + memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr), + message->ptr,sdslen(message->ptr)); + + if (link) + clusterSendMessage(link,payload,totlen); + else + clusterBroadcastMessage(payload,totlen); + + 
decrRefCount(channel); + decrRefCount(message); + if (payload != buf) zfree(payload); +} + +/* Send a FAIL message to all the nodes we are able to contact. + * The FAIL message is sent when we detect that a node is failing + * (CLUSTER_NODE_PFAIL) and we also receive a gossip confirmation of this: + * we switch the node state to CLUSTER_NODE_FAIL and ask all the other + * nodes to do the same ASAP. */ +void clusterSendFail(char *nodename) { + unsigned char buf[sizeof(clusterMsg)]; + clusterMsg *hdr = (clusterMsg*) buf; + + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_FAIL); + memcpy(hdr->data.fail.about.nodename,nodename,CLUSTER_NAMELEN); + clusterBroadcastMessage(buf,ntohl(hdr->totlen)); +} + +/* Send an UPDATE message to the specified link carrying the specified 'node' + * slots configuration. The node name, slots bitmap, and configEpoch info + * are included. */ +void clusterSendUpdate(clusterLink *link, clusterNode *node) { + unsigned char buf[sizeof(clusterMsg)]; + clusterMsg *hdr = (clusterMsg*) buf; + + if (link == NULL) return; + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_UPDATE); + memcpy(hdr->data.update.nodecfg.nodename,node->name,CLUSTER_NAMELEN); + hdr->data.update.nodecfg.configEpoch = htonu64(node->configEpoch); + memcpy(hdr->data.update.nodecfg.slots,node->slots,sizeof(node->slots)); + clusterSendMessage(link,buf,ntohl(hdr->totlen)); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER Pub/Sub support + * + * For now we do very little, just propagating PUBLISH messages across the whole + * cluster. In the future we'll try to get smarter and avoiding propagating those + * messages to hosts without receives for a given channel. 
+ * -------------------------------------------------------------------------- */ +void clusterPropagatePublish(robj *channel, robj *message) { + clusterSendPublish(NULL, channel, message); +} + +/* ----------------------------------------------------------------------------- + * SLAVE node specific functions + * -------------------------------------------------------------------------- */ + +/* This function sends a FAILOVE_AUTH_REQUEST message to every node in order to + * see if there is the quorum for this slave instance to failover its failing + * master. + * + * Note that we send the failover request to everybody, master and slave nodes, + * but only the masters are supposed to reply to our query. */ +void clusterRequestFailoverAuth(void) { + unsigned char buf[sizeof(clusterMsg)]; + clusterMsg *hdr = (clusterMsg*) buf; + uint32_t totlen; + + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST); + /* If this is a manual failover, set the CLUSTERMSG_FLAG0_FORCEACK bit + * in the header to communicate the nodes receiving the message that + * they should authorized the failover even if the master is working. */ + if (server.cluster->mf_end) hdr->mflags[0] |= CLUSTERMSG_FLAG0_FORCEACK; + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + hdr->totlen = htonl(totlen); + clusterBroadcastMessage(buf,totlen); +} + +/* Send a FAILOVER_AUTH_ACK message to the specified node. */ +void clusterSendFailoverAuth(clusterNode *node) { + unsigned char buf[sizeof(clusterMsg)]; + clusterMsg *hdr = (clusterMsg*) buf; + uint32_t totlen; + + if (!node->link) return; + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK); + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + hdr->totlen = htonl(totlen); + clusterSendMessage(node->link,buf,totlen); +} + +/* Send a MFSTART message to the specified node. 
*/ +void clusterSendMFStart(clusterNode *node) { + unsigned char buf[sizeof(clusterMsg)]; + clusterMsg *hdr = (clusterMsg*) buf; + uint32_t totlen; + + if (!node->link) return; + clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_MFSTART); + totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + hdr->totlen = htonl(totlen); + clusterSendMessage(node->link,buf,totlen); +} + +/* Vote for the node asking for our vote if there are the conditions. */ +void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { + clusterNode *master = node->slaveof; + uint64_t requestCurrentEpoch = ntohu64(request->currentEpoch); + uint64_t requestConfigEpoch = ntohu64(request->configEpoch); + unsigned char *claimed_slots = request->myslots; + int force_ack = request->mflags[0] & CLUSTERMSG_FLAG0_FORCEACK; + int j; + + /* IF we are not a master serving at least 1 slot, we don't have the + * right to vote, as the cluster size in Redis Cluster is the number + * of masters serving at least one slot, and quorum is the cluster + * size + 1 */ + if (nodeIsSlave(myself) || myself->numslots == 0) return; + + /* Request epoch must be >= our currentEpoch. + * Note that it is impossible for it to actually be greater since + * our currentEpoch was updated as a side effect of receiving this + * request, if the request epoch was greater. */ + if (requestCurrentEpoch < server.cluster->currentEpoch) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: reqEpoch (%llu) < curEpoch(%llu)", + node->name, + (unsigned long long) requestCurrentEpoch, + (unsigned long long) server.cluster->currentEpoch); + return; + } + + /* I already voted for this epoch? Return ASAP. */ + if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: already voted for epoch %llu", + node->name, + (unsigned long long) server.cluster->currentEpoch); + return; + } + + /* Node must be a slave and its master down. 
+ * The master can be non failing if the request is flagged + * with CLUSTERMSG_FLAG0_FORCEACK (manual failover). */ + if (nodeIsMaster(node) || master == NULL || + (!nodeFailed(master) && !force_ack)) + { + if (nodeIsMaster(node)) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: it is a master node", + node->name); + } else if (master == NULL) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: I don't know its master", + node->name); + } else if (!nodeFailed(master)) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: its master is up", + node->name); + } + return; + } + + /* We did not voted for a slave about this master for two + * times the node timeout. This is not strictly needed for correctness + * of the algorithm but makes the base case more linear. */ + if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2) + { + serverLog(LL_WARNING, + "Failover auth denied to %.40s: " + "can't vote about this master before %lld milliseconds", + node->name, + (long long) ((server.cluster_node_timeout*2)- + (mstime() - node->slaveof->voted_time))); + return; + } + + /* The slave requesting the vote must have a configEpoch for the claimed + * slots that is >= the one of the masters currently serving the same + * slots in the current configuration. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (bitmapTestBit(claimed_slots, j) == 0) continue; + if (server.cluster->slots[j] == NULL || + server.cluster->slots[j]->configEpoch <= requestConfigEpoch) + { + continue; + } + /* If we reached this point we found a slot that in our current slots + * is served by a master with a greater configEpoch than the one claimed + * by the slave requesting our vote. Refuse to vote for this slave. 
*/ + serverLog(LL_WARNING, + "Failover auth denied to %.40s: " + "slot %d epoch (%llu) > reqEpoch (%llu)", + node->name, j, + (unsigned long long) server.cluster->slots[j]->configEpoch, + (unsigned long long) requestConfigEpoch); + return; + } + + /* We can vote for this slave. */ + clusterSendFailoverAuth(node); + server.cluster->lastVoteEpoch = server.cluster->currentEpoch; + node->slaveof->voted_time = mstime(); + serverLog(LL_WARNING, "Failover auth granted to %.40s for epoch %llu", + node->name, (unsigned long long) server.cluster->currentEpoch); +} + +/* This function returns the "rank" of this instance, a slave, in the context + * of its master-slaves ring. The rank of the slave is given by the number of + * other slaves for the same master that have a better replication offset + * compared to the local one (better means, greater, so they claim more data). + * + * A slave with rank 0 is the one with the greatest (most up to date) + * replication offset, and so forth. Note that because how the rank is computed + * multiple slaves may have the same rank, in case they have the same offset. + * + * The slave rank is used to add a delay to start an election in order to + * get voted and replace a failing master. Slaves with better replication + * offsets are more likely to win. */ +int clusterGetSlaveRank(void) { + long long myoffset; + int j, rank = 0; + clusterNode *master; + + serverAssert(nodeIsSlave(myself)); + master = myself->slaveof; + if (master == NULL) return 0; /* Never called by slaves without master. */ + + myoffset = replicationGetSlaveOffset(); + for (j = 0; j < master->numslaves; j++) + if (master->slaves[j] != myself && + master->slaves[j]->repl_offset > myoffset) rank++; + return rank; +} + +/* This function is called by clusterHandleSlaveFailover() in order to + * let the slave log why it is not able to failover. 
Sometimes there are + * not the conditions, but since the failover function is called again and + * again, we can't log the same things continuously. + * + * This function works by logging only if a given set of conditions are + * true: + * + * 1) The reason for which the failover can't be initiated changed. + * The reasons also include a NONE reason we reset the state to + * when the slave finds that its master is fine (no FAIL flag). + * 2) Also, the log is emitted again if the master is still down and + * the reason for not failing over is still the same, but more than + * CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed. + * 3) Finally, the function only logs if the slave is down for more than + * five seconds + NODE_TIMEOUT. This way nothing is logged when a + * failover starts in a reasonable time. + * + * The function is called with the reason why the slave can't failover + * which is one of the integer macros CLUSTER_CANT_FAILOVER_*. + * + * The function is guaranteed to be called only if 'myself' is a slave. */ +void clusterLogCantFailover(int reason) { + char *msg; + static time_t lastlog_time = 0; + mstime_t nolog_fail_time = server.cluster_node_timeout + 5000; + + /* Don't log if we have the same reason for some time. */ + if (reason == server.cluster->cant_failover_reason && + time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD) + return; + + server.cluster->cant_failover_reason = reason; + + /* We also don't emit any log if the master failed no long ago, the + * goal of this function is to log slaves in a stalled condition for + * a long time. */ + if (myself->slaveof && + nodeFailed(myself->slaveof) && + (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return; + + switch(reason) { + case CLUSTER_CANT_FAILOVER_DATA_AGE: + msg = "Disconnected from master for longer than allowed. 
" + "Please check the 'cluster-slave-validity-factor' configuration " + "option."; + break; + case CLUSTER_CANT_FAILOVER_WAITING_DELAY: + msg = "Waiting the delay before I can start a new failover."; + break; + case CLUSTER_CANT_FAILOVER_EXPIRED: + msg = "Failover attempt expired."; + break; + case CLUSTER_CANT_FAILOVER_WAITING_VOTES: + msg = "Waiting for votes, but majority still not reached."; + break; + default: + msg = "Unknown reason code."; + break; + } + lastlog_time = time(NULL); + serverLog(LL_WARNING,"Currently unable to failover: %s", msg); +} + +/* This function implements the final part of automatic and manual failovers, + * where the slave grabs its master's hash slots, and propagates the new + * configuration. + * + * Note that it's up to the caller to be sure that the node got a new + * configuration epoch already. */ +void clusterFailoverReplaceYourMaster(void) { + int j; + clusterNode *oldmaster = myself->slaveof; + + if (nodeIsMaster(myself) || oldmaster == NULL) return; + + /* 1) Turn this node into a master. */ + clusterSetNodeAsMaster(myself); + replicationUnsetMaster(); + + /* 2) Claim all the slots assigned to our master. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (clusterNodeGetSlotBit(oldmaster,j)) { + clusterDelSlot(j); + clusterAddSlot(myself,j); + } + } + + /* 3) Update state and save config. */ + clusterUpdateState(); + clusterSaveConfigOrDie(1); + + /* 4) Pong all the other nodes so that they can update the state + * accordingly and detect that we switched to master role. */ + clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + + /* 5) If there was a manual failover in progress, clear the state. */ + resetManualFailover(); +} + +/* This function is called if we are a slave node and our master serving + * a non-zero amount of hash slots is in FAIL state. + * + * The gaol of this function is: + * 1) To check if we are able to perform a failover, is our data updated? + * 2) Try to get elected by masters. 
+ * 3) Perform the failover informing all the other nodes. + */ +void clusterHandleSlaveFailover(void) { + mstime_t data_age; + mstime_t auth_age = mstime() - server.cluster->failover_auth_time; + int needed_quorum = (server.cluster->size / 2) + 1; + int manual_failover = server.cluster->mf_end != 0 && + server.cluster->mf_can_start; + mstime_t auth_timeout, auth_retry_time; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER; + + /* Compute the failover timeout (the max time we have to send votes + * and wait for replies), and the failover retry time (the time to wait + * before trying to get voted again). + * + * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds. + * Retry is two times the Timeout. + */ + auth_timeout = server.cluster_node_timeout*2; + if (auth_timeout < 2000) auth_timeout = 2000; + auth_retry_time = auth_timeout*2; + + /* Pre conditions to run the function, that must be met both in case + * of an automatic or manual failover: + * 1) We are a slave. + * 2) Our master is flagged as FAIL, or this is a manual failover. + * 3) It is serving slots. */ + if (nodeIsMaster(myself) || + myself->slaveof == NULL || + (!nodeFailed(myself->slaveof) && !manual_failover) || + myself->slaveof->numslots == 0) + { + /* There are no reasons to failover, so we set the reason why we + * are returning without failing over to NONE. */ + server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; + return; + } + + /* Set data_age to the number of seconds we are disconnected from + * the master. */ + if (server.repl_state == REPL_STATE_CONNECTED) { + data_age = (mstime_t)(server.unixtime - server.master->lastinteraction) + * 1000; + } else { + data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000; + } + + /* Remove the node timeout from the data age as it is fine that we are + * disconnected from our master at least for the time it was down to be + * flagged as FAIL, that's the baseline. 
*/ + if (data_age > server.cluster_node_timeout) + data_age -= server.cluster_node_timeout; + + /* Check if our data is recent enough according to the slave validity + * factor configured by the user. + * + * Check bypassed for manual failovers. */ + if (server.cluster_slave_validity_factor && + data_age > + (((mstime_t)server.repl_ping_slave_period * 1000) + + (server.cluster_node_timeout * server.cluster_slave_validity_factor))) + { + if (!manual_failover) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE); + return; + } + } + + /* If the previous failover attempt timedout and the retry time has + * elapsed, we can setup a new one. */ + if (auth_age > auth_retry_time) { + server.cluster->failover_auth_time = mstime() + + 500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */ + random() % 500; /* Random delay between 0 and 500 milliseconds. */ + server.cluster->failover_auth_count = 0; + server.cluster->failover_auth_sent = 0; + server.cluster->failover_auth_rank = clusterGetSlaveRank(); + /* We add another delay that is proportional to the slave rank. + * Specifically 1 second * rank. This way slaves that have a probably + * less updated replication offset, are penalized. */ + server.cluster->failover_auth_time += + server.cluster->failover_auth_rank * 1000; + /* However if this is a manual failover, no delay is needed. */ + if (server.cluster->mf_end) { + server.cluster->failover_auth_time = mstime(); + server.cluster->failover_auth_rank = 0; + } + serverLog(LL_WARNING, + "Start of election delayed for %lld milliseconds " + "(rank #%d, offset %lld).", + server.cluster->failover_auth_time - mstime(), + server.cluster->failover_auth_rank, + replicationGetSlaveOffset()); + /* Now that we have a scheduled election, broadcast our offset + * to all the other slaves so that they'll updated their offsets + * if our offset is better. 
*/ + clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_SLAVES); + return; + } + + /* It is possible that we received more updated offsets from other + * slaves for the same master since we computed our election delay. + * Update the delay if our rank changed. + * + * Not performed if this is a manual failover. */ + if (server.cluster->failover_auth_sent == 0 && + server.cluster->mf_end == 0) + { + int newrank = clusterGetSlaveRank(); + if (newrank > server.cluster->failover_auth_rank) { + long long added_delay = + (newrank - server.cluster->failover_auth_rank) * 1000; + server.cluster->failover_auth_time += added_delay; + server.cluster->failover_auth_rank = newrank; + serverLog(LL_WARNING, + "Slave rank updated to #%d, added %lld milliseconds of delay.", + newrank, added_delay); + } + } + + /* Return ASAP if we can't still start the election. */ + if (mstime() < server.cluster->failover_auth_time) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY); + return; + } + + /* Return ASAP if the election is too old to be valid. */ + if (auth_age > auth_timeout) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED); + return; + } + + /* Ask for votes if needed. */ + if (server.cluster->failover_auth_sent == 0) { + server.cluster->currentEpoch++; + server.cluster->failover_auth_epoch = server.cluster->currentEpoch; + serverLog(LL_WARNING,"Starting a failover election for epoch %llu.", + (unsigned long long) server.cluster->currentEpoch); + clusterRequestFailoverAuth(); + server.cluster->failover_auth_sent = 1; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); + return; /* Wait for replies. */ + } + + /* Check if we reached the quorum. */ + if (server.cluster->failover_auth_count >= needed_quorum) { + /* We have the quorum, we can finally failover the master. */ + + serverLog(LL_WARNING, + "Failover election won: I'm the new master."); + + /* Update my configEpoch to the epoch of the election. 
*/ + if (myself->configEpoch < server.cluster->failover_auth_epoch) { + myself->configEpoch = server.cluster->failover_auth_epoch; + serverLog(LL_WARNING, + "configEpoch set to %llu after successful failover", + (unsigned long long) myself->configEpoch); + } + + /* Take responsability for the cluster slots. */ + clusterFailoverReplaceYourMaster(); + } else { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES); + } +} + +/* ----------------------------------------------------------------------------- + * CLUSTER slave migration + * + * Slave migration is the process that allows a slave of a master that is + * already covered by at least another slave, to "migrate" to a master that + * is orpaned, that is, left with no working slaves. + * ------------------------------------------------------------------------- */ + +/* This function is responsible to decide if this replica should be migrated + * to a different (orphaned) master. It is called by the clusterCron() function + * only if: + * + * 1) We are a slave node. + * 2) It was detected that there is at least one orphaned master in + * the cluster. + * 3) We are a slave of one of the masters with the greatest number of + * slaves. + * + * This checks are performed by the caller since it requires to iterate + * the nodes anyway, so we spend time into clusterHandleSlaveMigration() + * if definitely needed. + * + * The fuction is called with a pre-computed max_slaves, that is the max + * number of working (not in FAIL state) slaves for a single master. + * + * Additional conditions for migration are examined inside the function. + */ +void clusterHandleSlaveMigration(int max_slaves) { + int j, okslaves = 0; + clusterNode *mymaster = myself->slaveof, *target = NULL, *candidate = NULL; + dictIterator *di; + dictEntry *de; + + /* Step 1: Don't migrate if the cluster state is not ok. 
*/ + if (server.cluster->state != CLUSTER_OK) return; + + /* Step 2: Don't migrate if my master will not be left with at least + * 'migration-barrier' slaves after my migration. */ + if (mymaster == NULL) return; + for (j = 0; j < mymaster->numslaves; j++) + if (!nodeFailed(mymaster->slaves[j]) && + !nodeTimedOut(mymaster->slaves[j])) okslaves++; + if (okslaves <= server.cluster_migration_barrier) return; + + /* Step 3: Idenitfy a candidate for migration, and check if among the + * masters with the greatest number of ok slaves, I'm the one with the + * smallest node ID (the "candidate slave"). + * + * Note: this means that eventually a replica migration will occurr + * since slaves that are reachable again always have their FAIL flag + * cleared, so eventually there must be a candidate. At the same time + * this does not mean that there are no race conditions possible (two + * slaves migrating at the same time), but this is unlikely to + * happen, and harmless when happens. */ + candidate = myself; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + int okslaves = 0, is_orphaned = 1; + + /* We want to migrate only if this master is working, orphaned, and + * used to have slaves or if failed over a master that had slaves + * (MIGRATE_TO flag). This way we only migrate to instances that were + * supposed to have replicas. */ + if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0; + if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0; + + /* Check number of working slaves. */ + if (nodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node); + if (okslaves > 0) is_orphaned = 0; + + if (is_orphaned) { + if (!target && node->numslots > 0) target = node; + + /* Track the starting time of the orphaned condition for this + * master. 
*/ + if (!node->orphaned_time) node->orphaned_time = mstime(); + } else { + node->orphaned_time = 0; + } + + /* Check if I'm the slave candidate for the migration: attached + * to a master with the maximum number of slaves and with the smallest + * node ID. */ + if (okslaves == max_slaves) { + for (j = 0; j < node->numslaves; j++) { + if (memcmp(node->slaves[j]->name, + candidate->name, + CLUSTER_NAMELEN) < 0) + { + candidate = node->slaves[j]; + } + } + } + } + dictReleaseIterator(di); + + /* Step 4: perform the migration if there is a target, and if I'm the + * candidate, but only if the master is continuously orphaned for a + * couple of seconds, so that during failovers, we give some time to + * the natural slaves of this instance to advertise their switch from + * the old master to the new one. */ + if (target && candidate == myself && + (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY) + { + serverLog(LL_WARNING,"Migrating to orphaned master %.40s", + target->name); + clusterSetMaster(target); + } +} + +/* ----------------------------------------------------------------------------- + * CLUSTER manual failover + * + * This are the important steps performed by slaves during a manual failover: + * 1) User send CLUSTER FAILOVER command. The failover state is initialized + * setting mf_end to the millisecond unix time at which we'll abort the + * attempt. + * 2) Slave sends a MFSTART message to the master requesting to pause clients + * for two times the manual failover timeout CLUSTER_MF_TIMEOUT. + * When master is paused for manual failover, it also starts to flag + * packets with CLUSTERMSG_FLAG0_PAUSED. + * 3) Slave waits for master to send its replication offset flagged as PAUSED. 
+ * 4) If slave received the offset from the master, and its offset matches, + * mf_can_start is set to 1, and clusterHandleSlaveFailover() will perform + * the failover as usually, with the difference that the vote request + * will be modified to force masters to vote for a slave that has a + * working master. + * + * From the point of view of the master things are simpler: when a + * PAUSE_CLIENTS packet is received the master sets mf_end as well and + * the sender in mf_slave. During the time limit for the manual failover + * the master will just send PINGs more often to this slave, flagged with + * the PAUSED flag, so that the slave will set mf_master_offset when receiving + * a packet from the master with this flag set. + * + * The gaol of the manual failover is to perform a fast failover without + * data loss due to the asynchronous master-slave replication. + * -------------------------------------------------------------------------- */ + +/* Reset the manual failover state. This works for both masters and slavesa + * as all the state about manual failover is cleared. + * + * The function can be used both to initialize the manual failover state at + * startup or to abort a manual failover in progress. */ +void resetManualFailover(void) { + if (server.cluster->mf_end && clientsArePaused()) { + server.clients_pause_end_time = 0; + clientsArePaused(); /* Just use the side effect of the function. */ + } + server.cluster->mf_end = 0; /* No manual failover in progress. */ + server.cluster->mf_can_start = 0; + server.cluster->mf_slave = NULL; + server.cluster->mf_master_offset = 0; +} + +/* If a manual failover timed out, abort it. */ +void manualFailoverCheckTimeout(void) { + if (server.cluster->mf_end && server.cluster->mf_end < mstime()) { + serverLog(LL_WARNING,"Manual failover timed out."); + resetManualFailover(); + } +} + +/* This function is called from the cluster cron function in order to go + * forward with a manual failover state machine. 
*/ +void clusterHandleManualFailover(void) { + /* Return ASAP if no manual failover is in progress. */ + if (server.cluster->mf_end == 0) return; + + /* If mf_can_start is non-zero, the failover was already triggered so the + * next steps are performed by clusterHandleSlaveFailover(). */ + if (server.cluster->mf_can_start) return; + + if (server.cluster->mf_master_offset == 0) return; /* Wait for offset... */ + + if (server.cluster->mf_master_offset == replicationGetSlaveOffset()) { + /* Our replication offset matches the master replication offset + * announced after clients were paused. We can start the failover. */ + server.cluster->mf_can_start = 1; + serverLog(LL_WARNING, + "All master replication stream processed, " + "manual failover can start."); + } +} + +/* ----------------------------------------------------------------------------- + * CLUSTER cron job + * -------------------------------------------------------------------------- */ + +/* This is executed 10 times every second */ +void clusterCron(void) { + dictIterator *di; + dictEntry *de; + int update_state = 0; + int orphaned_masters; /* How many masters there are without ok slaves. */ + int max_slaves; /* Max number of ok slaves for a single master. */ + int this_slaves; /* Number of ok slaves for our master (if we are slave). */ + mstime_t min_pong = 0, now = mstime(); + clusterNode *min_pong_node = NULL; + static unsigned long long iteration = 0; + mstime_t handshake_timeout; + + iteration++; /* Number of times this function was called so far. */ + + /* We want to take myself->ip in sync with the cluster-announce-ip option. + * The option can be set at runtime via CONFIG SET, so we periodically check + * if the option changed to reflect this into myself->ip. 
*/ + { + static char *prev_ip = NULL; + char *curr_ip = server.cluster_announce_ip; + int changed = 0; + + if (prev_ip == NULL && curr_ip != NULL) changed = 1; + if (prev_ip != NULL && curr_ip == NULL) changed = 1; + if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1; + + if (changed) { + prev_ip = curr_ip; + if (prev_ip) prev_ip = zstrdup(prev_ip); + + if (curr_ip) { + strncpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN); + myself->ip[NET_IP_STR_LEN-1] = '\0'; + } else { + myself->ip[0] = '\0'; /* Force autodetection. */ + } + } + } + + /* The handshake timeout is the time after which a handshake node that was + * not turned into a normal node is removed from the nodes. Usually it is + * just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use + * the value of 1 second. */ + handshake_timeout = server.cluster_node_timeout; + if (handshake_timeout < 1000) handshake_timeout = 1000; + + /* Check if we have disconnected nodes and re-establish the connection. + * Also update a few stats while we are here, that can be used to make + * better decisions in other part of the code. */ + di = dictGetSafeIterator(server.cluster->nodes); + server.cluster->stats_pfail_nodes = 0; + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + /* Not interested in reconnecting the link with myself or nodes + * for which we have no address. */ + if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) continue; + + if (node->flags & CLUSTER_NODE_PFAIL) + server.cluster->stats_pfail_nodes++; + + /* A Node in HANDSHAKE state has a limited lifespan equal to the + * configured node timeout. 
*/ + if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { + clusterDelNode(node); + continue; + } + + if (node->link == NULL) { + int fd; + mstime_t old_ping_sent; + clusterLink *link; + + fd = anetTcpNonBlockBindConnect(server.neterr, node->ip, + node->cport, NET_FIRST_BIND_ADDR); + if (fd == -1) { + /* We got a synchronous error from connect before + * clusterSendPing() had a chance to be called. + * If node->ping_sent is zero, failure detection can't work, + * so we claim we actually sent a ping now (that will + * be really sent as soon as the link is obtained). */ + if (node->ping_sent == 0) node->ping_sent = mstime(); + serverLog(LL_DEBUG, "Unable to connect to " + "Cluster Node [%s]:%d -> %s", node->ip, + node->cport, server.neterr); + continue; + } + link = createClusterLink(node); + link->fd = fd; + node->link = link; + aeCreateFileEvent(server.el,link->fd,AE_READABLE, + clusterReadHandler,link); + /* Queue a PING in the new connection ASAP: this is crucial + * to avoid false positives in failure detection. + * + * If the node is flagged as MEET, we send a MEET message instead + * of a PING one, to force the receiver to add us in its node + * table. */ + old_ping_sent = node->ping_sent; + clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ? + CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING); + if (old_ping_sent) { + /* If there was an active ping before the link was + * disconnected, we want to restore the ping time, otherwise + * replaced by the clusterSendPing() call. */ + node->ping_sent = old_ping_sent; + } + /* We can clear the flag after the first packet is sent. + * If we'll never receive a PONG, we'll never send new packets + * to this node. Instead after the PONG is received and we + * are no longer in meet/handshake status, we want to send + * normal PING packets. 
*/ + node->flags &= ~CLUSTER_NODE_MEET; + + serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d", + node->name, node->ip, node->cport); + } + } + dictReleaseIterator(di); + + /* Ping some random node 1 time every 10 iterations, so that we usually ping + * one random node every second. */ + if (!(iteration % 10)) { + int j; + + /* Check a few random nodes and ping the one with the oldest + * pong_received time. */ + for (j = 0; j < 5; j++) { + de = dictGetRandomKey(server.cluster->nodes); + clusterNode *this = dictGetVal(de); + + /* Don't ping nodes disconnected or with a ping currently active. */ + if (this->link == NULL || this->ping_sent != 0) continue; + if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) + continue; + if (min_pong_node == NULL || min_pong > this->pong_received) { + min_pong_node = this; + min_pong = this->pong_received; + } + } + if (min_pong_node) { + serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name); + clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING); + } + } + + /* Iterate nodes to check if we need to flag something as failing. + * This loop is also responsible to: + * 1) Check if there are orphaned masters (masters without non failing + * slaves). + * 2) Count the max number of non failing slaves for a single master. + * 3) Count the number of slaves for our master, if we are a slave. */ + orphaned_masters = 0; + max_slaves = 0; + this_slaves = 0; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + now = mstime(); /* Use an updated time at every iteration. */ + mstime_t delay; + + if (node->flags & + (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) + continue; + + /* Orphaned master check, useful only if the current instance + * is a slave that may migrate to another master. 
*/ + if (nodeIsSlave(myself) && nodeIsMaster(node) && !nodeFailed(node)) { + int okslaves = clusterCountNonFailingSlaves(node); + + /* A master is orphaned if it is serving a non-zero number of + * slots, have no working slaves, but used to have at least one + * slave, or failed over a master that used to have slaves. */ + if (okslaves == 0 && node->numslots > 0 && + node->flags & CLUSTER_NODE_MIGRATE_TO) + { + orphaned_masters++; + } + if (okslaves > max_slaves) max_slaves = okslaves; + if (nodeIsSlave(myself) && myself->slaveof == node) + this_slaves = okslaves; + } + + /* If we are waiting for the PONG more than half the cluster + * timeout, reconnect the link: maybe there is a connection + * issue even if the node is alive. */ + if (node->link && /* is connected */ + now - node->link->ctime > + server.cluster_node_timeout && /* was not already reconnected */ + node->ping_sent && /* we already sent a ping */ + node->pong_received < node->ping_sent && /* still waiting pong */ + /* and we are waiting for the pong more than timeout/2 */ + now - node->ping_sent > server.cluster_node_timeout/2) + { + /* Disconnect the link, it will be reconnected automatically. */ + freeClusterLink(node->link); + } + + /* If we have currently no active ping in this instance, and the + * received PONG is older than half the cluster timeout, send + * a new ping now, to ensure all the nodes are pinged without + * a too big delay. */ + if (node->link && + node->ping_sent == 0 && + (now - node->pong_received) > server.cluster_node_timeout/2) + { + clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); + continue; + } + + /* If we are a master and one of the slaves requested a manual + * failover, ping it continuously. */ + if (server.cluster->mf_end && + nodeIsMaster(myself) && + server.cluster->mf_slave == node && + node->link) + { + clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); + continue; + } + + /* Check only if we have an active ping for this instance. 
*/ + if (node->ping_sent == 0) continue; + + /* Compute the delay of the PONG. Note that if we already received + * the PONG, then node->ping_sent is zero, so can't reach this + * code at all. */ + delay = now - node->ping_sent; + + if (delay > server.cluster_node_timeout) { + /* Timeout reached. Set the node as possibly failing if it is + * not already in this state. */ + if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) { + serverLog(LL_DEBUG,"*** NODE %.40s possibly failing", + node->name); + node->flags |= CLUSTER_NODE_PFAIL; + update_state = 1; + } + } + } + dictReleaseIterator(di); + + /* If we are a slave node but the replication is still turned off, + * enable it if we know the address of our master and it appears to + * be up. */ + if (nodeIsSlave(myself) && + server.masterhost == NULL && + myself->slaveof && + nodeHasAddr(myself->slaveof)) + { + replicationSetMaster(myself->slaveof->ip, myself->slaveof->port); + } + + /* Abourt a manual failover if the timeout is reached. */ + manualFailoverCheckTimeout(); + + if (nodeIsSlave(myself)) { + clusterHandleManualFailover(); + clusterHandleSlaveFailover(); + /* If there are orphaned slaves, and we are a slave among the masters + * with the max number of non-failing slaves, consider migrating to + * the orphaned masters. Note that it does not make sense to try + * a migration if there is no master with at least *two* working + * slaves. */ + if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves) + clusterHandleSlaveMigration(max_slaves); + } + + if (update_state || server.cluster->state == CLUSTER_FAIL) + clusterUpdateState(); +} + +/* This function is called before the event handler returns to sleep for + * events. It is useful to perform operations that must be done ASAP in + * reaction to events fired but that are not safe to perform inside event + * handlers, or to perform potentially expansive tasks that we need to do + * a single time before replying to clients. 
*/ +void clusterBeforeSleep(void) { + /* Handle failover, this is needed when it is likely that there is already + * the quorum from masters in order to react fast. */ + if (server.cluster->todo_before_sleep & CLUSTER_TODO_HANDLE_FAILOVER) + clusterHandleSlaveFailover(); + + /* Update the cluster state. */ + if (server.cluster->todo_before_sleep & CLUSTER_TODO_UPDATE_STATE) + clusterUpdateState(); + + /* Save the config, possibly using fsync. */ + if (server.cluster->todo_before_sleep & CLUSTER_TODO_SAVE_CONFIG) { + int fsync = server.cluster->todo_before_sleep & + CLUSTER_TODO_FSYNC_CONFIG; + clusterSaveConfigOrDie(fsync); + } + + /* Reset our flags (not strictly needed since every single function + * called for flags set should be able to clear its flag). */ + server.cluster->todo_before_sleep = 0; +} + +void clusterDoBeforeSleep(int flags) { + server.cluster->todo_before_sleep |= flags; +} + +/* ----------------------------------------------------------------------------- + * Slots management + * -------------------------------------------------------------------------- */ + +/* Test bit 'pos' in a generic bitmap. Return 1 if the bit is set, + * otherwise 0. */ +int bitmapTestBit(unsigned char *bitmap, int pos) { + off_t byte = pos/8; + int bit = pos&7; + return (bitmap[byte] & (1<<bit)) != 0; +} + +/* Set the bit at position 'pos' in a bitmap. */ +void bitmapSetBit(unsigned char *bitmap, int pos) { + off_t byte = pos/8; + int bit = pos&7; + bitmap[byte] |= 1<<bit; +} + +/* Clear the bit at position 'pos' in a bitmap. */ +void bitmapClearBit(unsigned char *bitmap, int pos) { + off_t byte = pos/8; + int bit = pos&7; + bitmap[byte] &= ~(1<<bit); +} + +/* Return non-zero if there is at least one master with slaves in the cluster. + * Otherwise zero is returned. Used by clusterNodeSetSlotBit() to set the + * MIGRATE_TO flag the when a master gets the first slot. 
*/ +int clusterMastersHaveSlaves(void) { + dictIterator *di = dictGetSafeIterator(server.cluster->nodes); + dictEntry *de; + int slaves = 0; + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (nodeIsSlave(node)) continue; + slaves += node->numslaves; + } + dictReleaseIterator(di); + return slaves != 0; +} + +/* Set the slot bit and return the old value. */ +int clusterNodeSetSlotBit(clusterNode *n, int slot) { + int old = bitmapTestBit(n->slots,slot); + bitmapSetBit(n->slots,slot); + if (!old) { + n->numslots++; + /* When a master gets its first slot, even if it has no slaves, + * it gets flagged with MIGRATE_TO, that is, the master is a valid + * target for replicas migration, if and only if at least one of + * the other masters has slaves right now. + * + * Normally masters are valid targerts of replica migration if: + * 1. The used to have slaves (but no longer have). + * 2. They are slaves failing over a master that used to have slaves. + * + * However new masters with slots assigned are considered valid + * migration tagets if the rest of the cluster is not a slave-less. + * + * See https://github.com/antirez/redis/issues/3043 for more info. */ + if (n->numslots == 1 && clusterMastersHaveSlaves()) + n->flags |= CLUSTER_NODE_MIGRATE_TO; + } + return old; +} + +/* Clear the slot bit and return the old value. */ +int clusterNodeClearSlotBit(clusterNode *n, int slot) { + int old = bitmapTestBit(n->slots,slot); + bitmapClearBit(n->slots,slot); + if (old) n->numslots--; + return old; +} + +/* Return the slot bit from the cluster node structure. */ +int clusterNodeGetSlotBit(clusterNode *n, int slot) { + return bitmapTestBit(n->slots,slot); +} + +/* Add the specified slot to the list of slots that node 'n' will + * serve. Return C_OK if the operation ended with success. + * If the slot is already assigned to another instance this is considered + * an error and C_ERR is returned. 
*/ +int clusterAddSlot(clusterNode *n, int slot) { + if (server.cluster->slots[slot]) return C_ERR; + clusterNodeSetSlotBit(n,slot); + server.cluster->slots[slot] = n; + return C_OK; +} + +/* Delete the specified slot marking it as unassigned. + * Returns C_OK if the slot was assigned, otherwise if the slot was + * already unassigned C_ERR is returned. */ +int clusterDelSlot(int slot) { + clusterNode *n = server.cluster->slots[slot]; + + if (!n) return C_ERR; + serverAssert(clusterNodeClearSlotBit(n,slot) == 1); + server.cluster->slots[slot] = NULL; + return C_OK; +} + +/* Delete all the slots associated with the specified node. + * The number of deleted slots is returned. */ +int clusterDelNodeSlots(clusterNode *node) { + int deleted = 0, j; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (clusterNodeGetSlotBit(node,j)) clusterDelSlot(j); + deleted++; + } + return deleted; +} + +/* Clear the migrating / importing state for all the slots. + * This is useful at initialization and when turning a master into slave. */ +void clusterCloseAllSlots(void) { + memset(server.cluster->migrating_slots_to,0, + sizeof(server.cluster->migrating_slots_to)); + memset(server.cluster->importing_slots_from,0, + sizeof(server.cluster->importing_slots_from)); +} + +/* ----------------------------------------------------------------------------- + * Cluster state evaluation function + * -------------------------------------------------------------------------- */ + +/* The following are defines that are only used in the evaluation function + * and are based on heuristics. Actaully the main point about the rejoin and + * writable delay is that they should be a few orders of magnitude larger + * than the network latency. 
*/ +#define CLUSTER_MAX_REJOIN_DELAY 5000 +#define CLUSTER_MIN_REJOIN_DELAY 500 +#define CLUSTER_WRITABLE_DELAY 2000 + +void clusterUpdateState(void) { + int j, new_state; + int reachable_masters = 0; + static mstime_t among_minority_time; + static mstime_t first_call_time = 0; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_UPDATE_STATE; + + /* If this is a master node, wait some time before turning the state + * into OK, since it is not a good idea to rejoin the cluster as a writable + * master, after a reboot, without giving the cluster a chance to + * reconfigure this node. Note that the delay is calculated starting from + * the first call to this function and not since the server start, in order + * to don't count the DB loading time. */ + if (first_call_time == 0) first_call_time = mstime(); + if (nodeIsMaster(myself) && + server.cluster->state == CLUSTER_FAIL && + mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return; + + /* Start assuming the state is OK. We'll turn it into FAIL if there + * are the right conditions. */ + new_state = CLUSTER_OK; + + /* Check if all the slots are covered. */ + if (server.cluster_require_full_coverage) { + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->slots[j] == NULL || + server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL)) + { + new_state = CLUSTER_FAIL; + break; + } + } + } + + /* Compute the cluster size, that is the number of master nodes + * serving at least a single slot. + * + * At the same time count the number of reachable masters having + * at least one slot. 
*/ + { + dictIterator *di; + dictEntry *de; + + server.cluster->size = 0; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (nodeIsMaster(node) && node->numslots) { + server.cluster->size++; + if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0) + reachable_masters++; + } + } + dictReleaseIterator(di); + } + + /* If we are in a minority partition, change the cluster state + * to FAIL. */ + { + int needed_quorum = (server.cluster->size / 2) + 1; + + if (reachable_masters < needed_quorum) { + new_state = CLUSTER_FAIL; + among_minority_time = mstime(); + } + } + + /* Log a state change */ + if (new_state != server.cluster->state) { + mstime_t rejoin_delay = server.cluster_node_timeout; + + /* If the instance is a master and was partitioned away with the + * minority, don't let it accept queries for some time after the + * partition heals, to make sure there is enough time to receive + * a configuration update. */ + if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY) + rejoin_delay = CLUSTER_MAX_REJOIN_DELAY; + if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY) + rejoin_delay = CLUSTER_MIN_REJOIN_DELAY; + + if (new_state == CLUSTER_OK && + nodeIsMaster(myself) && + mstime() - among_minority_time < rejoin_delay) + { + return; + } + + /* Change the state and log the event. */ + serverLog(LL_WARNING,"Cluster state changed: %s", + new_state == CLUSTER_OK ? "ok" : "fail"); + server.cluster->state = new_state; + } +} + +/* This function is called after the node startup in order to verify that data + * loaded from disk is in agreement with the cluster configuration: + * + * 1) If we find keys about hash slots we have no responsibility for, the + * following happens: + * A) If no other node is in charge according to the current cluster + * configuration, we add these slots to our node. 
+ * B) If according to our config other nodes are already in charge for + * this lots, we set the slots as IMPORTING from our point of view + * in order to justify we have those slots, and in order to make + * redis-trib aware of the issue, so that it can try to fix it. + * 2) If we find data in a DB different than DB0 we return C_ERR to + * signal the caller it should quit the server with an error message + * or take other actions. + * + * The function always returns C_OK even if it will try to correct + * the error described in "1". However if data is found in DB different + * from DB0, C_ERR is returned. + * + * The function also uses the logging facility in order to warn the user + * about desynchronizations between the data we have in memory and the + * cluster configuration. */ +int verifyClusterConfigWithData(void) { + int j; + int update_config = 0; + + /* If this node is a slave, don't perform the check at all as we + * completely depend on the replication stream. */ + if (nodeIsSlave(myself)) return C_OK; + + /* Make sure we only have keys in DB0. */ + for (j = 1; j < server.dbnum; j++) { + if (dictSize(server.db[j].dict)) return C_ERR; + } + + /* Check that all the slots we see populated memory have a corresponding + * entry in the cluster table. Otherwise fix the table. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (!countKeysInSlot(j)) continue; /* No keys in this slot. */ + /* Check if we are assigned to this slot or if we are importing it. + * In both cases check the next slot as the configuration makes + * sense. */ + if (server.cluster->slots[j] == myself || + server.cluster->importing_slots_from[j] != NULL) continue; + + /* If we are here data and cluster config don't agree, and we have + * slot 'j' populated even if we are not importing it, nor we are + * assigned to this slot. Fix this condition. */ + + update_config++; + /* Case A: slot is unassigned. Take responsibility for it. 
*/ + if (server.cluster->slots[j] == NULL) { + serverLog(LL_WARNING, "I have keys for unassigned slot %d. " + "Taking responsibility for it.",j); + clusterAddSlot(myself,j); + } else { + serverLog(LL_WARNING, "I have keys for slot %d, but the slot is " + "assigned to another node. " + "Setting it to importing state.",j); + server.cluster->importing_slots_from[j] = server.cluster->slots[j]; + } + } + if (update_config) clusterSaveConfigOrDie(1); + return C_OK; +} + +/* ----------------------------------------------------------------------------- + * SLAVE nodes handling + * -------------------------------------------------------------------------- */ + +/* Set the specified node 'n' as master for this node. + * If this node is currently a master, it is turned into a slave. */ +void clusterSetMaster(clusterNode *n) { + serverAssert(n != myself); + serverAssert(myself->numslots == 0); + + if (nodeIsMaster(myself)) { + myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO); + myself->flags |= CLUSTER_NODE_SLAVE; + clusterCloseAllSlots(); + } else { + if (myself->slaveof) + clusterNodeRemoveSlave(myself->slaveof,myself); + } + myself->slaveof = n; + clusterNodeAddSlave(n,myself); + replicationSetMaster(n->ip, n->port); + resetManualFailover(); +} + +/* ----------------------------------------------------------------------------- + * Nodes to string representation functions. + * -------------------------------------------------------------------------- */ + +struct redisNodeFlags { + uint16_t flag; + char *name; +}; + +static struct redisNodeFlags redisNodeFlagsTable[] = { + {CLUSTER_NODE_MYSELF, "myself,"}, + {CLUSTER_NODE_MASTER, "master,"}, + {CLUSTER_NODE_SLAVE, "slave,"}, + {CLUSTER_NODE_PFAIL, "fail?,"}, + {CLUSTER_NODE_FAIL, "fail,"}, + {CLUSTER_NODE_HANDSHAKE, "handshake,"}, + {CLUSTER_NODE_NOADDR, "noaddr,"} +}; + +/* Concatenate the comma separated list of node flags to the given SDS + * string 'ci'. 
*/ +sds representClusterNodeFlags(sds ci, uint16_t flags) { + if (flags == 0) { + ci = sdscat(ci,"noflags,"); + } else { + int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags); + for (i = 0; i < size; i++) { + struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i; + if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name); + } + } + sdsIncrLen(ci,-1); /* Remove trailing comma. */ + return ci; +} + +/* Generate a csv-alike representation of the specified cluster node. + * See clusterGenNodesDescription() top comment for more information. + * + * The function returns the string representation as an SDS string. */ +sds clusterGenNodeDescription(clusterNode *node) { + int j, start; + sds ci; + + /* Node coordinates */ + ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ", + node->name, + node->ip, + node->port, + node->cport); + + /* Flags */ + ci = representClusterNodeFlags(ci, node->flags); + + /* Slave of... or just "-" */ + if (node->slaveof) + ci = sdscatprintf(ci," %.40s ",node->slaveof->name); + else + ci = sdscatlen(ci," - ",3); + + /* Latency from the POV of this node, config epoch, link status */ + ci = sdscatprintf(ci,"%lld %lld %llu %s", + (long long) node->ping_sent, + (long long) node->pong_received, + (unsigned long long) node->configEpoch, + (node->link || node->flags & CLUSTER_NODE_MYSELF) ? + "connected" : "disconnected"); + + /* Slots served by this instance */ + start = -1; + for (j = 0; j < CLUSTER_SLOTS; j++) { + int bit; + + if ((bit = clusterNodeGetSlotBit(node,j)) != 0) { + if (start == -1) start = j; + } + if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { + if (bit && j == CLUSTER_SLOTS-1) j++; + + if (start == j-1) { + ci = sdscatprintf(ci," %d",start); + } else { + ci = sdscatprintf(ci," %d-%d",start,j-1); + } + start = -1; + } + } + + /* Just for MYSELF node we also dump info about slots that + * we are migrating to other instances or importing from other + * instances. 
*/ + if (node->flags & CLUSTER_NODE_MYSELF) { + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->migrating_slots_to[j]) { + ci = sdscatprintf(ci," [%d->-%.40s]",j, + server.cluster->migrating_slots_to[j]->name); + } else if (server.cluster->importing_slots_from[j]) { + ci = sdscatprintf(ci," [%d-<-%.40s]",j, + server.cluster->importing_slots_from[j]->name); + } + } + } + return ci; +} + +/* Generate a csv-alike representation of the nodes we are aware of, + * including the "myself" node, and return an SDS string containing the + * representation (it is up to the caller to free it). + * + * All the nodes matching at least one of the node flags specified in + * "filter" are excluded from the output, so using zero as a filter will + * include all the known nodes in the representation, including nodes in + * the HANDSHAKE state. + * + * The representation obtained using this function is used for the output + * of the CLUSTER NODES function, and as format for the cluster + * configuration file (nodes.conf) for a given node. 
*/ +sds clusterGenNodesDescription(int filter) { + sds ci = sdsempty(), ni; + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node->flags & filter) continue; + ni = clusterGenNodeDescription(node); + ci = sdscatsds(ci,ni); + sdsfree(ni); + ci = sdscatlen(ci,"\n",1); + } + dictReleaseIterator(di); + return ci; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER command + * -------------------------------------------------------------------------- */ + +const char *clusterGetMessageTypeString(int type) { + switch(type) { + case CLUSTERMSG_TYPE_PING: return "ping"; + case CLUSTERMSG_TYPE_PONG: return "pong"; + case CLUSTERMSG_TYPE_MEET: return "meet"; + case CLUSTERMSG_TYPE_FAIL: return "fail"; + case CLUSTERMSG_TYPE_PUBLISH: return "publish"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; + case CLUSTERMSG_TYPE_UPDATE: return "update"; + case CLUSTERMSG_TYPE_MFSTART: return "mfstart"; + } + return "unknown"; +} + +int getSlotOrReply(client *c, robj *o) { + long long slot; + + if (getLongLongFromObject(o,&slot) != C_OK || + slot < 0 || slot >= CLUSTER_SLOTS) + { + addReplyError(c,"Invalid or out of range slot"); + return -1; + } + return (int) slot; +} + +void clusterReplyMultiBulkSlots(client *c) { + /* Format: 1) 1) start slot + * 2) end slot + * 3) 1) master IP + * 2) master port + * 3) node ID + * 4) 1) replica IP + * 2) replica port + * 3) node ID + * ... 
continued until done + */ + + int num_masters = 0; + void *slot_replylen = addDeferredMultiBulkLength(c); + + dictEntry *de; + dictIterator *di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + int j = 0, start = -1; + + /* Skip slaves (that are iterated when producing the output of their + * master) and masters not serving any slot. */ + if (!nodeIsMaster(node) || node->numslots == 0) continue; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + int bit, i; + + if ((bit = clusterNodeGetSlotBit(node,j)) != 0) { + if (start == -1) start = j; + } + if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { + int nested_elements = 3; /* slots (2) + master addr (1). */ + void *nested_replylen = addDeferredMultiBulkLength(c); + + if (bit && j == CLUSTER_SLOTS-1) j++; + + /* If slot exists in output map, add to it's list. + * else, create a new output map for this slot */ + if (start == j-1) { + addReplyLongLong(c, start); /* only one slot; low==high */ + addReplyLongLong(c, start); + } else { + addReplyLongLong(c, start); /* low */ + addReplyLongLong(c, j-1); /* high */ + } + start = -1; + + /* First node reply position is always the master */ + addReplyMultiBulkLen(c, 3); + addReplyBulkCString(c, node->ip); + addReplyLongLong(c, node->port); + addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN); + + /* Remaining nodes in reply are replicas for slot range */ + for (i = 0; i < node->numslaves; i++) { + /* This loop is copy/pasted from clusterGenNodeDescription() + * with modifications for per-slot node aggregation */ + if (nodeFailed(node->slaves[i])) continue; + addReplyMultiBulkLen(c, 3); + addReplyBulkCString(c, node->slaves[i]->ip); + addReplyLongLong(c, node->slaves[i]->port); + addReplyBulkCBuffer(c, node->slaves[i]->name, CLUSTER_NAMELEN); + nested_elements++; + } + setDeferredMultiBulkLength(c, nested_replylen, nested_elements); + num_masters++; + } + } + } + dictReleaseIterator(di); + 
setDeferredMultiBulkLength(c, slot_replylen, num_masters); +} + +void clusterCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; + } + + if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) { + /* CLUSTER MEET <ip> <port> [cport] */ + long long port, cport; + + if (getLongLongFromObject(c->argv[3], &port) != C_OK) { + addReplyErrorFormat(c,"Invalid TCP base port specified: %s", + (char*)c->argv[3]->ptr); + return; + } + + if (c->argc == 5) { + if (getLongLongFromObject(c->argv[4], &cport) != C_OK) { + addReplyErrorFormat(c,"Invalid TCP bus port specified: %s", + (char*)c->argv[4]->ptr); + return; + } + } else { + cport = port + CLUSTER_PORT_INCR; + } + + if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 && + errno == EINVAL) + { + addReplyErrorFormat(c,"Invalid node address specified: %s:%s", + (char*)c->argv[2]->ptr, (char*)c->argv[3]->ptr); + } else { + addReply(c,shared.ok); + } + } else if (!strcasecmp(c->argv[1]->ptr,"nodes") && c->argc == 2) { + /* CLUSTER NODES */ + robj *o; + sds ci = clusterGenNodesDescription(0); + + o = createObject(OBJ_STRING,ci); + addReplyBulk(c,o); + decrRefCount(o); + } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) { + /* CLUSTER MYID */ + addReplyBulkCBuffer(c,myself->name, CLUSTER_NAMELEN); + } else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) { + /* CLUSTER SLOTS */ + clusterReplyMultiBulkSlots(c); + } else if (!strcasecmp(c->argv[1]->ptr,"flushslots") && c->argc == 2) { + /* CLUSTER FLUSHSLOTS */ + if (dictSize(server.db[0].dict) != 0) { + addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS."); + return; + } + clusterDelNodeSlots(myself); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") || + !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) + { + /* CLUSTER ADDSLOTS 
<slot> [slot] ... */ + /* CLUSTER DELSLOTS <slot> [slot] ... */ + int j, slot; + unsigned char *slots = zmalloc(CLUSTER_SLOTS); + int del = !strcasecmp(c->argv[1]->ptr,"delslots"); + + memset(slots,0,CLUSTER_SLOTS); + /* Check that all the arguments are parseable and that all the + * slots are not already busy. */ + for (j = 2; j < c->argc; j++) { + if ((slot = getSlotOrReply(c,c->argv[j])) == -1) { + zfree(slots); + return; + } + if (del && server.cluster->slots[slot] == NULL) { + addReplyErrorFormat(c,"Slot %d is already unassigned", slot); + zfree(slots); + return; + } else if (!del && server.cluster->slots[slot]) { + addReplyErrorFormat(c,"Slot %d is already busy", slot); + zfree(slots); + return; + } + if (slots[slot]++ == 1) { + addReplyErrorFormat(c,"Slot %d specified multiple times", + (int)slot); + zfree(slots); + return; + } + } + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (slots[j]) { + int retval; + + /* If this slot was set as importing we can clear this + * state as now we are the real owner of the slot. */ + if (server.cluster->importing_slots_from[j]) + server.cluster->importing_slots_from[j] = NULL; + + retval = del ? 
clusterDelSlot(j) : + clusterAddSlot(myself,j); + serverAssertWithInfo(c,NULL,retval == C_OK); + } + } + zfree(slots); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"setslot") && c->argc >= 4) { + /* SETSLOT 10 MIGRATING <node ID> */ + /* SETSLOT 10 IMPORTING <node ID> */ + /* SETSLOT 10 STABLE */ + /* SETSLOT 10 NODE <node ID> */ + int slot; + clusterNode *n; + + if (nodeIsSlave(myself)) { + addReplyError(c,"Please use SETSLOT only with masters."); + return; + } + + if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return; + + if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) { + if (server.cluster->slots[slot] != myself) { + addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); + return; + } + if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { + addReplyErrorFormat(c,"I don't know about node %s", + (char*)c->argv[4]->ptr); + return; + } + server.cluster->migrating_slots_to[slot] = n; + } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) { + if (server.cluster->slots[slot] == myself) { + addReplyErrorFormat(c, + "I'm already the owner of hash slot %u",slot); + return; + } + if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { + addReplyErrorFormat(c,"I don't know about node %s", + (char*)c->argv[3]->ptr); + return; + } + server.cluster->importing_slots_from[slot] = n; + } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) { + /* CLUSTER SETSLOT <SLOT> STABLE */ + server.cluster->importing_slots_from[slot] = NULL; + server.cluster->migrating_slots_to[slot] = NULL; + } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) { + /* CLUSTER SETSLOT <SLOT> NODE <NODE ID> */ + clusterNode *n = clusterLookupNode(c->argv[4]->ptr); + + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", + (char*)c->argv[4]->ptr); + return; + } + /* If this hash slot was served by 'myself' before to switch + * make sure there are 
no longer local keys for this hash slot. */ + if (server.cluster->slots[slot] == myself && n != myself) { + if (countKeysInSlot(slot) != 0) { + addReplyErrorFormat(c, + "Can't assign hashslot %d to a different node " + "while I still hold keys for this hash slot.", slot); + return; + } + } + /* If this slot is in migrating status but we have no keys + * for it assigning the slot to another node will clear + * the migratig status. */ + if (countKeysInSlot(slot) == 0 && + server.cluster->migrating_slots_to[slot]) + server.cluster->migrating_slots_to[slot] = NULL; + + /* If this node was importing this slot, assigning the slot to + * itself also clears the importing status. */ + if (n == myself && + server.cluster->importing_slots_from[slot]) + { + /* This slot was manually migrated, set this node configEpoch + * to a new epoch so that the new version can be propagated + * by the cluster. + * + * Note that if this ever results in a collision with another + * node getting the same configEpoch, for example because a + * failover happens at the same time we close the slot, the + * configEpoch collision resolution will fix it assigning + * a different epoch to each node. */ + if (clusterBumpConfigEpochWithoutConsensus() == C_OK) { + serverLog(LL_WARNING, + "configEpoch updated after importing slot %d", slot); + } + server.cluster->importing_slots_from[slot] = NULL; + } + clusterDelSlot(slot); + clusterAddSlot(n,slot); + } else { + addReplyError(c, + "Invalid CLUSTER SETSLOT action or number of arguments"); + return; + } + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) { + /* CLUSTER BUMPEPOCH */ + int retval = clusterBumpConfigEpochWithoutConsensus(); + sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n", + (retval == C_OK) ? 
"BUMPED" : "STILL", + (unsigned long long) myself->configEpoch); + addReplySds(c,reply); + } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) { + /* CLUSTER INFO */ + char *statestr[] = {"ok","fail","needhelp"}; + int slots_assigned = 0, slots_ok = 0, slots_pfail = 0, slots_fail = 0; + uint64_t myepoch; + int j; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + clusterNode *n = server.cluster->slots[j]; + + if (n == NULL) continue; + slots_assigned++; + if (nodeFailed(n)) { + slots_fail++; + } else if (nodeTimedOut(n)) { + slots_pfail++; + } else { + slots_ok++; + } + } + + myepoch = (nodeIsSlave(myself) && myself->slaveof) ? + myself->slaveof->configEpoch : myself->configEpoch; + + sds info = sdscatprintf(sdsempty(), + "cluster_state:%s\r\n" + "cluster_slots_assigned:%d\r\n" + "cluster_slots_ok:%d\r\n" + "cluster_slots_pfail:%d\r\n" + "cluster_slots_fail:%d\r\n" + "cluster_known_nodes:%lu\r\n" + "cluster_size:%d\r\n" + "cluster_current_epoch:%llu\r\n" + "cluster_my_epoch:%llu\r\n" + , statestr[server.cluster->state], + slots_assigned, + slots_ok, + slots_pfail, + slots_fail, + dictSize(server.cluster->nodes), + server.cluster->size, + (unsigned long long) server.cluster->currentEpoch, + (unsigned long long) myepoch + ); + + /* Show stats about messages sent and received. 
*/ + long long tot_msg_sent = 0; + long long tot_msg_received = 0; + + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + if (server.cluster->stats_bus_messages_sent[i] == 0) continue; + tot_msg_sent += server.cluster->stats_bus_messages_sent[i]; + info = sdscatprintf(info, + "cluster_stats_messages_%s_sent:%lld\r\n", + clusterGetMessageTypeString(i), + server.cluster->stats_bus_messages_sent[i]); + } + info = sdscatprintf(info, + "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent); + + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + if (server.cluster->stats_bus_messages_received[i] == 0) continue; + tot_msg_received += server.cluster->stats_bus_messages_received[i]; + info = sdscatprintf(info, + "cluster_stats_messages_%s_received:%lld\r\n", + clusterGetMessageTypeString(i), + server.cluster->stats_bus_messages_received[i]); + } + info = sdscatprintf(info, + "cluster_stats_messages_received:%lld\r\n", tot_msg_received); + + /* Produce the reply protocol. */ + addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n", + (unsigned long)sdslen(info))); + addReplySds(c,info); + addReply(c,shared.crlf); + } else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) { + int retval = clusterSaveConfig(1); + + if (retval == 0) + addReply(c,shared.ok); + else + addReplyErrorFormat(c,"error saving the cluster node config: %s", + strerror(errno)); + } else if (!strcasecmp(c->argv[1]->ptr,"keyslot") && c->argc == 3) { + /* CLUSTER KEYSLOT <key> */ + sds key = c->argv[2]->ptr; + + addReplyLongLong(c,keyHashSlot(key,sdslen(key))); + } else if (!strcasecmp(c->argv[1]->ptr,"countkeysinslot") && c->argc == 3) { + /* CLUSTER COUNTKEYSINSLOT <slot> */ + long long slot; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) + return; + if (slot < 0 || slot >= CLUSTER_SLOTS) { + addReplyError(c,"Invalid slot"); + return; + } + addReplyLongLong(c,countKeysInSlot(slot)); + } else if (!strcasecmp(c->argv[1]->ptr,"getkeysinslot") && c->argc == 4) { + /* CLUSTER 
GETKEYSINSLOT <slot> <count> */ + long long maxkeys, slot; + unsigned int numkeys, j; + robj **keys; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) + return; + if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL) + != C_OK) + return; + if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) { + addReplyError(c,"Invalid slot or number of keys"); + return; + } + + keys = zmalloc(sizeof(robj*)*maxkeys); + numkeys = getKeysInSlot(slot, keys, maxkeys); + addReplyMultiBulkLen(c,numkeys); + for (j = 0; j < numkeys; j++) { + addReplyBulk(c,keys[j]); + decrRefCount(keys[j]); + } + zfree(keys); + } else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) { + /* CLUSTER FORGET <NODE ID> */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr); + + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return; + } else if (n == myself) { + addReplyError(c,"I tried hard but I can't forget myself..."); + return; + } else if (nodeIsSlave(myself) && myself->slaveof == n) { + addReplyError(c,"Can't forget my master!"); + return; + } + clusterBlacklistAddNode(n); + clusterDelNode(n); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc == 3) { + /* CLUSTER REPLICATE <NODE ID> */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr); + + /* Lookup the specified node in our table. */ + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return; + } + + /* I can't replicate myself. */ + if (n == myself) { + addReplyError(c,"Can't replicate myself"); + return; + } + + /* Can't replicate a slave. */ + if (nodeIsSlave(n)) { + addReplyError(c,"I can only replicate a master, not a slave."); + return; + } + + /* If the instance is currently a master, it should have no assigned + * slots nor keys to accept to replicate some other node. + * Slaves can switch to another master without issues. 
*/ + if (nodeIsMaster(myself) && + (myself->numslots != 0 || dictSize(server.db[0].dict) != 0)) { + addReplyError(c, + "To set a master the node must be empty and " + "without assigned slots."); + return; + } + + /* Set the master. */ + clusterSetMaster(n); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"slaves") && c->argc == 3) { + /* CLUSTER SLAVES <NODE ID> */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr); + int j; + + /* Lookup the specified node in our table. */ + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return; + } + + if (nodeIsSlave(n)) { + addReplyError(c,"The specified node is not a master"); + return; + } + + addReplyMultiBulkLen(c,n->numslaves); + for (j = 0; j < n->numslaves; j++) { + sds ni = clusterGenNodeDescription(n->slaves[j]); + addReplyBulkCString(c,ni); + sdsfree(ni); + } + } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") && + c->argc == 3) + { + /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr); + + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return; + } else { + addReplyLongLong(c,clusterNodeFailureReportsCount(n)); + } + } else if (!strcasecmp(c->argv[1]->ptr,"failover") && + (c->argc == 2 || c->argc == 3)) + { + /* CLUSTER FAILOVER [FORCE|TAKEOVER] */ + int force = 0, takeover = 0; + + if (c->argc == 3) { + if (!strcasecmp(c->argv[2]->ptr,"force")) { + force = 1; + } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) { + takeover = 1; + force = 1; /* Takeover also implies force. */ + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Check preconditions. 
*/ + if (nodeIsMaster(myself)) { + addReplyError(c,"You should send CLUSTER FAILOVER to a slave"); + return; + } else if (myself->slaveof == NULL) { + addReplyError(c,"I'm a slave but my master is unknown to me"); + return; + } else if (!force && + (nodeFailed(myself->slaveof) || + myself->slaveof->link == NULL)) + { + addReplyError(c,"Master is down or failed, " + "please use CLUSTER FAILOVER FORCE"); + return; + } + resetManualFailover(); + server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; + + if (takeover) { + /* A takeover does not perform any initial check. It just + * generates a new configuration epoch for this node without + * consensus, claims the master's slots, and broadcast the new + * configuration. */ + serverLog(LL_WARNING,"Taking over the master (user request)."); + clusterBumpConfigEpochWithoutConsensus(); + clusterFailoverReplaceYourMaster(); + } else if (force) { + /* If this is a forced failover, we don't need to talk with our + * master to agree about the offset. We just failover taking over + * it without coordination. */ + serverLog(LL_WARNING,"Forced failover user request accepted."); + server.cluster->mf_can_start = 1; + } else { + serverLog(LL_WARNING,"Manual failover user request accepted."); + clusterSendMFStart(myself->slaveof); + } + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3) + { + /* CLUSTER SET-CONFIG-EPOCH <epoch> + * + * The user is allowed to set the config epoch only when a node is + * totally fresh: no config epoch, no other known node, and so forth. + * This happens at cluster creation time to start with a cluster where + * every node has a different node ID, without to rely on the conflicts + * resolution system which is too slow when a big cluster is created. 
*/ + long long epoch; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK) + return; + + if (epoch < 0) { + addReplyErrorFormat(c,"Invalid config epoch specified: %lld",epoch); + } else if (dictSize(server.cluster->nodes) > 1) { + addReplyError(c,"The user can assign a config epoch only when the " + "node does not know any other node."); + } else if (myself->configEpoch != 0) { + addReplyError(c,"Node config epoch is already non-zero"); + } else { + myself->configEpoch = epoch; + serverLog(LL_WARNING, + "configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH", + (unsigned long long) myself->configEpoch); + + if (server.cluster->currentEpoch < (uint64_t)epoch) + server.cluster->currentEpoch = epoch; + /* No need to fsync the config here since in the unlucky event + * of a failure to persist the config, the conflict resolution code + * will assign an unique config to this node. */ + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } + } else if (!strcasecmp(c->argv[1]->ptr,"reset") && + (c->argc == 2 || c->argc == 3)) + { + /* CLUSTER RESET [SOFT|HARD] */ + int hard = 0; + + /* Parse soft/hard argument. Default is soft. */ + if (c->argc == 3) { + if (!strcasecmp(c->argv[2]->ptr,"hard")) { + hard = 1; + } else if (!strcasecmp(c->argv[2]->ptr,"soft")) { + hard = 0; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Slaves can be reset while containing data, but not master nodes + * that must be empty. 
*/ + if (nodeIsMaster(myself) && dictSize(c->db->dict) != 0) { + addReplyError(c,"CLUSTER RESET can't be called with " + "master nodes containing keys"); + return; + } + clusterReset(hard); + addReply(c,shared.ok); + } else { + addReplyError(c,"Wrong CLUSTER subcommand or number of arguments"); + } +} + +/* ----------------------------------------------------------------------------- + * DUMP, RESTORE and MIGRATE commands + * -------------------------------------------------------------------------- */ + +/* Generates a DUMP-format representation of the object 'o', adding it to the + * io stream pointed by 'rio'. This function can't fail. */ +void createDumpPayload(rio *payload, robj *o) { + unsigned char buf[2]; + uint64_t crc; + + /* Serialize the object in a RDB-like format. It consist of an object type + * byte followed by the serialized object. This is understood by RESTORE. */ + rioInitWithBuffer(payload,sdsempty()); + serverAssert(rdbSaveObjectType(payload,o)); + serverAssert(rdbSaveObject(payload,o)); + + /* Write the footer, this is how it looks like: + * ----------------+---------------------+---------------+ + * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 | + * ----------------+---------------------+---------------+ + * RDB version and CRC are both in little endian. + */ + + /* RDB version */ + buf[0] = RDB_VERSION & 0xff; + buf[1] = (RDB_VERSION >> 8) & 0xff; + payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2); + + /* CRC64 */ + crc = crc64(0,(unsigned char*)payload->io.buffer.ptr, + sdslen(payload->io.buffer.ptr)); + memrev64ifbe(&crc); + payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8); +} + +/* Verify that the RDB version of the dump payload matches the one of this Redis + * instance and that the checksum is ok. + * If the DUMP payload looks valid C_OK is returned, otherwise C_ERR + * is returned. 
*/ +int verifyDumpPayload(unsigned char *p, size_t len) { + unsigned char *footer; + uint16_t rdbver; + uint64_t crc; + + /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */ + if (len < 10) return C_ERR; + footer = p+(len-10); + + /* Verify RDB version */ + rdbver = (footer[1] << 8) | footer[0]; + if (rdbver > RDB_VERSION) return C_ERR; + + /* Verify CRC64 */ + crc = crc64(0,p,len-8); + memrev64ifbe(&crc); + return (memcmp(&crc,footer+2,8) == 0) ? C_OK : C_ERR; +} + +/* DUMP keyname + * DUMP is actually not used by Redis Cluster but it is the obvious + * complement of RESTORE and can be useful for different applications. */ +void dumpCommand(client *c) { + robj *o, *dumpobj; + rio payload; + + /* Check if the key is here. */ + if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { + addReply(c,shared.nullbulk); + return; + } + + /* Create the DUMP encoded representation. */ + createDumpPayload(&payload,o); + + /* Transfer to the client */ + dumpobj = createObject(OBJ_STRING,payload.io.buffer.ptr); + addReplyBulk(c,dumpobj); + decrRefCount(dumpobj); + return; +} + +/* RESTORE key ttl serialized-value [REPLACE] */ +void restoreCommand(client *c) { + long long ttl; + rio payload; + int j, type, replace = 0; + robj *obj; + + /* Parse additional options */ + for (j = 4; j < c->argc; j++) { + if (!strcasecmp(c->argv[j]->ptr,"replace")) { + replace = 1; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Make sure this key does not already exist here... */ + if (!replace && lookupKeyWrite(c->db,c->argv[1]) != NULL) { + addReply(c,shared.busykeyerr); + return; + } + + /* Check if the TTL value makes sense */ + if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != C_OK) { + return; + } else if (ttl < 0) { + addReplyError(c,"Invalid TTL value, must be >= 0"); + return; + } + + /* Verify RDB version and data checksum. 
*/
+    if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == C_ERR)
+    {
+        addReplyError(c,"DUMP payload version or checksum are wrong");
+        return;
+    }
+
+    /* Deserialize the object: first the type byte, then the value. */
+    rioInitWithBuffer(&payload,c->argv[3]->ptr);
+    if (((type = rdbLoadObjectType(&payload)) == -1) ||
+        ((obj = rdbLoadObject(type,&payload)) == NULL))
+    {
+        addReplyError(c,"Bad data format");
+        return;
+    }
+
+    /* Remove the old key if needed. */
+    if (replace) dbDelete(c->db,c->argv[1]);
+
+    /* Create the key and set the TTL if any. The TTL argument is relative,
+     * so it is added to the current time to obtain the expire time. */
+    dbAdd(c->db,c->argv[1],obj);
+    if (ttl) setExpire(c,c->db,c->argv[1],mstime()+ttl);
+    signalModifiedKey(c->db,c->argv[1]);
+    addReply(c,shared.ok);
+    server.dirty++;
+}
+
+/* MIGRATE socket cache implementation.
+ *
+ * We keep a map between host:port and a TCP socket that we used to connect
+ * to that instance recently.
+ * These sockets are closed when the max number we cache is reached, and also
+ * in serverCron() when they are around for more than a few seconds. */
+#define MIGRATE_SOCKET_CACHE_ITEMS 64 /* max num of items in the cache. */
+#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached sockets after 10 sec. */
+
+/* One cached connection to a MIGRATE target. */
+typedef struct migrateCachedSocket {
+    int fd;               /* Connected socket. */
+    long last_dbid;       /* DB last selected on this connection, -1 if unknown. */
+    time_t last_use_time; /* server.unixtime when the socket was last handed out. */
+} migrateCachedSocket;
+
+/* Return a migrateCachedSocket containing a TCP socket connected with the
+ * target instance, possibly returning a cached one.
+ *
+ * This function is responsible of sending errors to the client if a
+ * connection can't be established. In this case NULL is returned.
+ * Otherwise on success the socket is returned, and the caller should not
+ * attempt to free it after usage.
+ *
+ * If the caller detects an error while using the socket, migrateCloseSocket()
+ * should be called so that the connection will be created from scratch
+ * the next time.
*/
+/* Implementation notes:
+ *
+ * - The cache key is the string "<host>:<port>" built from the 'host' and
+ *   'port' arguments, and the connection is opened towards that same pair,
+ *   so the cached name always describes the endpoint actually connected.
+ * - When the cache already holds MIGRATE_SOCKET_CACHE_ITEMS entries, a random
+ *   one is closed and removed to make room.
+ * - On failure an error reply is queued on 'c' and NULL is returned.
+ * - On success 'name' is not freed: it is handed to dictAdd() as the entry
+ *   key (presumably owned by the dict from then on; confirm against the
+ *   dict type used for server.migrate_cached_sockets). */
+migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) {
+    int fd;
+    sds name = sdsempty();
+    migrateCachedSocket *cs;
+
+    /* Check if we have an already cached socket for this ip:port pair. */
+    name = sdscatlen(name,host->ptr,sdslen(host->ptr));
+    name = sdscatlen(name,":",1);
+    name = sdscatlen(name,port->ptr,sdslen(port->ptr));
+    cs = dictFetchValue(server.migrate_cached_sockets,name);
+    if (cs) {
+        sdsfree(name);
+        cs->last_use_time = server.unixtime;
+        return cs;
+    }
+
+    /* No cached socket, create one. */
+    if (dictSize(server.migrate_cached_sockets) == MIGRATE_SOCKET_CACHE_ITEMS) {
+        /* Too many items, drop one at random. The value is released by hand
+         * before the entry is removed from the dict. */
+        dictEntry *de = dictGetRandomKey(server.migrate_cached_sockets);
+        cs = dictGetVal(de);
+        close(cs->fd);
+        zfree(cs);
+        dictDelete(server.migrate_cached_sockets,dictGetKey(de));
+    }
+
+    /* Create the socket. Connect to the same host/port used to build the
+     * cache key rather than re-reading the client argument vector. */
+    fd = anetTcpNonBlockConnect(server.neterr,host->ptr,
+                                atoi(port->ptr));
+    if (fd == -1) {
+        sdsfree(name);
+        addReplyErrorFormat(c,"Can't connect to target node: %s",
+            server.neterr);
+        return NULL;
+    }
+    anetEnableTcpNoDelay(server.neterr,fd);
+
+    /* Check if it connects within the specified timeout. */
+    if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) {
+        sdsfree(name);
+        addReplySds(c,
+            sdsnew("-IOERR error or timeout connecting to the client\r\n"));
+        close(fd);
+        return NULL;
+    }
+
+    /* Add to the cache and return it to the caller. last_dbid starts at -1
+     * so that the first use of the connection always emits SELECT. */
+    cs = zmalloc(sizeof(*cs));
+    cs->fd = fd;
+    cs->last_dbid = -1;
+    cs->last_use_time = server.unixtime;
+    dictAdd(server.migrate_cached_sockets,name,cs);
+    return cs;
+}
+
+/* Free a migrate cached connection.
*/
+void migrateCloseSocket(robj *host, robj *port) {
+    /* Rebuild the "<host>:<port>" name the socket was cached under. */
+    sds key = sdscatlen(sdsempty(),host->ptr,sdslen(host->ptr));
+    key = sdscatlen(key,":",1);
+    key = sdscatlen(key,port->ptr,sdslen(port->ptr));
+
+    /* Close and forget the connection if there is one; a missing entry is
+     * not an error. The lookup name is always released before returning. */
+    migrateCachedSocket *cached = dictFetchValue(server.migrate_cached_sockets,key);
+    if (cached != NULL) {
+        close(cached->fd);
+        zfree(cached);
+        dictDelete(server.migrate_cached_sockets,key);
+    }
+    sdsfree(key);
+}
+
+/* Close every cached MIGRATE socket that has been idle for more than
+ * MIGRATE_SOCKET_CACHE_TTL seconds. */
+void migrateCloseTimedoutSockets(void) {
+    dictIterator *iter = dictGetSafeIterator(server.migrate_cached_sockets);
+    dictEntry *entry;
+
+    /* A safe iterator is used because entries are deleted while scanning. */
+    for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
+        migrateCachedSocket *cached = dictGetVal(entry);
+
+        /* Still fresh: keep it. */
+        if ((server.unixtime - cached->last_use_time) <= MIGRATE_SOCKET_CACHE_TTL)
+            continue;
+
+        close(cached->fd);
+        zfree(cached);
+        dictDelete(server.migrate_cached_sockets,dictGetKey(entry));
+    }
+    dictReleaseIterator(iter);
+}
+
+/* MIGRATE host port key dbid timeout [COPY | REPLACE]
+ *
+ * Or in the multiple keys form:
+ *
+ * MIGRATE host port "" dbid timeout [COPY | REPLACE] KEYS key1 key2 ... keyN */
+void migrateCommand(client *c) {
+    migrateCachedSocket *cs;
+    int copy, replace, j;
+    long timeout;
+    long dbid;
+    robj **ov = NULL; /* Objects to migrate. */
+    robj **kv = NULL; /* Key names. */
+    robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */
+    rio cmd, payload;
+    int may_retry = 1;
+    int write_error = 0;
+    int argv_rewritten = 0;
+
+    /* To support the KEYS option we need the following additional state. */
+    int first_key = 3; /* Argument index of the first key. */
+    int num_keys = 1; /* By default only migrate the 'key' argument.
*/ + + /* Initialization */ + copy = 0; + replace = 0; + + /* Parse additional options */ + for (j = 6; j < c->argc; j++) { + if (!strcasecmp(c->argv[j]->ptr,"copy")) { + copy = 1; + } else if (!strcasecmp(c->argv[j]->ptr,"replace")) { + replace = 1; + } else if (!strcasecmp(c->argv[j]->ptr,"keys")) { + if (sdslen(c->argv[3]->ptr) != 0) { + addReplyError(c, + "When using MIGRATE KEYS option, the key argument" + " must be set to the empty string"); + return; + } + first_key = j+1; + num_keys = c->argc - j - 1; + break; /* All the remaining args are keys. */ + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Sanity check */ + if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != C_OK || + getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != C_OK) + { + return; + } + if (timeout <= 0) timeout = 1000; + + /* Check if the keys are here. If at least one key is to migrate, do it + * otherwise if all the keys are missing reply with "NOKEY" to signal + * the caller there was nothing to migrate. We don't return an error in + * this case, since often this is due to a normal condition like the key + * expiring in the meantime. */ + ov = zrealloc(ov,sizeof(robj*)*num_keys); + kv = zrealloc(kv,sizeof(robj*)*num_keys); + int oi = 0; + + for (j = 0; j < num_keys; j++) { + if ((ov[oi] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) { + kv[oi] = c->argv[first_key+j]; + oi++; + } + } + num_keys = oi; + if (num_keys == 0) { + zfree(ov); zfree(kv); + addReplySds(c,sdsnew("+NOKEY\r\n")); + return; + } + +try_again: + write_error = 0; + + /* Connect */ + cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout); + if (cs == NULL) { + zfree(ov); zfree(kv); + return; /* error sent to the client by migrateGetSocket() */ + } + + rioInitWithBuffer(&cmd,sdsempty()); + + /* Send the SELECT command if the current DB is not already selected. */ + int select = cs->last_dbid != dbid; /* Should we emit SELECT? 
*/ + if (select) { + serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2)); + serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6)); + serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid)); + } + + /* Create RESTORE payload and generate the protocol to call the command. */ + for (j = 0; j < num_keys; j++) { + long long ttl = 0; + long long expireat = getExpire(c->db,kv[j]); + + if (expireat != -1) { + ttl = expireat-mstime(); + if (ttl < 1) ttl = 1; + } + serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',replace ? 5 : 4)); + if (server.cluster_enabled) + serverAssertWithInfo(c,NULL, + rioWriteBulkString(&cmd,"RESTORE-ASKING",14)); + else + serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); + serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j])); + serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr, + sdslen(kv[j]->ptr))); + serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); + + /* Emit the payload argument, that is the serialized object using + * the DUMP format. */ + createDumpPayload(&payload,ov[j]); + serverAssertWithInfo(c,NULL, + rioWriteBulkString(&cmd,payload.io.buffer.ptr, + sdslen(payload.io.buffer.ptr))); + sdsfree(payload.io.buffer.ptr); + + /* Add the REPLACE option to the RESTORE command if it was specified + * as a MIGRATE option. */ + if (replace) + serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7)); + } + + /* Transfer the query to the other node in 64K chunks. */ + errno = 0; + { + sds buf = cmd.io.buffer.ptr; + size_t pos = 0, towrite; + int nwritten = 0; + + while ((towrite = sdslen(buf)-pos) > 0) { + towrite = (towrite > (64*1024) ? (64*1024) : towrite); + nwritten = syncWrite(cs->fd,buf+pos,towrite,timeout); + if (nwritten != (signed)towrite) { + write_error = 1; + goto socket_err; + } + pos += nwritten; + } + } + + char buf1[1024]; /* Select reply. */ + char buf2[1024]; /* Restore reply. */ + + /* Read the SELECT reply if needed. 
*/ + if (select && syncReadLine(cs->fd, buf1, sizeof(buf1), timeout) <= 0) + goto socket_err; + + /* Read the RESTORE replies. */ + int error_from_target = 0; + int socket_error = 0; + int del_idx = 1; /* Index of the key argument for the replicated DEL op. */ + + if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1)); + + for (j = 0; j < num_keys; j++) { + if (syncReadLine(cs->fd, buf2, sizeof(buf2), timeout) <= 0) { + socket_error = 1; + break; + } + if ((select && buf1[0] == '-') || buf2[0] == '-') { + /* On error assume that last_dbid is no longer valid. */ + if (!error_from_target) { + cs->last_dbid = -1; + addReplyErrorFormat(c,"Target instance replied with error: %s", + (select && buf1[0] == '-') ? buf1+1 : buf2+1); + error_from_target = 1; + } + } else { + if (!copy) { + /* No COPY option: remove the local key, signal the change. */ + dbDelete(c->db,kv[j]); + signalModifiedKey(c->db,kv[j]); + server.dirty++; + + /* Populate the argument vector to replace the old one. */ + newargv[del_idx++] = kv[j]; + incrRefCount(kv[j]); + } + } + } + + /* On socket error, if we want to retry, do it now before rewriting the + * command vector. We only retry if we are sure nothing was processed + * and we failed to read the first reply (j == 0 test). */ + if (!error_from_target && socket_error && j == 0 && may_retry && + errno != ETIMEDOUT) + { + goto socket_err; /* A retry is guaranteed because of tested conditions.*/ + } + + /* On socket errors, close the migration socket now that we still have + * the original host/port in the ARGV. Later the original command may be + * rewritten to DEL and will be too later. */ + if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]); + + if (!copy) { + /* Translate MIGRATE as DEL for replication/AOF. Note that we do + * this only for the keys for which we received an acknowledgement + * from the receiving Redis server, by using the del_idx index. 
*/ + if (del_idx > 1) { + newargv[0] = createStringObject("DEL",3); + /* Note that the following call takes ownership of newargv. */ + replaceClientCommandVector(c,del_idx,newargv); + argv_rewritten = 1; + } else { + /* No key transfer acknowledged, no need to rewrite as DEL. */ + zfree(newargv); + } + newargv = NULL; /* Make it safe to call zfree() on it in the future. */ + } + + /* If we are here and a socket error happened, we don't want to retry. + * Just signal the problem to the client, but only do it if we did not + * already queue a different error reported by the destination server. */ + if (!error_from_target && socket_error) { + may_retry = 0; + goto socket_err; + } + + if (!error_from_target) { + /* Success! Update the last_dbid in migrateCachedSocket, so that we can + * avoid SELECT the next time if the target DB is the same. Reply +OK. + * + * Note: If we reached this point, even if socket_error is true + * still the SELECT command succeeded (otherwise the code jumps to + * socket_err label. */ + cs->last_dbid = dbid; + addReply(c,shared.ok); + } else { + /* On error we already sent it in the for loop above, and set + * the curretly selected socket to -1 to force SELECT the next time. */ + } + + sdsfree(cmd.io.buffer.ptr); + zfree(ov); zfree(kv); zfree(newargv); + return; + +/* On socket errors we try to close the cached socket and try again. + * It is very common for the cached socket to get closed, if just reopening + * it works it's a shame to notify the error to the caller. */ +socket_err: + /* Cleanup we want to perform in both the retry and no retry case. + * Note: Closing the migrate socket will also force SELECT next time. */ + sdsfree(cmd.io.buffer.ptr); + + /* If the command was rewritten as DEL and there was a socket error, + * we already closed the socket earlier. While migrateCloseSocket() + * is idempotent, the host/port arguments are now gone, so don't do it + * again. 
*/ + if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]); + zfree(newargv); + newargv = NULL; /* This will get reallocated on retry. */ + + /* Retry only if it's not a timeout and we never attempted a retry + * (or the code jumping here did not set may_retry to zero). */ + if (errno != ETIMEDOUT && may_retry) { + may_retry = 0; + goto try_again; + } + + /* Cleanup we want to do if no retry is attempted. */ + zfree(ov); zfree(kv); + addReplySds(c, + sdscatprintf(sdsempty(), + "-IOERR error or timeout %s to target instance\r\n", + write_error ? "writing" : "reading")); + return; +} + +/* ----------------------------------------------------------------------------- + * Cluster functions related to serving / redirecting clients + * -------------------------------------------------------------------------- */ + +/* The ASKING command is required after a -ASK redirection. + * The client should issue ASKING before to actually send the command to + * the target instance. See the Redis Cluster specification for more + * information. */ +void askingCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; + } + c->flags |= CLIENT_ASKING; + addReply(c,shared.ok); +} + +/* The READONLY command is used by clients to enter the read-only mode. + * In this mode slaves will not redirect clients as long as clients access + * with read-only commands to keys that are served by the slave's master. */ +void readonlyCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; + } + c->flags |= CLIENT_READONLY; + addReply(c,shared.ok); +} + +/* The READWRITE command just clears the READONLY command state. */ +void readwriteCommand(client *c) { + c->flags &= ~CLIENT_READONLY; + addReply(c,shared.ok); +} + +/* Return the pointer to the cluster node that is able to serve the command. 
+ * For the function to succeed the command should only target either: + * + * 1) A single key (even multiple times like LPOPRPUSH mylist mylist). + * 2) Multiple keys in the same hash slot, while the slot is stable (no + * resharding in progress). + * + * On success the function returns the node that is able to serve the request. + * If the node is not 'myself' a redirection must be perfomed. The kind of + * redirection is specified setting the integer passed by reference + * 'error_code', which will be set to CLUSTER_REDIR_ASK or + * CLUSTER_REDIR_MOVED. + * + * When the node is 'myself' 'error_code' is set to CLUSTER_REDIR_NONE. + * + * If the command fails NULL is returned, and the reason of the failure is + * provided via 'error_code', which will be set to: + * + * CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that + * don't belong to the same hash slot. + * + * CLUSTER_REDIR_UNSTABLE if the request contains multiple keys + * belonging to the same slot, but the slot is not stable (in migration or + * importing state, likely because a resharding is in progress). + * + * CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is + * not bound to any node. In this case the cluster global state should be + * already "down" but it is fragile to rely on the update of the global state, + * so we also handle it here. + * + * CLUSTER_REDIR_DOWN_STATE if the cluster is down but the user attempts to + * execute a command that addresses one or more keys. */ +clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) { + clusterNode *n = NULL; + robj *firstkey = NULL; + int multiple_keys = 0; + multiState *ms, _ms; + multiCmd mc; + int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0; + + /* Set error code optimistically for the base case. 
*/ + if (error_code) *error_code = CLUSTER_REDIR_NONE; + + /* We handle all the cases as if they were EXEC commands, so we have + * a common code path for everything */ + if (cmd->proc == execCommand) { + /* If CLIENT_MULTI flag is not set EXEC is just going to return an + * error. */ + if (!(c->flags & CLIENT_MULTI)) return myself; + ms = &c->mstate; + } else { + /* In order to have a single codepath create a fake Multi State + * structure if the client is not in MULTI/EXEC state, this way + * we have a single codepath below. */ + ms = &_ms; + _ms.commands = &mc; + _ms.count = 1; + mc.argv = argv; + mc.argc = argc; + mc.cmd = cmd; + } + + /* Check that all the keys are in the same hash slot, and obtain this + * slot and the node associated. */ + for (i = 0; i < ms->count; i++) { + struct redisCommand *mcmd; + robj **margv; + int margc, *keyindex, numkeys, j; + + mcmd = ms->commands[i].cmd; + margc = ms->commands[i].argc; + margv = ms->commands[i].argv; + + keyindex = getKeysFromCommand(mcmd,margv,margc,&numkeys); + for (j = 0; j < numkeys; j++) { + robj *thiskey = margv[keyindex[j]]; + int thisslot = keyHashSlot((char*)thiskey->ptr, + sdslen(thiskey->ptr)); + + if (firstkey == NULL) { + /* This is the first key we see. Check what is the slot + * and node. */ + firstkey = thiskey; + slot = thisslot; + n = server.cluster->slots[slot]; + + /* Error: If a slot is not served, we are in "cluster down" + * state. However the state is yet to be updated, so this was + * not trapped earlier in processCommand(). Report the same + * error to the client. */ + if (n == NULL) { + getKeysFreeResult(keyindex); + if (error_code) + *error_code = CLUSTER_REDIR_DOWN_UNBOUND; + return NULL; + } + + /* If we are migrating or importing this slot, we need to check + * if we have all the keys in the request (the only way we + * can safely serve the request, otherwise we return a TRYAGAIN + * error). 
To do so we set the importing/migrating state and + * increment a counter for every missing key. */ + if (n == myself && + server.cluster->migrating_slots_to[slot] != NULL) + { + migrating_slot = 1; + } else if (server.cluster->importing_slots_from[slot] != NULL) { + importing_slot = 1; + } + } else { + /* If it is not the first key, make sure it is exactly + * the same key as the first we saw. */ + if (!equalStringObjects(firstkey,thiskey)) { + if (slot != thisslot) { + /* Error: multiple keys from different slots. */ + getKeysFreeResult(keyindex); + if (error_code) + *error_code = CLUSTER_REDIR_CROSS_SLOT; + return NULL; + } else { + /* Flag this request as one with multiple different + * keys. */ + multiple_keys = 1; + } + } + } + + /* Migarting / Improrting slot? Count keys we don't have. */ + if ((migrating_slot || importing_slot) && + lookupKeyRead(&server.db[0],thiskey) == NULL) + { + missing_keys++; + } + } + getKeysFreeResult(keyindex); + } + + /* No key at all in command? then we can serve the request + * without redirections or errors in all the cases. */ + if (n == NULL) return myself; + + /* Cluster is globally down but we got keys? We can't serve the request. */ + if (server.cluster->state != CLUSTER_OK) { + if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE; + return NULL; + } + + /* Return the hashslot by reference. */ + if (hashslot) *hashslot = slot; + + /* MIGRATE always works in the context of the local node if the slot + * is open (migrating or importing state). We need to be able to freely + * move keys among instances in this case. */ + if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand) + return myself; + + /* If we don't have all the keys and we are migrating the slot, send + * an ASK redirection. 
*/ + if (migrating_slot && missing_keys) { + if (error_code) *error_code = CLUSTER_REDIR_ASK; + return server.cluster->migrating_slots_to[slot]; + } + + /* If we are receiving the slot, and the client correctly flagged the + * request as "ASKING", we can serve the request. However if the request + * involves multiple keys and we don't have them all, the only option is + * to send a TRYAGAIN error. */ + if (importing_slot && + (c->flags & CLIENT_ASKING || cmd->flags & CMD_ASKING)) + { + if (multiple_keys && missing_keys) { + if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE; + return NULL; + } else { + return myself; + } + } + + /* Handle the read-only client case reading from a slave: if this + * node is a slave and the request is about an hash slot our master + * is serving, we can reply without redirection. */ + if (c->flags & CLIENT_READONLY && + cmd->flags & CMD_READONLY && + nodeIsSlave(myself) && + myself->slaveof == n) + { + return myself; + } + + /* Base case: just return the right node. However if this node is not + * myself, set error_code to MOVED since we need to issue a rediretion. */ + if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED; + return n; +} + +/* Send the client the right redirection code, according to error_code + * that should be set to one of CLUSTER_REDIR_* macros. + * + * If CLUSTER_REDIR_ASK or CLUSTER_REDIR_MOVED error codes + * are used, then the node 'n' should not be NULL, but should be the + * node we want to mention in the redirection. Moreover hashslot should + * be set to the hash slot that caused the redirection. 
*/
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) {
+    if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
+        addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
+    } else if (error_code == CLUSTER_REDIR_UNSTABLE) {
+        /* The request spans multiple keys in the same slot,
+         * but the slot is not "stable" currently as there is
+         * a migration or import in progress. */
+        addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
+    } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+        addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
+    } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
+        addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
+    } else if (error_code == CLUSTER_REDIR_MOVED ||
+               error_code == CLUSTER_REDIR_ASK)
+    {
+        /* Only these two codes dereference 'n'. */
+        addReplySds(c,sdscatprintf(sdsempty(),
+            "-%s %d %s:%d\r\n",
+            (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
+            hashslot,n->ip,n->port));
+    } else {
+        serverPanic("getNodeByQuery() unknown error.");
+    }
+}
+
+/* This function is called by the function processing clients incrementally
+ * to detect timeouts, in order to handle the following case:
+ *
+ * 1) A client blocks with BLPOP or similar blocking operation.
+ * 2) The master migrates the hash slot elsewhere or turns into a slave.
+ * 3) The client may remain blocked forever (or up to the max timeout time)
+ *    waiting for a key change that will never happen.
+ *
+ * If the client is found to be blocked into an hash slot this node no
+ * longer handles, the client is sent a redirection error, and the function
+ * returns 1. Otherwise 0 is returned and no operation is performed.
+ *
+ * Only clients flagged CLIENT_BLOCKED with btype BLOCKED_LIST are examined;
+ * for any other client the function returns 0 immediately. The iterator
+ * over the blocking keys is released on every path that created it. */
+int clusterRedirectBlockedClientIfNeeded(client *c) {
+    if (c->flags & CLIENT_BLOCKED && c->btype == BLOCKED_LIST) {
+        dictEntry *de;
+        dictIterator *di;
+
+        /* If the cluster is down, unblock the client with the right error.
+         * No iterator exists yet at this point, so there is nothing to
+         * release. */
+        if (server.cluster->state == CLUSTER_FAIL) {
+            clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
+            return 1;
+        }
+
+        di = dictGetIterator(c->bpop.keys);
+        while((de = dictNext(di)) != NULL) {
+            robj *key = dictGetKey(de);
+            int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr));
+            clusterNode *node = server.cluster->slots[slot];
+
+            /* We send an error and unblock the client if:
+             * 1) The slot is unassigned, emitting a cluster down error.
+             * 2) The slot is not handled by this node, nor being imported. */
+            if (node != myself &&
+                server.cluster->importing_slots_from[slot] == NULL)
+            {
+                if (node == NULL) {
+                    clusterRedirectClient(c,NULL,0,
+                        CLUSTER_REDIR_DOWN_UNBOUND);
+                } else {
+                    clusterRedirectClient(c,node,slot,
+                        CLUSTER_REDIR_MOVED);
+                }
+                /* Leaving the loop early: release the iterator here,
+                 * otherwise it is leaked on every redirection. */
+                dictReleaseIterator(di);
+                return 1;
+            }
+        }
+        dictReleaseIterator(di);
+    }
+    return 0;
+}
diff --git a/c/examples/malloc.c b/c/examples/malloc.c
new file mode 100644
index 0000000..d5ee428
--- /dev/null
+++ b/c/examples/malloc.c
@@ -0,0 +1,532 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include "libc.h"
+#include "atomic.h"
+#include "pthread_impl.h"
+
+#if defined(__GNUC__) && defined(__PIC__)
+#define inline inline __attribute__((always_inline))
+#endif
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+void *__mremap(void *, size_t, size_t, int, ...);
+int __madvise(void *, size_t, int);
+
+struct chunk {
+    size_t psize, csize;
+    struct chunk *next, *prev;
+};
+
+struct bin {
+    volatile int lock[2];
+    struct chunk *head;
+    struct chunk *tail;
+};
+
+static struct {
+    volatile uint64_t binmap;
+    struct bin bins[64];
+    volatile int free_lock[2];
+} mal;
+
+
+#define SIZE_ALIGN (4*sizeof(size_t))
+#define SIZE_MASK (-SIZE_ALIGN)
+#define OVERHEAD (2*sizeof(size_t))
+#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
+#define DONTCARE 16
+#define RECLAIM 163840
+
+#define CHUNK_SIZE(c) ((c)->csize
& -2) +#define CHUNK_PSIZE(c) ((c)->psize & -2) +#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c))) +#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c))) +#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD) +#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD) +#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head)) + +#define C_INUSE ((size_t)1) + +#define IS_MMAPPED(c) !((c)->csize & (C_INUSE)) + + +/* Synchronization tools */ + +static inline void lock(volatile int *lk) +{ + if (libc.threads_minus_1) + while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); +} + +static inline void unlock(volatile int *lk) +{ + if (lk[0]) { + a_store(lk, 0); + if (lk[1]) __wake(lk, 1, 1); + } +} + +static inline void lock_bin(int i) +{ + lock(mal.bins[i].lock); + if (!mal.bins[i].head) + mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i); +} + +static inline void unlock_bin(int i) +{ + unlock(mal.bins[i].lock); +} + +static int first_set(uint64_t x) +{ +#if 1 + return a_ctz_64(x); +#else + static const char debruijn64[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 + }; + static const char debruijn32[32] = { + 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, + 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 + }; + if (sizeof(long) < 8) { + uint32_t y = x; + if (!y) { + y = x>>32; + return 32 + debruijn32[(y&-y)*0x076be629 >> 27]; + } + return debruijn32[(y&-y)*0x076be629 >> 27]; + } + return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58]; +#endif +} + +static const unsigned char bin_tab[60] = { + 32,33,34,35,36,36,37,37,38,38,39,39, + 40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43, + 44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45, + 46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47, +}; + +static int 
bin_index(size_t x) +{ + x = x / SIZE_ALIGN - 1; + if (x <= 32) return x; + if (x < 512) return bin_tab[x/8-4]; + if (x > 0x1c00) return 63; + return bin_tab[x/128-4] + 16; +} + +static int bin_index_up(size_t x) +{ + x = x / SIZE_ALIGN - 1; + if (x <= 32) return x; + x--; + if (x < 512) return bin_tab[x/8-4] + 1; + return bin_tab[x/128-4] + 17; +} + +#if 0 +void __dump_heap(int x) +{ + struct chunk *c; + int i; + for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c)) + fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n", + c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)), + c->csize & 15, + NEXT_CHUNK(c)->psize & 15); + for (i=0; i<64; i++) { + if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) { + fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head); + if (!(mal.binmap & 1ULL<<i)) + fprintf(stderr, "missing from binmap!\n"); + } else if (mal.binmap & 1ULL<<i) + fprintf(stderr, "binmap wrongly contains %d!\n", i); + } +} +#endif + +void *__expand_heap(size_t *); + +static struct chunk *expand_heap(size_t n) +{ + static int heap_lock[2]; + static void *end; + void *p; + struct chunk *w; + + /* The argument n already accounts for the caller's chunk + * overhead needs, but if the heap can't be extended in-place, + * we need room for an extra zero-sized sentinel chunk. */ + n += SIZE_ALIGN; + + lock(heap_lock); + + p = __expand_heap(&n); + if (!p) { + unlock(heap_lock); + return 0; + } + + /* If not just expanding existing space, we need to make a + * new sentinel chunk below the allocated space. */ + if (p != end) { + /* Valid/safe because of the prologue increment. */ + n -= SIZE_ALIGN; + p = (char *)p + SIZE_ALIGN; + w = MEM_TO_CHUNK(p); + w->psize = 0 | C_INUSE; + } + + /* Record new heap end and fill in footer. */ + end = (char *)p + n; + w = MEM_TO_CHUNK(end); + w->psize = n | C_INUSE; + w->csize = 0 | C_INUSE; + + /* Fill in header, which may be new or may be replacing a + * zero-size sentinel header at the old end-of-heap. 
*/ + w = MEM_TO_CHUNK(p); + w->csize = n | C_INUSE; + + unlock(heap_lock); + + return w; +} + +static int adjust_size(size_t *n) +{ + /* Result of pointer difference must fit in ptrdiff_t. */ + if (*n-1 > PTRDIFF_MAX - SIZE_ALIGN - PAGE_SIZE) { + if (*n) { + errno = ENOMEM; + return -1; + } else { + *n = SIZE_ALIGN; + return 0; + } + } + *n = (*n + OVERHEAD + SIZE_ALIGN - 1) & SIZE_MASK; + return 0; +} + +static void unbin(struct chunk *c, int i) +{ + if (c->prev == c->next) + a_and_64(&mal.binmap, ~(1ULL<<i)); + c->prev->next = c->next; + c->next->prev = c->prev; + c->csize |= C_INUSE; + NEXT_CHUNK(c)->psize |= C_INUSE; +} + +static int alloc_fwd(struct chunk *c) +{ + int i; + size_t k; + while (!((k=c->csize) & C_INUSE)) { + i = bin_index(k); + lock_bin(i); + if (c->csize == k) { + unbin(c, i); + unlock_bin(i); + return 1; + } + unlock_bin(i); + } + return 0; +} + +static int alloc_rev(struct chunk *c) +{ + int i; + size_t k; + while (!((k=c->psize) & C_INUSE)) { + i = bin_index(k); + lock_bin(i); + if (c->psize == k) { + unbin(PREV_CHUNK(c), i); + unlock_bin(i); + return 1; + } + unlock_bin(i); + } + return 0; +} + + +/* pretrim - trims a chunk _prior_ to removing it from its bin. + * Must be called with i as the ideal bin for size n, j the bin + * for the _free_ chunk self, and bin j locked. */ +static int pretrim(struct chunk *self, size_t n, int i, int j) +{ + size_t n1; + struct chunk *next, *split; + + /* We cannot pretrim if it would require re-binning. 
*/ + if (j < 40) return 0; + if (j < i+3) { + if (j != 63) return 0; + n1 = CHUNK_SIZE(self); + if (n1-n <= MMAP_THRESHOLD) return 0; + } else { + n1 = CHUNK_SIZE(self); + } + if (bin_index(n1-n) != j) return 0; + + next = NEXT_CHUNK(self); + split = (void *)((char *)self + n); + + split->prev = self->prev; + split->next = self->next; + split->prev->next = split; + split->next->prev = split; + split->psize = n | C_INUSE; + split->csize = n1-n; + next->psize = n1-n; + self->csize = n | C_INUSE; + return 1; +} + +static void trim(struct chunk *self, size_t n) +{ + size_t n1 = CHUNK_SIZE(self); + struct chunk *next, *split; + + if (n >= n1 - DONTCARE) return; + + next = NEXT_CHUNK(self); + split = (void *)((char *)self + n); + + split->psize = n | C_INUSE; + split->csize = n1-n | C_INUSE; + next->psize = n1-n | C_INUSE; + self->csize = n | C_INUSE; + + free(CHUNK_TO_MEM(split)); +} + +void *malloc(size_t n) +{ + struct chunk *c; + int i, j; + + if (adjust_size(&n) < 0) return 0; + + if (n > MMAP_THRESHOLD) { + size_t len = n + OVERHEAD + PAGE_SIZE - 1 & -PAGE_SIZE; + char *base = __mmap(0, len, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (base == (void *)-1) return 0; + c = (void *)(base + SIZE_ALIGN - OVERHEAD); + c->csize = len - (SIZE_ALIGN - OVERHEAD); + c->psize = SIZE_ALIGN - OVERHEAD; + return CHUNK_TO_MEM(c); + } + + i = bin_index_up(n); + for (;;) { + uint64_t mask = mal.binmap & -(1ULL<<i); + if (!mask) { + c = expand_heap(n); + if (!c) return 0; + if (alloc_rev(c)) { + struct chunk *x = c; + c = PREV_CHUNK(c); + NEXT_CHUNK(x)->psize = c->csize = + x->csize + CHUNK_SIZE(c); + } + break; + } + j = first_set(mask); + lock_bin(j); + c = mal.bins[j].head; + if (c != BIN_TO_CHUNK(j)) { + if (!pretrim(c, n, i, j)) unbin(c, j); + unlock_bin(j); + break; + } + unlock_bin(j); + } + + /* Now patch up in case we over-allocated */ + trim(c, n); + + return CHUNK_TO_MEM(c); +} + +void *__malloc0(size_t n) +{ + void *p = malloc(n); + if (p && 
!IS_MMAPPED(MEM_TO_CHUNK(p))) { + size_t *z; + /* n becomes a count of size_t words; only words that are nonzero get written */ n = (n + sizeof *z - 1)/sizeof *z; + for (z=p; n; n--, z++) if (*z) *z=0; + } + return p; +} + +/* realloc - resizes the allocation at p to n bytes. A null p behaves as malloc(n). An mmapped chunk is moved to a malloc'd chunk when the new mapping length would be under PAGE_SIZE, and otherwise resized with __mremap. A heap chunk first tries to absorb the following chunk if it is free and is then trimmed in place; if that is not enough, a new chunk is allocated, the old contents are copied, and the old chunk is freed. Returns 0 when adjust_size or the fallback malloc fails. */ void *realloc(void *p, size_t n) +{ + struct chunk *self, *next; + size_t n0, n1; + void *new; + + if (!p) return malloc(n); + + if (adjust_size(&n) < 0) return 0; + + self = MEM_TO_CHUNK(p); + n1 = n0 = CHUNK_SIZE(self); + + if (IS_MMAPPED(self)) { + /* for an mmapped chunk psize holds the offset from the start of the mapping to the chunk header, as set up in malloc */ size_t extra = self->psize; + char *base = (char *)self - extra; + size_t oldlen = n0 + extra; + size_t newlen = n + extra; + /* Crash on realloc of freed chunk */ + if (extra & 1) a_crash(); + if (newlen < PAGE_SIZE && (new = malloc(n))) { + memcpy(new, p, n-OVERHEAD); + free(p); + return new; + } + newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE; + if (oldlen == newlen) return p; + base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE); + if (base == (void *)-1) + goto copy_realloc; + self = (void *)(base + extra); + self->csize = newlen - extra; + return CHUNK_TO_MEM(self); + } + + next = NEXT_CHUNK(self); + + /* Crash on corrupted footer (likely from buffer overflow) */ + if (next->psize != self->csize) a_crash(); + + /* Merge adjacent chunks if we need more space. This is not + * a waste of time even if we fail to get enough space, because our + * subsequent call to free would otherwise have to do the merge. */ + if (n > n1 && alloc_fwd(next)) { + n1 += CHUNK_SIZE(next); + next = NEXT_CHUNK(next); + } + /* FIXME: find what's wrong here and reenable it..? */ + if (0 && n > n1 && alloc_rev(self)) { + self = PREV_CHUNK(self); + n1 += CHUNK_SIZE(self); + } + self->csize = n1 | C_INUSE; + next->psize = n1 | C_INUSE; + + /* If we got enough space, split off the excess and return */ + if (n <= n1) { + //memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD); + trim(self, n); + return CHUNK_TO_MEM(self); + } + +copy_realloc: + /* As a last resort, allocate a new chunk and copy to it. 
*/ + /* n already includes OVERHEAD from adjust_size, and malloc will add it again */ new = malloc(n-OVERHEAD); + if (!new) return 0; + memcpy(new, p, n0-OVERHEAD); + free(CHUNK_TO_MEM(self)); + return new; +} + +/* free - releases the allocation at p; a null p is a no-op. An mmapped chunk is unmapped with __munmap. A heap chunk is merged with its free neighbours (alloc_rev and alloc_fwd) until both neighbours are seen in use under the locks, then linked at the tail of bin i with C_INUSE cleared. When the reclaim flag was set during merging, the page-aligned middle of the chunk is passed to __madvise with MADV_DONTNEED. */ void free(void *p) +{ + struct chunk *self = MEM_TO_CHUNK(p); + struct chunk *next; + size_t final_size, new_size, size; + int reclaim=0; + int i; + + if (!p) return; + + if (IS_MMAPPED(self)) { + size_t extra = self->psize; + char *base = (char *)self - extra; + size_t len = CHUNK_SIZE(self) + extra; + /* Crash on double free */ + if (extra & 1) a_crash(); + __munmap(base, len); + return; + } + + final_size = new_size = CHUNK_SIZE(self); + next = NEXT_CHUNK(self); + + /* Crash on corrupted footer (likely from buffer overflow) */ + if (next->psize != self->csize) a_crash(); + + for (;;) { + /* both neighbours look in use: publish the merged size, take the locks and check again before leaving the loop */ if (self->psize & next->csize & C_INUSE) { + self->csize = final_size | C_INUSE; + next->psize = final_size | C_INUSE; + i = bin_index(final_size); + lock_bin(i); + lock(mal.free_lock); + if (self->psize & next->csize & C_INUSE) + break; + unlock(mal.free_lock); + unlock_bin(i); + } + + if (alloc_rev(self)) { + self = PREV_CHUNK(self); + size = CHUNK_SIZE(self); + final_size += size; + if (new_size+size > RECLAIM && (new_size+size^size) > size) + reclaim = 1; + } + + if (alloc_fwd(next)) { + size = CHUNK_SIZE(next); + final_size += size; + if (new_size+size > RECLAIM && (new_size+size^size) > size) + reclaim = 1; + next = NEXT_CHUNK(next); + } + } + + if (!(mal.binmap & 1ULL<<i)) + a_or_64(&mal.binmap, 1ULL<<i); + + /* the chunk is stored without C_INUSE, i.e. as free */ self->csize = final_size; + next->psize = final_size; + unlock(mal.free_lock); + + /* link the chunk in at the tail of bin i */ self->next = BIN_TO_CHUNK(i); + self->prev = mal.bins[i].tail; + self->next->prev = self; + self->prev->next = self; + + /* Replace middle of large chunks with fresh zero pages */ + if (reclaim) { + uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE; + uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE; +#if 1 + __madvise((void *)a, b-a, MADV_DONTNEED); +#else + __mmap((void *)a, b-a, PROT_READ|PROT_WRITE, + 
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); +#endif + } + + unlock_bin(i); +} diff --git a/c/examples/parser.c b/c/examples/parser.c new file mode 100644 index 0000000..2bb8638 --- /dev/null +++ b/c/examples/parser.c @@ -0,0 +1,1283 @@ +#include "runtime/parser.h" +#include <assert.h> +#include <stdio.h> +#include <limits.h> +#include <stdbool.h> +#include "tree_sitter/runtime.h" +#include "runtime/tree.h" +#include "runtime/lexer.h" +#include "runtime/length.h" +#include "runtime/array.h" +#include "runtime/language.h" +#include "runtime/alloc.h" +#include "runtime/reduce_action.h" +#include "runtime/error_costs.h" + +#define LOG(...) \ + if (self->lexer.logger.log) { \ + snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ + self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, \ + self->lexer.debug_buffer); \ + } \ + if (self->print_debugging_graphs) { \ + fprintf(stderr, "graph {\nlabel=\""); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\"\n}\n\n"); \ + } + +#define LOG_STACK() \ + if (self->print_debugging_graphs) { \ + ts_stack_print_dot_graph(self->stack, self->language->symbol_names, \ + stderr); \ + fputs("\n\n", stderr); \ + } + +#define LOG_TREE() \ + if (self->print_debugging_graphs) { \ + ts_tree_print_dot_graph(self->finished_tree, self->language, stderr); \ + fputs("\n", stderr); \ + } + +#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) + +/* State shared between parser__repair_error and parser__repair_error_callback while the stack is searched for an error repair; the best_repair fields describe the best repair found so far. */ typedef struct { + Parser *parser; + TSSymbol lookahead_symbol; + TreeArray *trees_above_error; + uint32_t tree_count_above_error; + bool found_repair; + ReduceAction best_repair; + TSStateId best_repair_next_state; + uint32_t best_repair_skip_count; +} ErrorRepairSession; + +/* Payload passed to parser__skip_preceding_trees_callback. */ typedef struct { + Parser *parser; + TSSymbol lookahead_symbol; +} SkipPrecedingTreesSession; + +/* Pushes tree onto the given stack version (not pending) in the given state, then calls ts_tree_release on it (presumably the stack keeps its own reference - confirm). */ static void parser__push(Parser *self, StackVersion version, Tree *tree, + TSStateId state) { + ts_stack_push(self->stack, version, tree, false, state); + ts_tree_release(tree); +} 
+ +/* Repeatedly pops the pending node on top of the given stack version and pushes its children in its place, followed by the other trees that were popped with it. Stops when nothing pending is left or when the last child pushed has no children of its own. Returns true if at least one node was broken down. */ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { + bool did_break_down = false; + bool pending = false; + + do { + StackPopResult pop = ts_stack_pop_pending(self->stack, version); + if (!pop.slices.size) + break; + + did_break_down = true; + pending = false; + for (uint32_t i = 0; i < pop.slices.size; i++) { + StackSlice slice = pop.slices.contents[i]; + TSStateId state = ts_stack_top_state(self->stack, slice.version); + Tree *parent = *array_front(&slice.trees); + + /* push the children one by one, following the parse table from the state below the parent */ for (uint32_t j = 0; j < parent->child_count; j++) { + Tree *child = parent->children[j]; + pending = child->child_count > 0; + + if (child->symbol == ts_builtin_sym_error) { + state = ERROR_STATE; + } else if (!child->extra) { + state = ts_language_next_state(self->language, state, child->symbol); + } + + ts_stack_push(self->stack, slice.version, child, pending, state); + } + + /* index 0 is the parent itself; the remaining trees are pushed back unchanged */ for (uint32_t j = 1; j < slice.trees.size; j++) { + Tree *tree = slice.trees.contents[j]; + parser__push(self, slice.version, tree, state); + } + + LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); + LOG_STACK(); + + ts_stack_decrease_push_count(self->stack, slice.version, + parent->child_count + 1); + ts_tree_release(parent); + array_delete(&slice.trees); + } + } while (pending); + + return did_break_down; +} + +/* Breaks reusable_node down for as long as its tree has children and either the parser is split, the tree's parse_state differs from state, or the tree is fragile on either side. If anything was broken down, *lookahead is released and replaced with a retained reference to the node's new tree. Returns true if a breakdown happened. */ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead, + TSStateId state, + ReusableNode *reusable_node) { + bool result = false; + while (reusable_node->tree->child_count > 0 && + (self->is_split || reusable_node->tree->parse_state != state || + reusable_node->tree->fragile_left || + reusable_node->tree->fragile_right)) { + LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); + reusable_node_breakdown(reusable_node); + result = true; + } + + if (result) { + ts_tree_release(*lookahead); + ts_tree_retain(*lookahead = reusable_node->tree); + } + + return result; +} + +/* Returns true when both lex modes have the same lex_state and the same external_lex_state. */ static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) { + 
return self.lex_state == other.lex_state && + self.external_lex_state == other.external_lex_state; +} + +/* Decides whether tree may be reused as the lookahead in the given state. It always may when its first leaf was lexed in the state's lex mode. Otherwise it may only when the state has no external lex state, the tree is not empty, the table entry is reusable, and - if the entry depends on the lookahead - the tree has more than one child and no error cost. */ static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree, + TableEntry *table_entry) { + TSLexMode current_lex_mode = self->language->lex_modes[state]; + if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode)) + return true; + if (current_lex_mode.external_lex_state != 0) + return false; + if (tree->size.bytes == 0) + return false; + if (!table_entry->is_reusable) + return false; + if (!table_entry->depends_on_lookahead) + return true; + return tree->child_count > 1 && tree->error_cost == 0; +} + +/* Bit flags combined into the CondenseResult returned by parser__condense_stack. */ typedef int CondenseResult; +static int CondenseResultMadeChange = 1; +static int CondenseResultAllVersionsHadError = 2; + +/* Reduces the number of stack versions: halted versions are removed, a version is merged into an earlier one when ts_stack_merge accepts the pair, and for each remaining pair error_status_compare decides which one to drop (-1 drops the earlier version j, 1 drops version i). The result has CondenseResultMadeChange set if anything was removed or merged, and CondenseResultAllVersionsHadError set if versions remain and every version examined had a nonzero error count. */ static CondenseResult parser__condense_stack(Parser *self) { + CondenseResult result = 0; + bool has_version_without_errors = false; + + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + result |= CondenseResultMadeChange; + /* step back so that the version now at index i is visited next */ i--; + continue; + } + + ErrorStatus error_status = ts_stack_error_status(self->stack, i); + if (error_status.count == 0) has_version_without_errors = true; + + for (StackVersion j = 0; j < i; j++) { + if (ts_stack_merge(self->stack, j, i)) { + result |= CondenseResultMadeChange; + i--; + break; + } + + switch (error_status_compare(error_status, + ts_stack_error_status(self->stack, j))) { + case -1: + ts_stack_remove_version(self->stack, j); + result |= CondenseResultMadeChange; + i--; + j--; + break; + case 1: + /* NOTE(review): the break below leaves only the switch, so the j loop keeps running with the decremented i and the old error_status - confirm this is intended */ ts_stack_remove_version(self->stack, i); + result |= CondenseResultMadeChange; + i--; + break; + } + } + } + + if (!has_version_without_errors && ts_stack_version_count(self->stack) > 0) { + result |= CondenseResultAllVersionsHadError; + } + + return result; +} + +/* Brings the external scanner in line with the external token state recorded for the given stack version: the state is deserialized into the scanner when there is one, and the scanner is reset otherwise. Nothing happens when the lexer already holds the same state pointer. */ static void parser__restore_external_scanner(Parser *self, StackVersion version) { + const TSExternalTokenState *state = 
ts_stack_external_token_state(self->stack, version); + if (self->lexer.last_external_token_state != state) { + LOG("restore_external_scanner"); + self->lexer.last_external_token_state = state; + if (state) { + self->language->external_scanner.deserialize( + self->external_scanner_payload, + *state + ); + } else { + self->language->external_scanner.reset(self->external_scanner_payload); + } + } +} + +/* Lexes the next token at the top position of the given stack version and returns it as a new tree. The external scanner is tried first when the lex mode enables external tokens, then the language's lex_fn. If both fail, lexing is retried once from the start position in ERROR_STATE's lex mode; after that, characters are skipped until a token is recognized or the input ends, and the skipped range is returned as an error tree. */ static Tree *parser__lex(Parser *self, StackVersion version) { + TSStateId parse_state = ts_stack_top_state(self->stack, version); + Length start_position = ts_stack_top_position(self->stack, version); + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + const bool *valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + lex_mode.external_lex_state + ); + + bool found_external_token = false; + bool found_error = false; + bool skipped_error = false; + int32_t first_error_character = 0; + Length error_start_position, error_end_position; + ts_lexer_reset(&self->lexer, start_position); + + for (;;) { + Length current_position = self->lexer.current_position; + + if (valid_external_tokens) { + LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, + current_position.extent.row, current_position.extent.column); + parser__restore_external_scanner(self, version); + ts_lexer_start(&self->lexer); + if (self->language->external_scanner.scan(self->external_scanner_payload, + &self->lexer.data, valid_external_tokens)) { + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } + /* an external token that ends at or before current_position is only accepted when lex_state is nonzero */ if (lex_mode.lex_state != 0 || + self->lexer.token_end_position.bytes > current_position.bytes) { + found_external_token = true; + break; + } + } + /* external scan failed or was rejected: rewind before running the internal lexer */ ts_lexer_reset(&self->lexer, current_position); + } + + LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, + current_position.extent.row, current_position.extent.column); + ts_lexer_start(&self->lexer); + if 
(self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } + break; + } + + /* first failure: retry from the start position using ERROR_STATE's lex mode */ if (!found_error) { + LOG("retry_in_error_mode"); + found_error = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; + valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + lex_mode.external_lex_state + ); + ts_lexer_reset(&self->lexer, start_position); + continue; + } + + /* failed in error mode as well: remember where the skipped region starts and its first character */ if (!skipped_error) { + LOG("skip_unrecognized_character"); + skipped_error = true; + error_start_position = self->lexer.token_start_position; + error_end_position = self->lexer.token_start_position; + first_error_character = self->lexer.data.lookahead; + } + + /* if the lexer made no progress, stop at a zero lookahead and otherwise advance one step */ if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.data.lookahead == 0) { + self->lexer.data.result_symbol = ts_builtin_sym_error; + break; + } + self->lexer.data.advance(&self->lexer, false); + } + + error_end_position = self->lexer.current_position; + } + + Tree *result; + if (skipped_error) { + Length padding = length_sub(error_start_position, start_position); + Length size = length_sub(error_end_position, error_start_position); + result = ts_tree_make_error(size, padding, first_error_character); + } else { + TSSymbol symbol = self->lexer.data.result_symbol; + if (found_external_token) { + /* map the external scanner's result through symbol_map to get the language symbol */ symbol = self->language->external_scanner.symbol_map[symbol]; + } + + Length padding = length_sub(self->lexer.token_start_position, start_position); + Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); + result = ts_tree_make_leaf(symbol, padding, size, metadata); + + if (found_external_token) { + result->has_external_tokens = true; + result->has_external_token_state = true; + memset(result->external_token_state, 0, 
sizeof(TSExternalTokenState)); + /* store the scanner's serialized state on the token and remember it as the lexer's current external state */ self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state); + self->lexer.last_external_token_state = &result->external_token_state; + } + } + + result->bytes_scanned = self->lexer.current_position.bytes - start_position.bytes + 1; + result->parse_state = parse_state; + result->first_leaf.lex_mode = lex_mode; + + LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes); + return result; +} + +/* Releases self->cached_token and resets the pointer to NULL. */ static void parser__clear_cached_token(Parser *self) { + ts_tree_release(self->cached_token); + self->cached_token = NULL; +} + +/* Returns a retained lookahead tree for the given stack version. Preference order: a reusable node that starts exactly at the stack's top position, has no changes, is not an error and has a matching preceding external token state; then the cached token if it was cached for this byte offset; then a freshly lexed token. *is_fresh is written (set to true) only on the lexing path, so it is left untouched on the other two. */ static Tree *parser__get_lookahead(Parser *self, StackVersion version, + ReusableNode *reusable_node, + bool *is_fresh) { + Length position = ts_stack_top_position(self->stack, version); + + while (reusable_node->tree) { + /* the reusable node lies ahead of the current position: nothing to reuse yet */ if (reusable_node->byte_index > position.bytes) { + LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol)); + break; + } + + /* the reusable node lies behind the current position: drop it and look at the next one */ if (reusable_node->byte_index < position.bytes) { + LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol)); + reusable_node_pop(reusable_node); + continue; + } + + /* in each rejection case below the node is broken down into its children; when that is not possible it is popped and the top of the stack is broken down instead */ if (reusable_node->tree->has_changes) { + LOG("cant_reuse_changed tree:%s, size:%u", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.bytes); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); + parser__breakdown_top_of_stack(self, version); + } + continue; + } + + if (reusable_node->tree->symbol == ts_builtin_sym_error) { + LOG("cant_reuse_error tree:%s, size:%u", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.bytes); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); + parser__breakdown_top_of_stack(self, version); + } + continue; + } + + if (!ts_external_token_state_eq( + reusable_node->preceding_external_token_state, + ts_stack_external_token_state(self->stack, version))) { + 
LOG("cant_reuse_external_tokens tree:%s, size:%u", + SYM_NAME(reusable_node->tree->symbol), + reusable_node->tree->size.bytes); + if (!reusable_node_breakdown(reusable_node)) { + reusable_node_pop(reusable_node); + parser__breakdown_top_of_stack(self, version); + } + continue; + } + + Tree *result = reusable_node->tree; + ts_tree_retain(result); + return result; + } + + /* no reusable node at this position: fall back to the cached token when it was cached for this byte offset */ if (self->cached_token && position.bytes == self->cached_token_byte_index) { + ts_tree_retain(self->cached_token); + return self->cached_token; + } + + *is_fresh = true; + return parser__lex(self, version); +} + +/* Returns true when right should be chosen over left: left is NULL, right has the lower error cost, or - at equal error cost - ts_tree_compare(left, right) returns 1. Returns false when right is NULL (and left is not), when left has the lower error cost, and on every other comparison result. */ static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { + if (!left) + return true; + if (!right) + return false; + if (right->error_cost < left->error_cost) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", + SYM_NAME(right->symbol), SYM_NAME(left->symbol)); + return true; + } + if (left->error_cost < right->error_cost) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", + SYM_NAME(left->symbol), SYM_NAME(right->symbol)); + return false; + } + + /* equal error cost: let ts_tree_compare break the tie, keeping left when it reports no difference */ int comparison = ts_tree_compare(left, right); + switch (comparison) { + case -1: + LOG("select_earlier symbol:%s, over_symbol:%s", SYM_NAME(left->symbol), + SYM_NAME(right->symbol)); + return false; + break; + case 1: + LOG("select_earlier symbol:%s, over_symbol:%s", SYM_NAME(right->symbol), + SYM_NAME(left->symbol)); + return true; + default: + LOG("select_existing symbol:%s, over_symbol:%s", SYM_NAME(left->symbol), + SYM_NAME(right->symbol)); + return false; + } +} + +/* Returns true if the finished tree's error cost is no higher than my_error_status.cost, or if error_status_compare(my_error_status, status of another non-halted version) returns 1. Every other version for which the comparison returns -1 is halted along the way. */ static bool parser__better_version_exists(Parser *self, StackVersion version, + ErrorStatus my_error_status) { + if (self->finished_tree && + self->finished_tree->error_cost <= my_error_status.cost) + return true; + + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (i == version || ts_stack_is_halted(self->stack, i)) + continue; + + switch (error_status_compare(my_error_status, + ts_stack_error_status(self->stack, 
i))) { + case -1: + LOG("halt_other version:%u", i); + ts_stack_halt(self->stack, i); + break; + case 1: + return true; + } + } + + return false; +} + +/* Pushes lookahead onto the given stack version in the given state. When extra differs from the tree's own flag, the flag is updated - on a copy of the tree if its symbol is structural and more than one stack version exists, otherwise on the tree itself. A tree with children is pushed as pending, and a tree that carries an external token state has that state recorded on the stack version. */ static void parser__shift(Parser *self, StackVersion version, TSStateId state, + Tree *lookahead, bool extra) { + if (extra != lookahead->extra) { + TSSymbolMetadata metadata = + ts_language_symbol_metadata(self->language, lookahead->symbol); + if (metadata.structural && ts_stack_version_count(self->stack) > 1) { + lookahead = ts_tree_make_copy(lookahead); + } else { + ts_tree_retain(lookahead); + } + lookahead->extra = extra; + } else { + ts_tree_retain(lookahead); + } + + bool is_pending = lookahead->child_count > 0; + ts_stack_push(self->stack, version, lookahead, is_pending, state); + if (lookahead->has_external_token_state) { + ts_stack_set_external_token_state( + self->stack, version, ts_tree_last_external_token_state(lookahead)); + } + /* balances the retain or copy made at the top of this function */ ts_tree_release(lookahead); +} + +/* Builds self->scratch_tree from the alternative children and, if parser__select_tree prefers it over tree, copies its size, padding, error cost and child fields into tree. Returns true when the children were switched. */ static bool parser__switch_children(Parser *self, Tree *tree, + Tree **children, uint32_t count) { + self->scratch_tree.symbol = tree->symbol; + self->scratch_tree.child_count = 0; + ts_tree_set_children(&self->scratch_tree, count, children); + if (parser__select_tree(self, tree, &self->scratch_tree)) { + tree->size = self->scratch_tree.size; + tree->padding = self->scratch_tree.padding; + tree->error_cost = self->scratch_tree.error_cost; + tree->children = self->scratch_tree.children; + tree->child_count = self->scratch_tree.child_count; + tree->named_child_count = self->scratch_tree.named_child_count; + tree->visible_child_count = self->scratch_tree.visible_child_count; + return true; + } else { + return false; + } +} + +/* Pops count trees from the given stack version and, for each stack version among the resulting slices, pushes a new node with the given symbol built from the popped trees; trailing extra trees are pushed back after the node. If the pop stopped at an error, the pop result is returned at once and nothing is pushed. Stack versions created during the reduction are merged where ts_stack_merge accepts them. Returns the pop result. */ static StackPopResult parser__reduce(Parser *self, StackVersion version, + TSSymbol symbol, unsigned count, + bool fragile, bool allow_skipping) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + StackPopResult pop = ts_stack_pop_count(self->stack, version, count); + if (pop.stopped_at_error) + 
return pop; + + const TSLanguage *language = self->language; + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + + /* consecutive slices can belong to the same stack version; each such group yields one parent node */ for (uint32_t i = 0; i < pop.slices.size; i++) { + StackSlice slice = pop.slices.contents[i]; + + // Extra tokens on top of the stack should not be included in this new parent + // node. They will be re-pushed onto the stack after the parent node is + // created and pushed. + uint32_t child_count = slice.trees.size; + while (child_count > 0 && slice.trees.contents[child_count - 1]->extra) + child_count--; + + Tree *parent = ts_tree_make_node(symbol, child_count, slice.trees.contents, metadata); + + // This pop operation may have caused multiple stack versions to collapse + // into one, because they all diverged from a common state. In that case, + // choose one of the arrays of trees to be the parent node's children, and + // delete the rest of the tree arrays. + while (i + 1 < pop.slices.size) { + StackSlice next_slice = pop.slices.contents[i + 1]; + if (next_slice.version != slice.version) + break; + i++; + + /* NOTE(review): shadows the outer child_count, which keeps the first slice's value even when the children are switched - confirm intended */ uint32_t child_count = next_slice.trees.size; + while (child_count > 0 && next_slice.trees.contents[child_count - 1]->extra) + child_count--; + + if (parser__switch_children(self, parent, next_slice.trees.contents, child_count)) { + ts_tree_array_delete(&slice.trees); + slice = next_slice; + } else { + ts_tree_array_delete(&next_slice.trees); + } + } + + TSStateId state = ts_stack_top_state(self->stack, slice.version); + TSStateId next_state = ts_language_next_state(language, state, symbol); + /* the node is marked fragile, with no parse state, when the caller asks for it or when more than one slice or stack version is involved */ if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) { + parent->fragile_left = true; + parent->fragile_right = true; + parent->parse_state = TS_TREE_STATE_NONE; + } else { + parent->parse_state = state; + } + + // If this pop operation terminated at the end of an error region, then + // create two stack versions: one in which the parent node is interpreted + // normally, and one in which the parent 
node is skipped. + if (state == ERROR_STATE && allow_skipping && child_count > 1) { + StackVersion other_version = ts_stack_copy_version(self->stack, slice.version); + + /* on the copy, the parent and the trailing trees are all pushed in ERROR_STATE */ ts_stack_push(self->stack, other_version, parent, false, ERROR_STATE); + for (uint32_t j = parent->child_count; j < slice.trees.size; j++) { + Tree *tree = slice.trees.contents[j]; + ts_stack_push(self->stack, other_version, tree, false, ERROR_STATE); + } + + ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); + if (parser__better_version_exists(self, version, error_status)) + ts_stack_remove_version(self->stack, other_version); + } + + // Push the parent node onto the stack, along with any extra tokens that + // were previously on top of the stack. + parser__push(self, slice.version, parent, next_state); + for (uint32_t j = parent->child_count; j < slice.trees.size; j++) { + Tree *tree = slice.trees.contents[j]; + parser__push(self, slice.version, tree, next_state); + } + } + + /* merge the stack versions created by this reduction wherever ts_stack_merge accepts a pair */ for (StackVersion i = initial_version_count; i < ts_stack_version_count(self->stack); i++) { + for (StackVersion j = initial_version_count; j < i; j++) { + if (ts_stack_merge(self->stack, j, i)) { + i--; + break; + } + } + } + + return pop; +} + +/* Starting from start_state, follows ts_language_next_state over non-extra trees taken from the end of trees_below (walking backwards, until tree_count_below state changes have been counted) and then over the non-extra trees of trees_above. Returns the actions for lookahead_symbol in the resulting state, narrowed to those whose child_count matches the number of state changes counted; *count receives how many remain. Returns NULL, with *count set to 0, if any step leads to ERROR_STATE. */ static inline const TSParseAction *parser__reductions_after_sequence( + Parser *self, TSStateId start_state, const TreeArray *trees_below, + uint32_t tree_count_below, const TreeArray *trees_above, + TSSymbol lookahead_symbol, uint32_t *count) { + TSStateId state = start_state; + uint32_t child_count = 0; + *count = 0; + + for (uint32_t i = 0; i < trees_below->size; i++) { + if (child_count == tree_count_below) + break; + Tree *tree = trees_below->contents[trees_below->size - 1 - i]; + if (tree->extra) continue; + TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); + if (next_state == ERROR_STATE) + return NULL; + /* only trees that change the state are counted as children */ if (next_state != state) { + child_count++; + state = next_state; + } + } + + for (uint32_t i = 0; i < 
trees_above->size; i++) { + Tree *tree = trees_above->contents[i]; + if (tree->extra) continue; + TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); + if (next_state == ERROR_STATE) + return NULL; + if (next_state != state) { + child_count++; + state = next_state; + } + } + + const TSParseAction *actions = + ts_language_actions(self->language, state, lookahead_symbol, count); + + /* drop a trailing non-reduce action, then narrow the range to actions whose child_count equals child_count (presumably the list is ordered by child_count - confirm) */ if (*count > 0 && actions[*count - 1].type != TSParseActionTypeReduce) { + (*count)--; + } + + while (*count > 0 && actions[0].params.child_count < child_count) { + actions++; + (*count)--; + } + + while (*count > 0 && actions[*count - 1].params.child_count > child_count) { + (*count)--; + } + + return actions; +} + +/* Callback passed to ts_stack_iterate by parser__repair_error. For each candidate reduction in self->reduce_actions it checks whether enough trees are available at this stack position and whether the reduction would be valid here; a candidate that qualifies is recorded in the session as the best repair, removed from the candidate set, and a pop is requested. A stop is requested once no candidates remain. */ static StackIterateAction parser__repair_error_callback( + void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count, + bool is_done, bool is_pending) { + + ErrorRepairSession *session = (ErrorRepairSession *)payload; + Parser *self = session->parser; + TSSymbol lookahead_symbol = session->lookahead_symbol; + ReduceActionSet *repairs = &self->reduce_actions; + TreeArray *trees_above_error = session->trees_above_error; + uint32_t tree_count_above_error = session->tree_count_above_error; + + StackIterateAction result = StackIterateNone; + + /* -1 converts to the largest uint32_t value, so the first candidate always triggers a recomputation below */ uint32_t last_repair_count = -1; + uint32_t repair_reduction_count = -1; + const TSParseAction *repair_reductions = NULL; + + for (uint32_t i = 0; i < repairs->size; i++) { + ReduceAction *repair = &repairs->contents[i]; + uint32_t count_needed_below_error = repair->count - tree_count_above_error; + /* not enough trees have been seen below the error for this candidate; the loop stops here (presumably candidates are ordered by count - confirm) */ if (count_needed_below_error > tree_count) + break; + + uint32_t skip_count = tree_count - count_needed_below_error; + /* a repair that skips no more trees than this one has already been found: discard the candidate */ if (session->found_repair && skip_count >= session->best_repair_skip_count) { + array_erase(repairs, i--); + continue; + } + + TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol); + if (state == ERROR_STATE || state_after_repair == ERROR_STATE) + 
continue; + + /* the lookahead must have at least one action in the state reached after the repair */ uint32_t action_count; + ts_language_actions(self->language, state_after_repair, lookahead_symbol, &action_count); + if (action_count == 0) + continue; + + /* the candidate reductions are recomputed only when the number of trees needed below the error changes; the helper sets the count to 0 before it can return NULL, so the loop below is then skipped */ if (count_needed_below_error != last_repair_count) { + last_repair_count = count_needed_below_error; + repair_reductions = parser__reductions_after_sequence( + self, state, trees, count_needed_below_error, trees_above_error, + lookahead_symbol, &repair_reduction_count); + } + + for (uint32_t j = 0; j < repair_reduction_count; j++) { + if (repair_reductions[j].params.symbol == repair->symbol) { + result |= StackIteratePop; + session->found_repair = true; + session->best_repair = *repair; + session->best_repair_skip_count = skip_count; + session->best_repair_next_state = state_after_repair; + array_erase(repairs, i--); + break; + } + } + } + + if (repairs->size == 0) + result |= StackIterateStop; + + return result; +} + +/* Tries to repair an error with a single reduction that is valid before lookahead_symbol. The new node's children are trees found below the error, an error node wrapping the skipped trees, and the trees popped above the error (slice.trees). Returns true when a repair was applied and no better stack version exists. Returns false after removing the version when no repair was found, or after halting it when a better version exists. */ static bool parser__repair_error(Parser *self, StackSlice slice, + TSSymbol lookahead_symbol, TableEntry entry) { + LOG("repair_error"); + ErrorRepairSession session = { + .parser = self, + .lookahead_symbol = lookahead_symbol, + .found_repair = false, + .trees_above_error = &slice.trees, + .tree_count_above_error = ts_tree_array_essential_count(&slice.trees), + }; + + array_clear(&self->reduce_actions); + for (uint32_t i = 0; i < entry.action_count; i++) { + if (entry.actions[i].type == TSParseActionTypeReduce) { + TSSymbol symbol = entry.actions[i].params.symbol; + uint32_t child_count = entry.actions[i].params.child_count; + /* a candidate must need more children than were popped above the error, or exactly as many when its symbol is not visible */ if ((child_count > session.tree_count_above_error) || + (child_count == session.tree_count_above_error && + !ts_language_symbol_metadata(self->language, symbol).visible)) + array_push(&self->reduce_actions, ((ReduceAction){ + .symbol = symbol, + .count = child_count + })); + } + } + + StackPopResult pop = ts_stack_iterate( + self->stack, slice.version, parser__repair_error_callback, &session); + + if (!session.found_repair) { + LOG("no_repair_found"); + 
ts_stack_remove_version(self->stack, slice.version); + ts_tree_array_delete(&slice.trees); + return false; + } + + ReduceAction repair = session.best_repair; + TSStateId next_state = session.best_repair_next_state; + uint32_t skip_count = session.best_repair_skip_count; + TSSymbol symbol = repair.symbol; + + /* the last slice returned by the iteration is kept and its version takes over slice.version; all other slices are discarded */ StackSlice new_slice = array_pop(&pop.slices); + TreeArray children = new_slice.trees; + ts_stack_renumber_version(self->stack, new_slice.version, slice.version); + + /* NOTE(review): on the first iteration contents[i + 1] is the slot of the slice popped above (presumably still readable after array_pop - confirm) */ for (uint32_t i = pop.slices.size - 1; i + 1 > 0; i--) { + StackSlice other_slice = pop.slices.contents[i]; + ts_tree_array_delete(&other_slice.trees); + if (other_slice.version != pop.slices.contents[i + 1].version) + ts_stack_remove_version(self->stack, other_slice.version); + } + + /* the last skip_count trees are wrapped in an error node; their trailing extras are appended after that node instead of inside it */ TreeArray skipped_children = ts_tree_array_remove_last_n(&children, skip_count); + TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&skipped_children); + Tree *error = ts_tree_make_error_node(&skipped_children); + array_push(&children, error); + array_push_all(&children, &trailing_extras); + trailing_extras.size = 0; + array_delete(&trailing_extras); + + /* finally the trees that were popped above the error are appended */ for (uint32_t i = 0; i < slice.trees.size; i++) + array_push(&children, slice.trees.contents[i]); + array_delete(&slice.trees); + + Tree *parent = + ts_tree_make_node(symbol, children.size, children.contents, + ts_language_symbol_metadata(self->language, symbol)); + parser__push(self, slice.version, parent, next_state); + ts_stack_decrease_push_count(self->stack, slice.version, error->child_count); + + ErrorStatus error_status = ts_stack_error_status(self->stack, slice.version); + if (parser__better_version_exists(self, slice.version, error_status)) { + LOG("no_better_repair_found"); + ts_stack_halt(self->stack, slice.version); + return false; + } else { + LOG("repair_found sym:%s, child_count:%u, cost:%u", SYM_NAME(symbol), + repair.count, parent->error_cost); + return true; + } +} + +/* Prepares the parser for a parse: resets the external scanner when it provides a reset function, installs the input on the lexer, clears the stack, creates the reusable node from previous_tree, and sets cached_token and finished_tree to NULL. */ static void parser__start(Parser *self, TSInput input, Tree 
*previous_tree) { + if (previous_tree) { + LOG("parse_after_edit"); + } else { + LOG("new_parse"); + } + + if (self->language->external_scanner.reset) { + self->language->external_scanner.reset(self->external_scanner_payload); + } + + ts_lexer_set_input(&self->lexer, input); + ts_stack_clear(self->stack); + self->reusable_node = reusable_node_new(previous_tree); + self->cached_token = NULL; + self->finished_tree = NULL; +} + +/* Completes a parse on the given stack version. The end-of-input lookahead is marked extra and pushed, the whole stack is popped, and a root tree is built for every resulting slice; the root that parser__select_tree prefers is kept in self->finished_tree. Afterwards the first slice's version is removed and the given version is halted. */ static void parser__accept(Parser *self, StackVersion version, + Tree *lookahead) { + lookahead->extra = true; + assert(lookahead->symbol == ts_builtin_sym_end); + ts_stack_push(self->stack, version, lookahead, false, 1); + StackPopResult pop = ts_stack_pop_all(self->stack, version); + + for (uint32_t i = 0; i < pop.slices.size; i++) { + StackSlice slice = pop.slices.contents[i]; + TreeArray trees = slice.trees; + + Tree *root = NULL; + if (trees.size == 1) { + root = trees.contents[0]; + array_delete(&trees); + } else { + /* search backwards for the last non-extra tree; a copy of it becomes the root, with its children spliced in between the surrounding trees */ for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { + Tree *child = trees.contents[j]; + if (!child->extra) { + root = ts_tree_make_copy(child); + root->child_count = 0; + for (uint32_t k = 0; k < child->child_count; k++) + ts_tree_retain(child->children[k]); + array_splice(&trees, j, 1, child->child_count, child->children); + ts_tree_set_children(root, trees.size, trees.contents); + ts_tree_release(child); + break; + } + } + } + + /* NOTE(review): root stays NULL if every tree in the slice is extra; with no finished tree yet, parser__select_tree would return true and the assert below would dereference NULL - confirm this cannot happen */ if (parser__select_tree(self, self->finished_tree, root)) { + ts_tree_release(self->finished_tree); + assert(root->ref_count > 0); + self->finished_tree = root; + } else { + ts_tree_release(root); + } + } + + ts_stack_remove_version(self->stack, pop.slices.contents[0].version); + ts_stack_halt(self->stack, version); +} + +/* Performs, on the given stack version, every reduction with a nonzero child count that some token would allow in the version's top state, regardless of the actual lookahead. Returns true when nothing could be reduced, or when the state also has a shift or recover action. Otherwise the stack version at index previous_version_count is renumbered to version (presumably replacing it - confirm) and false is returned. */ static bool parser__do_potential_reductions(Parser *self, StackVersion version) { + bool has_shift_action = false; + TSStateId state = ts_stack_top_state(self->stack, version); + uint32_t previous_version_count = ts_stack_version_count(self->stack); + + 
array_clear(&self->reduce_actions); + /* collect the reductions that are possible for any token in this state, and note whether any token can be shifted or recovered */ for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) { + TableEntry entry; + ts_language_table_entry(self->language, state, symbol, &entry); + for (uint32_t i = 0; i < entry.action_count; i++) { + TSParseAction action = entry.actions[i]; + if (action.extra) + continue; + switch (action.type) { + case TSParseActionTypeShift: + case TSParseActionTypeRecover: + has_shift_action = true; + break; + case TSParseActionTypeReduce: + if (action.params.child_count > 0) + ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ + .symbol = action.params.symbol, + .count = action.params.child_count, + }); + /* the reduce case falls through to default, which only breaks */ default: + break; + } + } + } + + bool did_reduce = false; + for (uint32_t i = 0; i < self->reduce_actions.size; i++) { + ReduceAction action = self->reduce_actions.contents[i]; + StackPopResult reduction = + parser__reduce(self, version, action.symbol, action.count, true, false); + /* a reduction that ran into an error is abandoned: its first slice's trees are deleted and that slice's version is removed */ if (reduction.stopped_at_error) { + ts_tree_array_delete(&reduction.slices.contents[0].trees); + ts_stack_remove_version(self->stack, reduction.slices.contents[0].version); + continue; + } else { + did_reduce = true; + } + } + + if (did_reduce) { + if (has_shift_action) { + return true; + } else { + ts_stack_renumber_version(self->stack, previous_version_count, version); + return false; + } + } else { + return true; + } +} + +/* Callback passed to ts_stack_iterate by parser__skip_preceding_trees. Requests a pop and a stop at the first position that is not in ERROR_STATE, lies below at least one tree, has a nonzero number of bytes in the trees above it, and whose first action for the lookahead symbol is a reduction. Returns StackIterateNone everywhere else. */ static StackIterateAction parser__skip_preceding_trees_callback( + void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count, + bool is_done, bool is_pending) { + if (tree_count > 0 && state != ERROR_STATE) { + uint32_t bytes_skipped = 0; + for (uint32_t i = 0; i < trees->size; i++) { + bytes_skipped += ts_tree_total_bytes(trees->contents[i]); + } + if (bytes_skipped == 0) return StackIterateNone; + SkipPrecedingTreesSession *session = payload; + Parser *self = session->parser; + TSSymbol lookahead_symbol = session->lookahead_symbol; + uint32_t action_count; + const TSParseAction *actions = + 
ts_language_actions(self->language, state, lookahead_symbol, &action_count); + if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) { + return StackIteratePop | StackIterateStop; + } + } + return StackIterateNone; +} + +static bool parser__skip_preceding_trees(Parser *self, StackVersion version, + TSSymbol lookahead_symbol) { + SkipPrecedingTreesSession session = { self, lookahead_symbol }; + StackPopResult pop = ts_stack_iterate( + self->stack, version, parser__skip_preceding_trees_callback, &session); + + StackVersion previous_version = STACK_VERSION_NONE; + for (uint32_t i = 0; i < pop.slices.size; i++) { + StackSlice slice = pop.slices.contents[i]; + if (slice.version == previous_version) { + ts_tree_array_delete(&slice.trees); + continue; + } + + previous_version = slice.version; + Tree *error = ts_tree_make_error_node(&slice.trees); + error->extra = true; + TSStateId state = ts_stack_top_state(self->stack, slice.version); + parser__push(self, slice.version, error, state); + } + + return pop.slices.size > 0; +} + +static void parser__handle_error(Parser *self, StackVersion version, + TSSymbol lookahead_symbol) { + // If there are other stack versions that are clearly better than this one, + // just halt this version. + ErrorStatus error_status = ts_stack_error_status(self->stack, version); + error_status.count++; + if (parser__better_version_exists(self, version, error_status)) { + ts_stack_halt(self->stack, version); + LOG("bail_on_error"); + return; + } + + LOG("handle_error"); + + // If the current lookahead symbol would have been valid in some previous + // state on the stack, create one stack version that repairs the error + // immediately by simply skipping all of the trees that came after that state. + if (parser__skip_preceding_trees(self, version, lookahead_symbol)) { + LOG("skip_preceding_trees"); + LOG_STACK(); + } + + // Perform any reductions that could have happened in this state, regardless + // of the lookahead. 
+ uint32_t previous_version_count = ts_stack_version_count(self->stack); + for (StackVersion v = version; v < ts_stack_version_count(self->stack);) { + if (parser__do_potential_reductions(self, v)) { + if (v == version) { + v = previous_version_count; + } else { + v++; + } + } + } + + // Push a discontinuity onto the stack. Merge all of the stack versions that + // were created in the previous step. + ts_stack_push(self->stack, version, NULL, false, ERROR_STATE); + while (ts_stack_version_count(self->stack) > previous_version_count) { + ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); + assert(ts_stack_merge(self->stack, version, previous_version_count)); + } +} + +static void parser__halt_parse(Parser *self) { + LOG("halting_parse"); + LOG_STACK(); + + ts_lexer_advance_to_end(&self->lexer); + Length remaining_length = length_sub( + self->lexer.current_position, + ts_stack_top_position(self->stack, 0) + ); + + Tree *filler_node = ts_tree_make_error(remaining_length, length_zero(), 0); + filler_node->visible = false; + parser__push(self, 0, filler_node, 0); + + TreeArray children = array_new(); + Tree *root_error = ts_tree_make_error_node(&children); + parser__push(self, 0, root_error, 0); + + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, ts_builtin_sym_end); + Tree *eof = ts_tree_make_leaf(ts_builtin_sym_end, length_zero(), length_zero(), metadata); + parser__accept(self, 0, eof); + ts_tree_release(eof); +} + +static void parser__recover(Parser *self, StackVersion version, TSStateId state, + Tree *lookahead) { + if (lookahead->symbol == ts_builtin_sym_end) { + LOG("recover_eof"); + TreeArray children = array_new(); + Tree *parent = ts_tree_make_error_node(&children); + parser__push(self, version, parent, 1); + parser__accept(self, version, lookahead); + } + + LOG("recover state:%u", state); + + StackVersion new_version = ts_stack_copy_version(self->stack, version); + + parser__shift( + self, new_version, 
// Advances stack version `version` by one lookahead.
//
// Fetches a lookahead tree, then repeatedly looks up the parse-table entry for
// (top state of the version, first leaf symbol of the lookahead) and carries
// out its actions. The function returns once the lookahead has been shifted,
// accepted, handed to parser__recover, pushed while in ERROR_STATE, or once
// the version has been halted by parser__handle_error.
//
// `reusable_node` is the caller's cursor into the previous tree; it is popped
// or broken down here whenever the lookahead came from it.
static void parser__advance(Parser *self, StackVersion version,
                            ReusableNode *reusable_node) {
  bool validated_lookahead = false;
  Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);

  for (;;) {
    TSStateId state = ts_stack_top_state(self->stack, version);

    TableEntry table_entry;
    ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);

    // A lookahead that was not validated by parser__get_lookahead must pass
    // parser__can_reuse in the current state; otherwise it is discarded and
    // a new one is fetched.
    if (!validated_lookahead) {
      if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
        if (lookahead == reusable_node->tree) {
          reusable_node_pop_leaf(reusable_node);
        } else {
          parser__clear_cached_token(self);
        }

        ts_tree_release(lookahead);
        lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
        continue;
      }

      validated_lookahead = true;
      LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
    }

    bool reduction_stopped_at_error = false;
    StackVersion last_reduction_version = STACK_VERSION_NONE;

    for (uint32_t i = 0; i < table_entry.action_count; i++) {
      TSParseAction action = table_entry.actions[i];

      switch (action.type) {
        case TSParseActionTypeShift: {
          bool extra = action.extra;
          TSStateId next_state;

          // An extra token leaves the parse state unchanged.
          if (action.extra) {
            next_state = state;
            LOG("shift_extra");
          } else {
            next_state = action.params.to_state;
            LOG("shift state:%u", next_state);
          }

          // A lookahead with children is a reused subtree: break it down
          // first, re-fetch it if the result is no longer reusable, and
          // recompute the target state from whatever lookahead remains.
          if (lookahead->child_count > 0) {
            if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
              if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
                reusable_node_pop(reusable_node);
                ts_tree_release(lookahead);
                lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
              }
            }

            next_state = ts_language_next_state(self->language, state, lookahead->symbol);
          }

          parser__shift(self, version, next_state, lookahead, extra);

          // Move the reuse cursor past the node that was just consumed.
          if (lookahead == reusable_node->tree)
            reusable_node_pop(reusable_node);

          ts_tree_release(lookahead);
          return;
        }

        case TSParseActionTypeReduce: {
          // Once one reduction has run into an error, the remaining reduce
          // actions of this entry are skipped (`continue` applies to the
          // enclosing action loop).
          if (reduction_stopped_at_error)
            continue;

          unsigned child_count = action.params.child_count;
          TSSymbol symbol = action.params.symbol;
          bool fragile = action.fragile;

          LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);

          StackPopResult reduction =
            parser__reduce(self, version, symbol, child_count, fragile, true);
          StackSlice slice = *array_front(&reduction.slices);
          if (reduction.stopped_at_error) {
            reduction_stopped_at_error = true;
            // If the error cannot be repaired, leave without recording this
            // reduction's version.
            if (!parser__repair_error(self, slice, lookahead->first_leaf.symbol,
                                      table_entry))
              break;
          }

          last_reduction_version = slice.version;
          break;
        }

        case TSParseActionTypeAccept: {
          // A version whose error status reports any errors is not accepted
          // here; move on to the next action.
          if (ts_stack_error_status(self->stack, version).count > 0)
            continue;

          LOG("accept");
          parser__accept(self, version, lookahead);
          ts_tree_release(lookahead);
          return;
        }

        case TSParseActionTypeRecover: {
          // Break the reusable node down until the lookahead has no
          // children, swapping the held reference to the new node each time.
          while (lookahead->child_count > 0) {
            reusable_node_breakdown(reusable_node);
            ts_tree_release(lookahead);
            lookahead = reusable_node->tree;
            ts_tree_retain(lookahead);
          }

          parser__recover(self, version, action.params.to_state, lookahead);
          if (lookahead == reusable_node->tree)
            reusable_node_pop(reusable_node);
          ts_tree_release(lookahead);
          return;
        }
      }
    }

    // No action returned. If a reduction was recorded, renumber its version
    // onto `version` and run the loop again with the same lookahead.
    if (last_reduction_version != STACK_VERSION_NONE) {
      ts_stack_renumber_version(self->stack, last_reduction_version, version);
      LOG_STACK();
      continue;
    }

    // Otherwise try breaking down the top of the stack and retry.
    if (parser__breakdown_top_of_stack(self, version)) {
      continue;
    }

    // Already in the error state: push the lookahead as-is, staying in
    // ERROR_STATE.
    if (state == ERROR_STATE) {
      parser__push(self, version, lookahead, ERROR_STATE);
      return;
    }

    parser__handle_error(self, version, lookahead->first_leaf.symbol);

    // Error handling may have halted this version; if it did not, loop again
    // with the same lookahead.
    if (ts_stack_is_halted(self->stack, version)) {
      ts_tree_release(lookahead);
      return;
    }
  }
}
/**
 * Named precedence levels referenced by the `prec`, `prec.left`, `prec.right`
 * and `prec.dynamic` calls in the grammar rules below.
 *
 * In tree-sitter, the rule with the higher numeric precedence is preferred
 * when rules conflict, so a larger value here means tighter binding.
 */
const PREC = {
  // Used with prec.dynamic by the parenthesized declarator rules.
  PAREN_DECLARATOR: -10,
  ASSIGNMENT: -2,
  CONDITIONAL: -1,
  DEFAULT: 0,
  // Binary operator tiers, loosest to tightest (see the operator tables in
  // the binary expression rules).
  LOGICAL_OR: 1,
  LOGICAL_AND: 2,
  INCLUSIVE_OR: 3,
  EXCLUSIVE_OR: 4,
  BITWISE_AND: 5,
  EQUAL: 6,
  RELATIONAL: 7,
  OFFSETOF: 8,
  SHIFT: 9,
  ADD: 10,
  MULTIPLY: 11,
  // Prefix, call and postfix forms.
  CAST: 12,
  SIZEOF: 13,
  UNARY: 14,
  CALL: 15,
  FIELD: 16,
  SUBSCRIPT: 17,
};
[$._type_specifier, $._declarator], + [$._type_specifier, $._declarator, $.macro_type_specifier], + [$._type_specifier, $._expression_not_binary], + [$._type_specifier, $._expression_not_binary, $.macro_type_specifier], + [$._type_specifier, $.macro_type_specifier], + [$.sized_type_specifier], + [$.attributed_statement], + [$._declaration_modifiers, $.attributed_statement], + [$.enum_specifier], + [$._type_specifier, $._old_style_parameter_list], + [$.parameter_list, $._old_style_parameter_list], + ], + + word: $ => $.identifier, + + rules: { + translation_unit: $ => repeat($._top_level_item), + + // Top level items are block items with the exception of the expression statement + _top_level_item: $ => choice( + $.function_definition, + alias($._old_style_function_definition, $.function_definition), + $.linkage_specification, + $.declaration, + $._top_level_statement, + $.attributed_statement, + $.type_definition, + $._empty_declaration, + $.preproc_if, + $.preproc_ifdef, + $.preproc_include, + $.preproc_def, + $.preproc_function_def, + $.preproc_call, + ), + + _block_item: $ => choice( + $.function_definition, + alias($._old_style_function_definition, $.function_definition), + $.linkage_specification, + $.declaration, + $._statement, + $.attributed_statement, + $.type_definition, + $._empty_declaration, + $.preproc_if, + $.preproc_ifdef, + $.preproc_include, + $.preproc_def, + $.preproc_function_def, + $.preproc_call, + ), + + // Preprocesser + + preproc_include: $ => seq( + preprocessor('include'), + field('path', choice( + $.string_literal, + $.system_lib_string, + $.identifier, + alias($.preproc_call_expression, $.call_expression), + )), + token.immediate(/\r?\n/), + ), + + preproc_def: $ => seq( + preprocessor('define'), + field('name', $.identifier), + field('value', optional($.preproc_arg)), + token.immediate(/\r?\n/), + ), + + preproc_function_def: $ => seq( + preprocessor('define'), + field('name', $.identifier), + field('parameters', $.preproc_params), + 
field('value', optional($.preproc_arg)), + token.immediate(/\r?\n/), + ), + + preproc_params: $ => seq( + token.immediate('('), commaSep(choice($.identifier, '...')), ')', + ), + + preproc_call: $ => seq( + field('directive', $.preproc_directive), + field('argument', optional($.preproc_arg)), + token.immediate(/\r?\n/), + ), + + ...preprocIf('', $ => $._block_item), + ...preprocIf('_in_field_declaration_list', $ => $._field_declaration_list_item), + + preproc_arg: _ => token(prec(-1, /\S([^/\n]|\/[^*]|\\\r?\n)*/)), + preproc_directive: _ => /#[ \t]*[a-zA-Z0-9]\w*/, + + _preproc_expression: $ => choice( + $.identifier, + alias($.preproc_call_expression, $.call_expression), + $.number_literal, + $.char_literal, + $.preproc_defined, + alias($.preproc_unary_expression, $.unary_expression), + alias($.preproc_binary_expression, $.binary_expression), + alias($.preproc_parenthesized_expression, $.parenthesized_expression), + ), + + preproc_parenthesized_expression: $ => seq( + '(', + $._preproc_expression, + ')', + ), + + preproc_defined: $ => choice( + prec(PREC.CALL, seq('defined', '(', $.identifier, ')')), + seq('defined', $.identifier), + ), + + preproc_unary_expression: $ => prec.left(PREC.UNARY, seq( + field('operator', choice('!', '~', '-', '+')), + field('argument', $._preproc_expression), + )), + + preproc_call_expression: $ => prec(PREC.CALL, seq( + field('function', $.identifier), + field('arguments', alias($.preproc_argument_list, $.argument_list)), + )), + + preproc_argument_list: $ => seq( + '(', + commaSep($._preproc_expression), + ')', + ), + + preproc_binary_expression: $ => { + const table = [ + ['+', PREC.ADD], + ['-', PREC.ADD], + ['*', PREC.MULTIPLY], + ['/', PREC.MULTIPLY], + ['%', PREC.MULTIPLY], + ['||', PREC.LOGICAL_OR], + ['&&', PREC.LOGICAL_AND], + ['|', PREC.INCLUSIVE_OR], + ['^', PREC.EXCLUSIVE_OR], + ['&', PREC.BITWISE_AND], + ['==', PREC.EQUAL], + ['!=', PREC.EQUAL], + ['>', PREC.RELATIONAL], + ['>=', PREC.RELATIONAL], + ['<=', 
PREC.RELATIONAL], + ['<', PREC.RELATIONAL], + ['<<', PREC.SHIFT], + ['>>', PREC.SHIFT], + ]; + + return choice(...table.map(([operator, precedence]) => { + return prec.left(precedence, seq( + field('left', $._preproc_expression), + // @ts-ignore + field('operator', operator), + field('right', $._preproc_expression), + )); + })); + }, + + // Main Grammar + + function_definition: $ => seq( + optional($.ms_call_modifier), + $._declaration_specifiers, + field('declarator', $._declarator), + field('body', $.compound_statement), + ), + + _old_style_function_definition: $ => seq( + optional($.ms_call_modifier), + $._declaration_specifiers, + field('declarator', alias($._old_style_function_declarator, $.function_declarator)), + repeat($.declaration), + field('body', $.compound_statement), + ), + + declaration: $ => seq( + $._declaration_specifiers, + $._declaration_declarator, + ';', + ), + _declaration_declarator: $ => commaSep1(field('declarator', choice( + seq($._declarator, optional($.gnu_asm_expression)), + $.init_declarator, + ))), + + type_definition: $ => seq( + optional('__extension__'), + 'typedef', + $._type_definition_type, + $._type_definition_declarators, + repeat($.attribute_specifier), + ';', + ), + _type_definition_type: $ => seq(repeat($.type_qualifier), field('type', $._type_specifier), repeat($.type_qualifier)), + _type_definition_declarators: $ => commaSep1(field('declarator', $._type_declarator)), + + _declaration_modifiers: $ => choice( + $.storage_class_specifier, + $.type_qualifier, + $.attribute_specifier, + $.attribute_declaration, + $.ms_declspec_modifier, + ), + + _declaration_specifiers: $ => prec.right(seq( + repeat($._declaration_modifiers), + field('type', $._type_specifier), + repeat($._declaration_modifiers), + )), + + linkage_specification: $ => seq( + 'extern', + field('value', $.string_literal), + field('body', choice( + $.function_definition, + $.declaration, + $.declaration_list, + )), + ), + + attribute_specifier: $ => seq( + 
'__attribute__', + '(', + $.argument_list, + ')', + ), + + attribute: $ => seq( + optional(seq(field('prefix', $.identifier), '::')), + field('name', $.identifier), + optional($.argument_list), + ), + + attribute_declaration: $ => seq( + '[[', + commaSep1($.attribute), + ']]', + ), + + ms_declspec_modifier: $ => seq( + '__declspec', + '(', + $.identifier, + ')', + ), + + ms_based_modifier: $ => seq( + '__based', + $.argument_list, + ), + + ms_call_modifier: _ => choice( + '__cdecl', + '__clrcall', + '__stdcall', + '__fastcall', + '__thiscall', + '__vectorcall', + ), + + ms_restrict_modifier: _ => '__restrict', + + ms_unsigned_ptr_modifier: _ => '__uptr', + + ms_signed_ptr_modifier: _ => '__sptr', + + ms_unaligned_ptr_modifier: _ => choice('_unaligned', '__unaligned'), + + ms_pointer_modifier: $ => choice( + $.ms_unaligned_ptr_modifier, + $.ms_restrict_modifier, + $.ms_unsigned_ptr_modifier, + $.ms_signed_ptr_modifier, + ), + + declaration_list: $ => seq( + '{', + repeat($._block_item), + '}', + ), + + _declarator: $ => choice( + $.attributed_declarator, + $.pointer_declarator, + $.function_declarator, + $.array_declarator, + $.parenthesized_declarator, + $.identifier, + ), + + _field_declarator: $ => choice( + alias($.attributed_field_declarator, $.attributed_declarator), + alias($.pointer_field_declarator, $.pointer_declarator), + alias($.function_field_declarator, $.function_declarator), + alias($.array_field_declarator, $.array_declarator), + alias($.parenthesized_field_declarator, $.parenthesized_declarator), + $._field_identifier, + ), + + _type_declarator: $ => choice( + alias($.attributed_type_declarator, $.attributed_declarator), + alias($.pointer_type_declarator, $.pointer_declarator), + alias($.function_type_declarator, $.function_declarator), + alias($.array_type_declarator, $.array_declarator), + alias($.parenthesized_type_declarator, $.parenthesized_declarator), + $._type_identifier, + alias(choice('signed', 'unsigned', 'long', 'short'), 
$.primitive_type), + $.primitive_type, + ), + + _abstract_declarator: $ => choice( + $.abstract_pointer_declarator, + $.abstract_function_declarator, + $.abstract_array_declarator, + $.abstract_parenthesized_declarator, + ), + + parenthesized_declarator: $ => prec.dynamic(PREC.PAREN_DECLARATOR, seq( + '(', + $._declarator, + ')', + )), + parenthesized_field_declarator: $ => prec.dynamic(PREC.PAREN_DECLARATOR, seq( + '(', + $._field_declarator, + ')', + )), + parenthesized_type_declarator: $ => prec.dynamic(PREC.PAREN_DECLARATOR, seq( + '(', + $._type_declarator, + ')', + )), + abstract_parenthesized_declarator: $ => prec(1, seq( + '(', + $._abstract_declarator, + ')', + )), + + + attributed_declarator: $ => prec.right(seq( + $._declarator, + repeat1($.attribute_declaration), + )), + attributed_field_declarator: $ => prec.right(seq( + $._field_declarator, + repeat1($.attribute_declaration), + )), + attributed_type_declarator: $ => prec.right(seq( + $._type_declarator, + repeat1($.attribute_declaration), + )), + + pointer_declarator: $ => prec.dynamic(1, prec.right(seq( + optional($.ms_based_modifier), + '*', + repeat($.ms_pointer_modifier), + repeat($.type_qualifier), + field('declarator', $._declarator), + ))), + pointer_field_declarator: $ => prec.dynamic(1, prec.right(seq( + optional($.ms_based_modifier), + '*', + repeat($.ms_pointer_modifier), + repeat($.type_qualifier), + field('declarator', $._field_declarator), + ))), + pointer_type_declarator: $ => prec.dynamic(1, prec.right(seq( + optional($.ms_based_modifier), + '*', + repeat($.ms_pointer_modifier), + repeat($.type_qualifier), + field('declarator', $._type_declarator), + ))), + abstract_pointer_declarator: $ => prec.dynamic(1, prec.right(seq('*', + repeat($.type_qualifier), + field('declarator', optional($._abstract_declarator)), + ))), + + function_declarator: $ => prec.right(1, + seq( + field('declarator', $._declarator), + field('parameters', $.parameter_list), + optional($.gnu_asm_expression), + 
repeat($.attribute_specifier), + )), + function_field_declarator: $ => prec(1, seq( + field('declarator', $._field_declarator), + field('parameters', $.parameter_list), + )), + function_type_declarator: $ => prec(1, seq( + field('declarator', $._type_declarator), + field('parameters', $.parameter_list), + )), + abstract_function_declarator: $ => prec(1, seq( + field('declarator', optional($._abstract_declarator)), + field('parameters', $.parameter_list), + )), + + _old_style_function_declarator: $ => seq( + field('declarator', $._declarator), + field('parameters', alias($._old_style_parameter_list, $.parameter_list)), + ), + + array_declarator: $ => prec(1, seq( + field('declarator', $._declarator), + '[', + repeat($.type_qualifier), + field('size', optional(choice($._expression, '*'))), + ']', + )), + array_field_declarator: $ => prec(1, seq( + field('declarator', $._field_declarator), + '[', + repeat($.type_qualifier), + field('size', optional(choice($._expression, '*'))), + ']', + )), + array_type_declarator: $ => prec(1, seq( + field('declarator', $._type_declarator), + '[', + repeat($.type_qualifier), + field('size', optional(choice($._expression, '*'))), + ']', + )), + abstract_array_declarator: $ => prec(1, seq( + field('declarator', optional($._abstract_declarator)), + '[', + repeat($.type_qualifier), + field('size', optional(choice($._expression, '*'))), + ']', + )), + + init_declarator: $ => seq( + field('declarator', $._declarator), + '=', + field('value', choice($.initializer_list, $._expression)), + ), + + compound_statement: $ => seq( + '{', + repeat($._block_item), + '}', + ), + + storage_class_specifier: _ => choice( + 'extern', + 'static', + 'auto', + 'register', + 'inline', + '__inline', + '__inline__', + '__forceinline', + 'thread_local', + '__thread', + ), + + type_qualifier: _ => choice( + 'const', + 'constexpr', + 'volatile', + 'restrict', + '__restrict__', + '__extension__', + '_Atomic', + '_Noreturn', + 'noreturn', + ), + + _type_specifier: 
$ => choice( + $.struct_specifier, + $.union_specifier, + $.enum_specifier, + $.macro_type_specifier, + $.sized_type_specifier, + $.primitive_type, + $._type_identifier, + ), + + sized_type_specifier: $ => seq( + repeat1(choice( + 'signed', + 'unsigned', + 'long', + 'short', + )), + field('type', optional(choice( + prec.dynamic(-1, $._type_identifier), + $.primitive_type, + ))), + ), + + primitive_type: _ => token(choice( + 'bool', + 'char', + 'int', + 'float', + 'double', + 'void', + 'size_t', + 'ssize_t', + 'ptrdiff_t', + 'intptr_t', + 'uintptr_t', + 'charptr_t', + 'nullptr_t', + 'max_align_t', + ...[8, 16, 32, 64].map(n => `int${n}_t`), + ...[8, 16, 32, 64].map(n => `uint${n}_t`), + ...[8, 16, 32, 64].map(n => `char${n}_t`), + )), + + enum_specifier: $ => seq( + 'enum', + choice( + seq( + field('name', $._type_identifier), + optional(seq(':', field('underlying_type', $.primitive_type))), + field('body', optional($.enumerator_list)), + ), + field('body', $.enumerator_list), + ), + optional($.attribute_specifier), + ), + + enumerator_list: $ => seq( + '{', + commaSep($.enumerator), + optional(','), + '}', + ), + + struct_specifier: $ => prec.right(seq( + 'struct', + optional($.attribute_specifier), + optional($.ms_declspec_modifier), + choice( + seq( + field('name', $._type_identifier), + field('body', optional($.field_declaration_list)), + ), + field('body', $.field_declaration_list), + ), + optional($.attribute_specifier), + )), + + union_specifier: $ => prec.right(seq( + 'union', + optional($.ms_declspec_modifier), + choice( + seq( + field('name', $._type_identifier), + field('body', optional($.field_declaration_list)), + ), + field('body', $.field_declaration_list), + ), + optional($.attribute_specifier), + )), + + field_declaration_list: $ => seq( + '{', + repeat($._field_declaration_list_item), + '}', + ), + + _field_declaration_list_item: $ => choice( + $.field_declaration, + $.preproc_def, + $.preproc_function_def, + $.preproc_call, + 
alias($.preproc_if_in_field_declaration_list, $.preproc_if), + alias($.preproc_ifdef_in_field_declaration_list, $.preproc_ifdef), + ), + + field_declaration: $ => seq( + $._declaration_specifiers, + optional($._field_declaration_declarator), + optional($.attribute_specifier), + ';', + ), + _field_declaration_declarator: $ => commaSep1(seq( + field('declarator', $._field_declarator), + optional($.bitfield_clause), + )), + + bitfield_clause: $ => seq(':', $._expression), + + enumerator: $ => seq( + field('name', $.identifier), + optional(seq('=', field('value', $._expression))), + ), + + variadic_parameter: _ => seq( + '...', + ), + + parameter_list: $ => seq( + '(', + commaSep(choice($.parameter_declaration, $.variadic_parameter)), + ')', + ), + _old_style_parameter_list: $ => seq( + '(', + commaSep(choice($.identifier, $.variadic_parameter)), + ')', + ), + + parameter_declaration: $ => seq( + $._declaration_specifiers, + optional(field('declarator', choice( + $._declarator, + $._abstract_declarator, + ))), + ), + + // Statements + + attributed_statement: $ => seq( + repeat1($.attribute_declaration), + $._statement, + ), + + _statement: $ => choice( + $.case_statement, + $._non_case_statement, + ), + + _non_case_statement: $ => choice( + $.attributed_statement, + $.labeled_statement, + $.compound_statement, + $.expression_statement, + $.if_statement, + $.switch_statement, + $.do_statement, + $.while_statement, + $.for_statement, + $.return_statement, + $.break_statement, + $.continue_statement, + $.goto_statement, + $.seh_try_statement, + $.seh_leave_statement, + ), + + _top_level_statement: $ => choice( + $.case_statement, + $.attributed_statement, + $.labeled_statement, + $.compound_statement, + alias($._top_level_expression_statement, $.expression_statement), + $.if_statement, + $.switch_statement, + $.do_statement, + $.while_statement, + $.for_statement, + $.return_statement, + $.break_statement, + $.continue_statement, + $.goto_statement, + ), + + 
labeled_statement: $ => seq( + field('label', $._statement_identifier), + ':', + $._statement, + ), + + // This is missing binary expressions, others were kept so that macro code can be parsed better and code examples + _top_level_expression_statement: $ => seq( + $._expression_not_binary, + ';', + ), + + expression_statement: $ => seq( + optional(choice( + $._expression, + $.comma_expression, + )), + ';', + ), + + if_statement: $ => prec.right(seq( + 'if', + field('condition', $.parenthesized_expression), + field('consequence', $._statement), + optional(field('alternative', $.else_clause)), + )), + + else_clause: $ => seq('else', $._statement), + + switch_statement: $ => seq( + 'switch', + field('condition', $.parenthesized_expression), + field('body', $.compound_statement), + ), + + case_statement: $ => prec.right(seq( + choice( + seq('case', field('value', $._expression)), + 'default', + ), + ':', + repeat(choice( + $._non_case_statement, + $.declaration, + $.type_definition, + )), + )), + + while_statement: $ => seq( + 'while', + field('condition', $.parenthesized_expression), + field('body', $._statement), + ), + + do_statement: $ => seq( + 'do', + field('body', $._statement), + 'while', + field('condition', $.parenthesized_expression), + ';', + ), + + for_statement: $ => seq( + 'for', + '(', + $._for_statement_body, + ')', + field('body', $._statement), + ), + _for_statement_body: $ => seq( + choice( + field('initializer', $.declaration), + seq(field('initializer', optional(choice($._expression, $.comma_expression))), ';'), + ), + field('condition', optional(choice($._expression, $.comma_expression))), + ';', + field('update', optional(choice($._expression, $.comma_expression))), + ), + + return_statement: $ => seq( + 'return', + optional(choice($._expression, $.comma_expression)), + ';', + ), + + break_statement: _ => seq( + 'break', ';', + ), + + continue_statement: _ => seq( + 'continue', ';', + ), + + goto_statement: $ => seq( + 'goto', + field('label', 
$._statement_identifier), + ';', + ), + + seh_try_statement: $ => seq( + '__try', + field('body', $.compound_statement), + choice($.seh_except_clause, $.seh_finally_clause), + ), + + seh_except_clause: $ => seq( + '__except', + field('filter', $.parenthesized_expression), + field('body', $.compound_statement), + ), + + seh_finally_clause: $ => seq( + '__finally', + field('body', $.compound_statement), + ), + + seh_leave_statement: _ => seq( + '__leave', ';', + ), + + // Expressions + + _expression: $ => choice( + $._expression_not_binary, + $.binary_expression, + ), + + _expression_not_binary: $ => choice( + $.conditional_expression, + $.assignment_expression, + $.unary_expression, + $.update_expression, + $.cast_expression, + $.pointer_expression, + $.sizeof_expression, + $.alignof_expression, + $.offsetof_expression, + $.generic_expression, + $.subscript_expression, + $.call_expression, + $.field_expression, + $.compound_literal_expression, + $.identifier, + $.number_literal, + $.string_literal, + $.true, + $.false, + $.null, + $.concatenated_string, + $.char_literal, + $.parenthesized_expression, + $.gnu_asm_expression, + ), + + comma_expression: $ => seq( + field('left', $._expression), + ',', + field('right', choice($._expression, $.comma_expression)), + ), + + conditional_expression: $ => prec.right(PREC.CONDITIONAL, seq( + field('condition', $._expression), + '?', + optional(field('consequence', $._expression)), + ':', + field('alternative', $._expression), + )), + + _assignment_left_expression: $ => choice( + $.identifier, + $.call_expression, + $.field_expression, + $.pointer_expression, + $.subscript_expression, + $.parenthesized_expression, + ), + + assignment_expression: $ => prec.right(PREC.ASSIGNMENT, seq( + field('left', $._assignment_left_expression), + field('operator', choice( + '=', + '*=', + '/=', + '%=', + '+=', + '-=', + '<<=', + '>>=', + '&=', + '^=', + '|=', + )), + field('right', $._expression), + )), + + pointer_expression: $ => 
prec.left(PREC.CAST, seq( + field('operator', choice('*', '&')), + field('argument', $._expression), + )), + + unary_expression: $ => prec.left(PREC.UNARY, seq( + field('operator', choice('!', '~', '-', '+')), + field('argument', $._expression), + )), + + binary_expression: $ => { + const table = [ + ['+', PREC.ADD], + ['-', PREC.ADD], + ['*', PREC.MULTIPLY], + ['/', PREC.MULTIPLY], + ['%', PREC.MULTIPLY], + ['||', PREC.LOGICAL_OR], + ['&&', PREC.LOGICAL_AND], + ['|', PREC.INCLUSIVE_OR], + ['^', PREC.EXCLUSIVE_OR], + ['&', PREC.BITWISE_AND], + ['==', PREC.EQUAL], + ['!=', PREC.EQUAL], + ['>', PREC.RELATIONAL], + ['>=', PREC.RELATIONAL], + ['<=', PREC.RELATIONAL], + ['<', PREC.RELATIONAL], + ['<<', PREC.SHIFT], + ['>>', PREC.SHIFT], + ]; + + return choice(...table.map(([operator, precedence]) => { + return prec.left(precedence, seq( + field('left', $._expression), + // @ts-ignore + field('operator', operator), + field('right', $._expression), + )); + })); + }, + + update_expression: $ => { + const argument = field('argument', $._expression); + const operator = field('operator', choice('--', '++')); + return prec.right(PREC.UNARY, choice( + seq(operator, argument), + seq(argument, operator), + )); + }, + + cast_expression: $ => prec(PREC.CAST, seq( + '(', + field('type', $.type_descriptor), + ')', + field('value', $._expression), + )), + + type_descriptor: $ => seq( + repeat($.type_qualifier), + field('type', $._type_specifier), + repeat($.type_qualifier), + field('declarator', optional($._abstract_declarator)), + ), + + sizeof_expression: $ => prec(PREC.SIZEOF, seq( + 'sizeof', + choice( + field('value', $._expression), + seq('(', field('type', $.type_descriptor), ')'), + ), + )), + + alignof_expression: $ => prec(PREC.SIZEOF, seq( + choice('__alignof__', '__alignof', '_alignof', 'alignof', '_Alignof'), + seq('(', field('type', $.type_descriptor), ')'), + )), + + offsetof_expression: $ => prec(PREC.OFFSETOF, seq( + 'offsetof', + seq('(', field('type', 
$.type_descriptor), ',', field('member', $._field_identifier), ')'), + )), + + generic_expression: $ => prec(PREC.CALL, seq( + '_Generic', + '(', + $._expression, + ',', + commaSep1(seq($.type_descriptor, ':', $._expression)), + ')', + )), + + subscript_expression: $ => prec(PREC.SUBSCRIPT, seq( + field('argument', $._expression), + '[', + field('index', $._expression), + ']', + )), + + call_expression: $ => prec(PREC.CALL, seq( + field('function', $._expression), + field('arguments', $.argument_list), + )), + + gnu_asm_expression: $ => prec(PREC.CALL, seq( + choice('asm', '__asm__'), + repeat($.gnu_asm_qualifier), + '(', + field('assembly_code', choice($.string_literal, $.concatenated_string)), + optional(seq( + field('output_operands', $.gnu_asm_output_operand_list), + optional(seq( + field('input_operands', $.gnu_asm_input_operand_list), + optional(seq( + field('clobbers', $.gnu_asm_clobber_list), + optional(field('goto_labels', $.gnu_asm_goto_list)), + )), + )), + )), + ')', + )), + + gnu_asm_qualifier: _ => choice( + 'volatile', + 'inline', + 'goto', + ), + + gnu_asm_output_operand_list: $ => seq( + ':', + commaSep(field('operand', $.gnu_asm_output_operand)), + ), + + gnu_asm_output_operand: $ => seq( + optional(seq( + '[', + field('symbol', $.identifier), + ']', + )), + field('constraint', $.string_literal), + '(', + field('value', $.identifier), + ')', + ), + + gnu_asm_input_operand_list: $ => seq( + ':', + commaSep(field('operand', $.gnu_asm_input_operand)), + ), + + gnu_asm_input_operand: $ => seq( + optional(seq( + '[', + field('symbol', $.identifier), + ']', + )), + field('constraint', $.string_literal), + '(', + field('value', $._expression), + ')', + ), + + gnu_asm_clobber_list: $ => seq( + ':', + commaSep(field('register', $.string_literal)), + ), + + gnu_asm_goto_list: $ => seq( + ':', + commaSep(field('label', $.identifier)), + ), + + // The compound_statement is added to parse macros taking statements as arguments, e.g. 
MYFORLOOP(1, 10, i, { foo(i); bar(i); }) + argument_list: $ => seq('(', commaSep(choice(seq(optional('__extension__'), $._expression), $.compound_statement)), ')'), + + field_expression: $ => seq( + prec(PREC.FIELD, seq( + field('argument', $._expression), + field('operator', choice('.', '->')), + )), + field('field', $._field_identifier), + ), + + compound_literal_expression: $ => seq( + '(', + field('type', $.type_descriptor), + ')', + field('value', $.initializer_list), + ), + + parenthesized_expression: $ => seq( + '(', + choice($._expression, $.comma_expression), + ')', + ), + + initializer_list: $ => seq( + '{', + commaSep(choice( + $.initializer_pair, + $._expression, + $.initializer_list, + )), + optional(','), + '}', + ), + + initializer_pair: $ => seq( + field('designator', repeat1(choice($.subscript_designator, $.field_designator))), + '=', + field('value', choice($._expression, $.initializer_list)), + ), + + subscript_designator: $ => seq('[', $._expression, ']'), + + field_designator: $ => seq('.', $._field_identifier), + + number_literal: _ => { + const separator = '\''; + const hex = /[0-9a-fA-F]/; + const decimal = /[0-9]/; + const hexDigits = seq(repeat1(hex), repeat(seq(separator, repeat1(hex)))); + const decimalDigits = seq(repeat1(decimal), repeat(seq(separator, repeat1(decimal)))); + return token(seq( + optional(/[-\+]/), + optional(choice(/0[xX]/, /0[bB]/)), + choice( + seq( + choice( + decimalDigits, + seq(/0[bB]/, decimalDigits), + seq(/0[xX]/, hexDigits), + ), + optional(seq('.', optional(hexDigits))), + ), + seq('.', decimalDigits), + ), + optional(seq( + /[eEpP]/, + optional(seq( + optional(/[-\+]/), + hexDigits, + )), + )), + /[uUlLwWfFbBdD]*/, + )); + }, + + char_literal: $ => seq( + choice('L\'', 'u\'', 'U\'', 'u8\'', '\''), + choice( + $.escape_sequence, + alias(token.immediate(/[^\n']/), $.character), + ), + '\'', + ), + + concatenated_string: $ => seq( + choice($.identifier, $.string_literal), + $.string_literal, + 
repeat(choice($.string_literal, $.identifier)), // Identifier is added to parse macros that are strings, like PRIu64 + ), + + string_literal: $ => seq( + choice('L"', 'u"', 'U"', 'u8"', '"'), + repeat(choice( + alias(token.immediate(prec(1, /[^\\"\n]+/)), $.string_content), + $.escape_sequence, + )), + '"', + ), + + escape_sequence: _ => token(prec(1, seq( + '\\', + choice( + /[^xuU]/, + /\d{2,3}/, + /x[0-9a-fA-F]{2,}/, + /u[0-9a-fA-F]{4}/, + /U[0-9a-fA-F]{8}/, + ), + ))), + + system_lib_string: _ => token(seq( + '<', + repeat(choice(/[^>\n]/, '\\>')), + '>', + )), + + true: _ => token(choice('TRUE', 'true')), + false: _ => token(choice('FALSE', 'false')), + null: _ => choice('NULL', 'nullptr'), + + identifier: _ => + // eslint-disable-next-line max-len + /(\p{XID_Start}|\$|_|\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})(\p{XID_Continue}|\$|\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})*/, + + _type_identifier: $ => alias( + $.identifier, + $.type_identifier, + ), + _field_identifier: $ => alias($.identifier, $.field_identifier), + _statement_identifier: $ => alias($.identifier, $.statement_identifier), + + _empty_declaration: $ => seq( + $._type_specifier, + ';', + ), + + macro_type_specifier: $ => prec.dynamic(-1, seq( + field('name', $.identifier), + '(', + field('type', $.type_descriptor), + ')', + )), + + // http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890 + comment: _ => token(choice( + seq('//', /(\\+(.|\r?\n)|[^\\\n])*/), + seq( + '/*', + /[^*]*\*+([^/*][^*]*\*+)*/, + '/', + ), + )), + }, + + supertypes: $ => [ + $._expression, + $._statement, + $._type_specifier, + $._declarator, + $._field_declarator, + $._type_declarator, + $._abstract_declarator, + ], +}); + +module.exports.PREC = PREC; + +/** + * + * @param {string} suffix + * + * @param {RuleBuilder<string>} content + * + * @return {RuleBuilders<string, string>} + */ +function preprocIf(suffix, content) { + /** + * + * @param {GrammarSymbols<string>} $ + * + * 
@return {ChoiceRule} + * + */ + function elseBlock($) { + return choice( + suffix ? alias($['preproc_else' + suffix], $.preproc_else) : $.preproc_else, + suffix ? alias($['preproc_elif' + suffix], $.preproc_elif) : $.preproc_elif, + ); + } + + return { + ['preproc_if' + suffix]: $ => seq( + preprocessor('if'), + field('condition', $._preproc_expression), + '\n', + repeat(content($)), + field('alternative', optional(elseBlock($))), + preprocessor('endif'), + ), + + ['preproc_ifdef' + suffix]: $ => seq( + choice(preprocessor('ifdef'), preprocessor('ifndef')), + field('name', $.identifier), + repeat(content($)), + field('alternative', optional(choice(elseBlock($), $.preproc_elifdef))), + preprocessor('endif'), + ), + + ['preproc_else' + suffix]: $ => seq( + preprocessor('else'), + repeat(content($)), + ), + + ['preproc_elif' + suffix]: $ => seq( + preprocessor('elif'), + field('condition', $._preproc_expression), + '\n', + repeat(content($)), + field('alternative', optional(elseBlock($))), + ), + + ['preproc_elifdef' + suffix]: $ => seq( + choice(preprocessor('elifdef'), preprocessor('elifndef')), + field('name', $.identifier), + repeat(content($)), + field('alternative', optional(elseBlock($))), + ), + }; +} + +/** + * Creates a preprocessor regex rule + * + * @param {RegExp|Rule|String} command + * + * @return {AliasRule} + */ +function preprocessor(command) { + return alias(new RegExp('#[ \t]*' + command), '#' + command); +} + +/** + * Creates a rule to optionally match one or more of the rules separated by a comma + * + * @param {Rule} rule + * + * @return {ChoiceRule} + * + */ +function commaSep(rule) { + return optional(commaSep1(rule)); +} + +/** + * Creates a rule to match one or more of the rules separated by a comma + * + * @param {Rule} rule + * + * @return {SeqRule} + * + */ +function commaSep1(rule) { + return seq(rule, repeat(seq(',', rule))); +} diff --git a/c/test/corpus/ambiguities.txt b/c/test/corpus/ambiguities.txt new file mode 100644 index 
0000000..424490d --- /dev/null +++ b/c/test/corpus/ambiguities.txt @@ -0,0 +1,274 @@ +================================================================================ +pointer declarations vs expressions +================================================================================ + +TSLanguage *(*lang_parser)(void); + +char (*ptr_to_array)[]; + +int main() { + // declare a function pointer + T1 * b(T2 a); + + // evaluate expressions + c * d(5); + e(f * g); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (type_identifier) + (pointer_declarator + (function_declarator + (parenthesized_declarator + (pointer_declarator + (identifier))) + (parameter_list + (parameter_declaration + (primitive_type)))))) + (declaration + (primitive_type) + (array_declarator + (parenthesized_declarator + (pointer_declarator + (identifier))))) + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (comment) + (declaration + (type_identifier) + (pointer_declarator + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (type_identifier) + (identifier)))))) + (comment) + (expression_statement + (binary_expression + (identifier) + (call_expression + (identifier) + (argument_list + (number_literal))))) + (expression_statement + (call_expression + (identifier) + (argument_list + (binary_expression + (identifier) + (identifier)))))))) + +================================================================================ +casts vs multiplications +================================================================================ + +/* + * ambiguities + */ + +int main() { + // cast + a((B *)c); + + // parenthesized product + d((e * f)); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (comment) + (function_definition + (primitive_type) + (function_declarator + 
(identifier) + (parameter_list)) + (compound_statement + (comment) + (expression_statement + (call_expression + (identifier) + (argument_list + (cast_expression + (type_descriptor + (type_identifier) + (abstract_pointer_declarator)) + (identifier))))) + (comment) + (expression_statement + (call_expression + (identifier) + (argument_list + (parenthesized_expression + (binary_expression + (identifier) + (identifier))))))))) + +================================================================================ +function-like type macros vs function calls +================================================================================ + +// this is a macro +GIT_INLINE(int *) x = 5; + +-------------------------------------------------------------------------------- + +(translation_unit + (comment) + (declaration + (macro_type_specifier + (identifier) + (type_descriptor + (primitive_type) + (abstract_pointer_declarator))) + (init_declarator + (identifier) + (number_literal)))) + +================================================================================ +function calls vs parenthesized declarators vs macro types +================================================================================ + +int main() { + /* + * Could be either: + * - function call + * - declaration w/ parenthesized declarator + * - declaration w/ macro type, no declarator + */ + ABC(d); + + /* + * Normal declaration + */ + efg hij; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (comment) + (expression_statement + (call_expression + (identifier) + (argument_list + (identifier)))) + (comment) + (declaration + (type_identifier) + (identifier))))) + +================================================================================ +Call expressions vs empty declarations w/ macros as types 
+================================================================================ + +int main() { + int a = 1; + b(a); + A(A *); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (expression_statement + (call_expression + (identifier) + (argument_list + (identifier)))) + (macro_type_specifier + (identifier) + (type_descriptor + (type_identifier) + (abstract_pointer_declarator)))))) + +================================================================================ +Comments after for loops with ambiguities +================================================================================ + +int main() { + for (a *b = c; d; e) { + aff; + } + + // a-comment + + g; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (for_statement + (declaration + (type_identifier) + (init_declarator + (pointer_declarator + (identifier)) + (identifier))) + (identifier) + (identifier) + (compound_statement + (expression_statement + (identifier)))) + (comment) + (expression_statement + (identifier))))) + +================================================================================ +Top-level macro invocations +================================================================================ + +DEFINE_SOMETHING(THING_A, "this is a thing a"); +DEFINE_SOMETHING(THING_B, "this is a thing b", "thanks"); + +-------------------------------------------------------------------------------- + +(translation_unit + (expression_statement + (call_expression + (identifier) + (argument_list + (identifier) + (string_literal + (string_content))))) + 
(expression_statement + (call_expression + (identifier) + (argument_list + (identifier) + (string_literal + (string_content)) + (string_literal + (string_content)))))) diff --git a/c/test/corpus/crlf.txt b/c/test/corpus/crlf.txt new file mode 100644 index 0000000..9673cae --- /dev/null +++ b/c/test/corpus/crlf.txt @@ -0,0 +1,13 @@ +============================================ +Line comments with escaped CRLF line endings +============================================ + +// hello \ + this is still a comment +this_is_not a_comment; + +--- + +(translation_unit + (comment) + (declaration (type_identifier) (identifier))) diff --git a/c/test/corpus/declarations.txt b/c/test/corpus/declarations.txt new file mode 100644 index 0000000..3e1eacd --- /dev/null +++ b/c/test/corpus/declarations.txt @@ -0,0 +1,1036 @@ +================================================================================ +Struct declarations +================================================================================ + +struct s1; + +struct s2 { + int x; + float y : 5; +}; + +struct s3 { + int x : 1, y : 2; +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (struct_specifier + name: (type_identifier)) + (struct_specifier + name: (type_identifier) + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier)) + (field_declaration + type: (primitive_type) + declarator: (field_identifier) + (bitfield_clause + (number_literal))))) + (struct_specifier + name: (type_identifier) + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier) + (bitfield_clause + (number_literal)) + declarator: (field_identifier) + (bitfield_clause + (number_literal)))))) + +================================================================================ +Union declarations +================================================================================ + +union u1; + 
+union s2 { + int x; + float y; +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (union_specifier + name: (type_identifier)) + (union_specifier + name: (type_identifier) + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier)) + (field_declaration + type: (primitive_type) + declarator: (field_identifier))))) + +================================================================================ +Enum declarations +================================================================================ + +enum e1; + +enum e2 { + val1, + val2 = 5, + val3 +}; + +enum e3 { + val1, +}; + +enum e4: int { + val1, +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (enum_specifier + name: (type_identifier)) + (enum_specifier + name: (type_identifier) + body: (enumerator_list + (enumerator + name: (identifier)) + (enumerator + name: (identifier) + value: (number_literal)) + (enumerator + name: (identifier)))) + (enum_specifier + name: (type_identifier) + body: (enumerator_list + (enumerator + name: (identifier)))) + (enum_specifier + name: (type_identifier) + underlying_type: (primitive_type) + body: (enumerator_list + (enumerator + name: (identifier))))) + +================================================================================ +Struct declarations containing preprocessor directives +================================================================================ + +struct s { + #define A 5 + int b[a]; + #undef A +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (struct_specifier + (type_identifier) + (field_declaration_list + (preproc_def + (identifier) + (preproc_arg)) + (field_declaration + (primitive_type) + (array_declarator + (field_identifier) + (identifier))) + (preproc_call + (preproc_directive) + (preproc_arg))))) + 
+================================================================================ +Primitive-typed variable declarations +================================================================================ + +unsigned short int a; +long int b, c = 5, d; +float d, e; +unsigned f; +short g, h; + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + type: (sized_type_specifier + type: (primitive_type)) + declarator: (identifier)) + (declaration + type: (sized_type_specifier + type: (primitive_type)) + declarator: (identifier) + declarator: (init_declarator + declarator: (identifier) + value: (number_literal)) + declarator: (identifier)) + (declaration + type: (primitive_type) + declarator: (identifier) + declarator: (identifier)) + (declaration + type: (sized_type_specifier) + declarator: (identifier)) + (declaration + type: (sized_type_specifier) + declarator: (identifier) + declarator: (identifier))) + +================================================================================ +Variable storage classes +================================================================================ + +int a; +extern int b, c; +auto int d; +register int e; +static int f; +register uint64_t rd_ asm("x" "10"); + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (primitive_type) + (identifier)) + (declaration + (storage_class_specifier) + (primitive_type) + (identifier) + (identifier)) + (declaration + (storage_class_specifier) + (primitive_type) + (identifier)) + (declaration + (storage_class_specifier) + (primitive_type) + (identifier)) + (declaration + (storage_class_specifier) + (primitive_type) + (identifier)) + (declaration + (storage_class_specifier) + (primitive_type) + (identifier) + (gnu_asm_expression + (concatenated_string + (string_literal + (string_content)) + (string_literal + (string_content)))))) + 
+================================================================================ +Composite-typed variable declarations +================================================================================ + +struct b c; +union { int e; } f; +enum { g, h } i; + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + type: (struct_specifier + name: (type_identifier)) + declarator: (identifier)) + (declaration + type: (union_specifier + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier)))) + declarator: (identifier)) + (declaration + type: (enum_specifier + body: (enumerator_list + (enumerator + name: (identifier)) + (enumerator + name: (identifier)))) + declarator: (identifier))) + +================================================================================ +Pointer variable declarations +================================================================================ + +char *the_string; +const char **the_strings; +int const * const restrict x; + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + type: (primitive_type) + declarator: (pointer_declarator + declarator: (identifier))) + (declaration + (type_qualifier) + type: (primitive_type) + declarator: (pointer_declarator + declarator: (pointer_declarator + declarator: (identifier)))) + (declaration + type: (primitive_type) + (type_qualifier) + declarator: (pointer_declarator + (type_qualifier) + (type_qualifier) + declarator: (identifier)))) + +================================================================================ +Typedefs +================================================================================ + +typedef int my_int; + +typedef struct { + int x; +} *a; + +typedef void my_callback(void *, size_t); + +typedef struct A { + int i; +} a, b; + +typedef void const *voidpc; +typedef void volatile *voidpv; 
+typedef void const volatile *const voidpcv; + +typedef unsigned long int; +typedef unsigned short ptrdiff_t; +typedef short charptr_t; +typedef unsigned nullptr_t; +typedef signed max_align_t; + +typedef unsigned long ulong_t; +typedef long long_t; +typedef unsigned short ushort_t; +typedef short short_t; +typedef unsigned unsigned_t; +typedef signed signed_t; + +typedef long long; +typedef short short; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned unsigned short; +typedef signed signed short; +typedef signed signed unsigned; + +typedef int register_t __attribute__((__mode__(__word__))); + +__extension__ typedef long int greg_t; + +__extension__ typedef struct { + long long int quot; + long long int rem; +} lldiv_t; + +-------------------------------------------------------------------------------- + +(translation_unit + (type_definition + type: (primitive_type) + declarator: (type_identifier)) + (type_definition + type: (struct_specifier + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier)))) + declarator: (pointer_declarator + declarator: (type_identifier))) + (type_definition + type: (primitive_type) + declarator: (function_declarator + declarator: (type_identifier) + parameters: (parameter_list + (parameter_declaration + type: (primitive_type) + declarator: (abstract_pointer_declarator)) + (parameter_declaration + type: (primitive_type))))) + (type_definition + type: (struct_specifier + name: (type_identifier) + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (field_identifier)))) + declarator: (type_identifier) + declarator: (type_identifier)) + (type_definition + type: (primitive_type) + (type_qualifier) + declarator: (pointer_declarator + declarator: (type_identifier))) + (type_definition + type: (primitive_type) + (type_qualifier) + declarator: (pointer_declarator + declarator: (type_identifier))) + (type_definition + type: 
(primitive_type) + (type_qualifier) + (type_qualifier) + declarator: (pointer_declarator + (type_qualifier) + declarator: (type_identifier))) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier + type: (primitive_type)) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (type_identifier)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (sized_type_specifier) + declarator: (primitive_type)) + (type_definition + type: (primitive_type) + declarator: (type_identifier) + (attribute_specifier + (argument_list + (call_expression + function: (identifier) + arguments: (argument_list + (identifier)))))) + (type_definition + type: (sized_type_specifier + type: (primitive_type)) + declarator: (type_identifier)) + (type_definition + type: 
(struct_specifier + body: (field_declaration_list + (field_declaration + type: (sized_type_specifier + type: (primitive_type)) + declarator: (field_identifier)) + (field_declaration + type: (sized_type_specifier + type: (primitive_type)) + declarator: (field_identifier)))) + declarator: (type_identifier))) + +================================================================================ +Function declarations +================================================================================ + +int main(int argc, const char **argv); +static foo bar(); +static baz quux(...); + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (identifier)) + (parameter_declaration + (type_qualifier) + (primitive_type) + (pointer_declarator + (pointer_declarator + (identifier))))))) + (declaration + (storage_class_specifier) + (type_identifier) + (function_declarator + (identifier) + (parameter_list))) + (declaration + (storage_class_specifier) + (type_identifier) + (function_declarator + (identifier) + (parameter_list + (variadic_parameter))))) + +================================================================================ +Function definitions +================================================================================ + +void * do_stuff(int arg1) { + return 5; +} + +// K&R style +int foo(bar, baz, qux) +int bar, baz; +char *qux; +{ +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + type: (primitive_type) + declarator: (pointer_declarator + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list + (parameter_declaration + type: (primitive_type) + declarator: (identifier))))) + body: (compound_statement + (return_statement + (number_literal)))) + (comment) + 
(function_definition + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list + (identifier) + (identifier) + (identifier))) + (declaration + type: (primitive_type) + declarator: (identifier) + declarator: (identifier)) + (declaration + type: (primitive_type) + declarator: (pointer_declarator + declarator: (identifier))) + body: (compound_statement))) + +================================================================================ +Function specifiers after types +================================================================================ + +int static inline do_stuff(int arg1) { + return 5; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (storage_class_specifier) + (storage_class_specifier) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (identifier)))) + (compound_statement + (return_statement + (number_literal))))) + +================================================================================ +Linkage specifications +================================================================================ + +extern "C" int foo(); + +extern "C" int foo() { return 0; } + +extern "C" { + int bar(); + int baz(); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (linkage_specification + (string_literal + (string_content)) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list)))) + (linkage_specification + (string_literal + (string_content)) + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (return_statement + (number_literal))))) + (linkage_specification + (string_literal + (string_content)) + (declaration_list + (declaration + (primitive_type) + (function_declarator + (identifier) 
+ (parameter_list))) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list)))))) + +================================================================================ +Type qualifiers +================================================================================ + +const _Atomic unsigned long int x = 5; +restrict int y = 6; +volatile int z = 7; +constexpr int a = 8; +__thread int c = 9; +noreturn void b() {} + __extension__ extern int ffsll (long long int __ll) + __attribute__ ((__nothrow__ )) __attribute__ ((__const__)); + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (type_qualifier) + (type_qualifier) + (sized_type_specifier + (primitive_type)) + (init_declarator + (identifier) + (number_literal))) + (declaration + (type_qualifier) + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (type_qualifier) + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (type_qualifier) + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (storage_class_specifier) + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (function_definition + (type_qualifier) + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement)) + (declaration + (type_qualifier) + (storage_class_specifier) + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (sized_type_specifier + (primitive_type)) + (identifier))) + (attribute_specifier + (argument_list + (identifier))) + (attribute_specifier + (argument_list + (identifier)))))) + +================================================================================ +Local array declarations +================================================================================ + +int main() { + char the_buffer[the_size]; + char 
the_other_buffer[*]; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (declaration + (primitive_type) + (array_declarator + (identifier) + (identifier))) + (declaration + (primitive_type) + (array_declarator + (identifier)))))) + +================================================================================ +Attributes +================================================================================ + +extern __attribute__((visibility("hidden"))) int foo(); +extern int bar() __attribute__((const)); +void die(const char *format, ...) __attribute__((noreturn)) + __attribute__((format(printf,1,2))); +extern __attribute__((visibility("default"), weak)) int print_status(); + +extern int strerror_r(int __errnum, char *__buf, + int __buflen) __asm__("" + "__xpg_strerror_r") + __attribute__((__nothrow__)) __attribute__((__nonnull__(2))); + +int f([[a::b(c), d]] int x) {} + +[[gnu::always_inline]] [[gnu::hot]] [[gnu::const]] [[nodiscard]] +int g(void); + +[[gnu::always_inline, gnu::hot, gnu::const, nodiscard]] +int g(void); + +int i [[maybe_unused]]; +void f[[gnu::always_inline]](); + +[[nodiscard("reason")]] int foo; + +[[fallthrough]]; + +struct S { + int a [[deprecated]]; +}; + +typedef int MyInt [[deprecated]]; + +struct X { + int a __attribute__((aligned(4))); +} __attribute__((aligned(16))); + +union Y { + int a __attribute__((aligned(4))); +} __attribute__((aligned(16))); + +enum Z { + A +} __attribute__((aligned(16))); + +struct __attribute__((__packed__)) foo_t { + int x; +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (storage_class_specifier) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (string_literal + (string_content)))))) + (primitive_type) + 
(function_declarator + (identifier) + (parameter_list))) + (declaration + (storage_class_specifier) + (primitive_type) + (function_declarator + (identifier) + (parameter_list) + (attribute_specifier + (argument_list + (identifier))))) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (type_qualifier) + (primitive_type) + (pointer_declarator + (identifier))) + (variadic_parameter)) + (attribute_specifier + (argument_list + (identifier))) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (identifier) + (number_literal) + (number_literal))))))) + (declaration + (storage_class_specifier) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (string_literal + (string_content)))) + (identifier))) + (primitive_type) + (function_declarator + (identifier) + (parameter_list))) + (declaration + (storage_class_specifier) + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (identifier)) + (parameter_declaration + (primitive_type) + (pointer_declarator + (identifier))) + (parameter_declaration + (primitive_type) + (identifier))) + (gnu_asm_expression + (concatenated_string + (string_literal) + (string_literal + (string_content)))) + (attribute_specifier + (argument_list + (identifier))) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal))))))) + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (attribute_declaration + (attribute + (identifier) + (identifier) + (argument_list + (identifier))) + (attribute + (identifier))) + (primitive_type) + (identifier)))) + (compound_statement)) + (declaration + (attribute_declaration + (attribute + (identifier) + (identifier))) + (attribute_declaration + (attribute + (identifier) + (identifier))) + 
(attribute_declaration + (attribute + (identifier) + (identifier))) + (attribute_declaration + (attribute + (identifier))) + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type))))) + (declaration + (attribute_declaration + (attribute + (identifier) + (identifier)) + (attribute + (identifier) + (identifier)) + (attribute + (identifier) + (identifier)) + (attribute + (identifier))) + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type))))) + (declaration + (primitive_type) + (attributed_declarator + (identifier) + (attribute_declaration + (attribute + (identifier))))) + (declaration + (primitive_type) + (function_declarator + (attributed_declarator + (identifier) + (attribute_declaration + (attribute + (identifier) + (identifier)))) + (parameter_list))) + (declaration + (attribute_declaration + (attribute + (identifier) + (argument_list + (string_literal + (string_content))))) + (primitive_type) + (identifier)) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (expression_statement)) + (struct_specifier + (type_identifier) + (field_declaration_list + (field_declaration + (primitive_type) + (attributed_declarator + (field_identifier) + (attribute_declaration + (attribute + (identifier))))))) + (type_definition + (primitive_type) + (attributed_declarator + (type_identifier) + (attribute_declaration + (attribute + (identifier))))) + (struct_specifier + (type_identifier) + (field_declaration_list + (field_declaration + (primitive_type) + (field_identifier) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal))))))) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal)))))) + (union_specifier + (type_identifier) + (field_declaration_list + (field_declaration + (primitive_type) + (field_identifier) + 
(attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal))))))) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal)))))) + (enum_specifier + (type_identifier) + (enumerator_list + (enumerator + (identifier))) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (number_literal)))))) + (struct_specifier + (attribute_specifier + (argument_list + (identifier))) + (type_identifier) + (field_declaration_list + (field_declaration + (primitive_type) + (field_identifier))))) diff --git a/c/test/corpus/expressions.txt b/c/test/corpus/expressions.txt new file mode 100644 index 0000000..962fa2a --- /dev/null +++ b/c/test/corpus/expressions.txt @@ -0,0 +1,1335 @@ +================================================================================ +Number literals +================================================================================ + +double a = { + 0xAC00, + 0.123, + 0b1010001, + 0xabc00ull, + -0.1f, + 1'000'000.000'001, + 24e-5, + 0.1E, + 58., + 4e2, + 123.456e-67, + .1E4f, + 0x10.1p0, + 0X1, 0B1, + 2.0dd, 5wb, +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (primitive_type) + (init_declarator + (identifier) + (initializer_list + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal) + (number_literal))))) + +================================================================================ +Identifiers +================================================================================ + +int main() { + _abc; + d_EG123; + $f; +} + 
+-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (identifier)) + (expression_statement + (identifier)) + (expression_statement + (identifier))))) + +================================================================================ +Unicode Identifiers +================================================================================ + +int main() { + µs; + blah_accenté; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (identifier)) + (expression_statement + (identifier))))) + +================================================================================ +Common constants +================================================================================ + +int main() { + true; + false; + NULL; + + // regression test - identifiers starting w/ these strings should tokenize correctly. + true_value; + false_value; + NULL_value; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (true)) + (expression_statement + (false)) + (expression_statement + (null)) + (comment) + (expression_statement + (identifier)) + (expression_statement + (identifier)) + (expression_statement + (identifier))))) + +================================================================================ +Function calls +================================================================================ + +int main() { + printf("hi! 
%d\n", x); + __assert_fail("some_error_message", 115, __extension__ __func__); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)) + (identifier)))) + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content)) + (number_literal) + (identifier))))))) + +================================================================================ +GNU inline assembly +================================================================================ + +asm volatile ( + "mov r0, %0\n" + "mov r1, %[y]\n" + "add r2, r0, r1\n" + "mov %1, r2\n" + : "r" (z) + : "=r" (x), + [y] "=r" ((uintptr_t) y) + : "r2"); + +-------------------------------------------------------------------------------- + +(translation_unit + (expression_statement + (gnu_asm_expression + (gnu_asm_qualifier) + (concatenated_string + (string_literal + (string_content) + (escape_sequence)) + (string_literal + (string_content) + (escape_sequence)) + (string_literal + (string_content) + (escape_sequence)) + (string_literal + (string_content) + (escape_sequence))) + (gnu_asm_output_operand_list + (gnu_asm_output_operand + (string_literal + (string_content)) + (identifier))) + (gnu_asm_input_operand_list + (gnu_asm_input_operand + (string_literal + (string_content)) + (identifier)) + (gnu_asm_input_operand + (identifier) + (string_literal + (string_content)) + (cast_expression + (type_descriptor + (primitive_type)) + (identifier)))) + (gnu_asm_clobber_list + (string_literal + (string_content)))))) + +================================================================================ +Function call with compound statement 
+================================================================================ + +#define TAKES_BLOCK(x, block) for (i = 0; i < x; i++) block + +int main(void) { + { + int x = 0; + } + TAKES_BLOCK(10, { + // Doesn't matter what I put in here + }); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_function_def + (identifier) + (preproc_params + (identifier) + (identifier)) + (preproc_arg)) + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type)))) + (compound_statement + (compound_statement + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal)))) + (expression_statement + (call_expression + (identifier) + (argument_list + (number_literal) + (compound_statement + (comment)))))))) + +================================================================================ +String literals +================================================================================ + +int main() { + "a"; + "b" "c" "d"; + e "f" g; + "\"hi\""; + L"bonjour"; + u"guten morgen"; + U"buenos dias"; + u8"buongiorno"; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (string_literal + (string_content))) + (expression_statement + (concatenated_string + (string_literal + (string_content)) + (string_literal + (string_content)) + (string_literal + (string_content)))) + (expression_statement + (concatenated_string + (identifier) + (string_literal + (string_content)) + (identifier))) + (expression_statement + (string_literal + (escape_sequence) + (string_content) + (escape_sequence))) + (expression_statement + (string_literal + (string_content))) + (expression_statement + (string_literal + (string_content))) + 
(expression_statement + (string_literal + (string_content))) + (expression_statement + (string_literal + (string_content)))))) + +================================================================================ +Character literals +================================================================================ + +int main() { + 'a'; + '\0'; + '\t'; + '\''; + L'b'; + u'c'; + U'\xa1'; + u8'\x1A'; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (char_literal + (character))) + (expression_statement + (char_literal + (escape_sequence))) + (expression_statement + (char_literal + (escape_sequence))) + (expression_statement + (char_literal + (escape_sequence))) + (expression_statement + (char_literal + (character))) + (expression_statement + (char_literal + (character))) + (expression_statement + (char_literal + (escape_sequence))) + (expression_statement + (char_literal + (escape_sequence)))))) + +================================================================================ +Field access +================================================================================ + +int main() { + s.data1; + p->data2; + q[data3]; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (field_expression + (identifier) + (field_identifier))) + (expression_statement + (field_expression + (identifier) + (field_identifier))) + (expression_statement + (subscript_expression + (identifier) + (identifier)))))) + +================================================================================ +Boolean operators 
+================================================================================ + +int main() { + !x || !y && !z; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (binary_expression + (unary_expression + (identifier)) + (binary_expression + (unary_expression + (identifier)) + (unary_expression + (identifier)))))))) + +================================================================================ +Math operators +================================================================================ + +int main() { + -a / b + c * -d; + a++ - ++b + c-- + --d; + ++L; + } + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (binary_expression + (binary_expression + (unary_expression + (identifier)) + (identifier)) + (binary_expression + (identifier) + (unary_expression + (identifier))))) + (expression_statement + (binary_expression + (binary_expression + (binary_expression + (update_expression + (identifier)) + (update_expression + (identifier))) + (update_expression + (identifier))) + (update_expression + (identifier)))) + (expression_statement + (update_expression + (identifier)))))) + +================================================================================ +The comma operator +================================================================================ + +int main() { + i--, j--; + (i--, j--); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + 
(comma_expression + (update_expression + (identifier)) + (update_expression + (identifier)))) + (expression_statement + (parenthesized_expression + (comma_expression + (update_expression + (identifier)) + (update_expression + (identifier)))))))) + +================================================================================ +Assignments +================================================================================ + +int main() { + static int a = 1; + b = *c = 2; + d.e = 3; + f->g = 4; + h[i] = j; + k += l; + m -= o; + n *= p; + q /= r; + *s++ = 1; + (*t) = 1; + a *= ((b!=c) ? d : e); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list)) + body: (compound_statement + (declaration + (storage_class_specifier) + type: (primitive_type) + declarator: (init_declarator + declarator: (identifier) + value: (number_literal))) + (expression_statement + (assignment_expression + left: (identifier) + right: (assignment_expression + left: (pointer_expression + argument: (identifier)) + right: (number_literal)))) + (expression_statement + (assignment_expression + left: (field_expression + argument: (identifier) + field: (field_identifier)) + right: (number_literal))) + (expression_statement + (assignment_expression + left: (field_expression + argument: (identifier) + field: (field_identifier)) + right: (number_literal))) + (expression_statement + (assignment_expression + left: (subscript_expression + argument: (identifier) + index: (identifier)) + right: (identifier))) + (expression_statement + (assignment_expression + left: (identifier) + right: (identifier))) + (expression_statement + (assignment_expression + left: (identifier) + right: (identifier))) + (expression_statement + (assignment_expression + left: (identifier) + right: (identifier))) + (expression_statement + 
(assignment_expression + left: (identifier) + right: (identifier))) + (expression_statement + (assignment_expression + left: (pointer_expression + argument: (update_expression + argument: (identifier))) + right: (number_literal))) + (expression_statement + (assignment_expression + left: (parenthesized_expression + (pointer_expression + argument: (identifier))) + right: (number_literal))) + (expression_statement + (assignment_expression + left: (identifier) + right: (parenthesized_expression + (conditional_expression + condition: (parenthesized_expression + (binary_expression + left: (identifier) + right: (identifier))) + consequence: (identifier) + alternative: (identifier)))))))) + +================================================================================ +Pointer operations +================================================================================ + +int main() { + doSomething(&x, *x); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (call_expression + (identifier) + (argument_list + (pointer_expression + (identifier)) + (pointer_expression + (identifier)))))))) + +================================================================================ +Type-casts +================================================================================ + +int main() { + x = (const SomeType *)thing; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list)) + body: (compound_statement + (expression_statement + (assignment_expression + left: (identifier) + right: (cast_expression + type: (type_descriptor + (type_qualifier) + type: (type_identifier) + declarator: 
(abstract_pointer_declarator)) + value: (identifier))))))) + +================================================================================ +Sizeof expressions +================================================================================ + +int main() { + sizeof x.a; + sizeof(x.a); + sizeof(const char **); + sizeof(char * ()); + sizeof(1) + 1; + sizeof((1) + 1); + sizeof(int) + 1; + sizeof(struct foo) + sizeof(struct bar) + 1; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (sizeof_expression + (field_expression + (identifier) + (field_identifier)))) + (expression_statement + (sizeof_expression + (parenthesized_expression + (field_expression + (identifier) + (field_identifier))))) + (expression_statement + (sizeof_expression + (type_descriptor + (type_qualifier) + (primitive_type) + (abstract_pointer_declarator + (abstract_pointer_declarator))))) + (expression_statement + (sizeof_expression + (type_descriptor + (primitive_type) + (abstract_pointer_declarator + (abstract_function_declarator + (parameter_list)))))) + (expression_statement + (binary_expression + (sizeof_expression + (parenthesized_expression + (number_literal))) + (number_literal))) + (expression_statement + (sizeof_expression + (parenthesized_expression + (binary_expression + (parenthesized_expression + (number_literal)) + (number_literal))))) + (expression_statement + (binary_expression + (sizeof_expression + (type_descriptor + (primitive_type))) + (number_literal))) + (expression_statement + (binary_expression + (binary_expression + (sizeof_expression + (type_descriptor + (struct_specifier + (type_identifier)))) + (sizeof_expression + (type_descriptor + (struct_specifier + (type_identifier))))) + (number_literal)))))) + 
+================================================================================ +Alignof expressions +================================================================================ + +typedef struct { + long long __clang_max_align_nonce1 + __attribute__((__aligned__(__alignof__(long long)))); + long double __clang_max_align_nonce2 + __attribute__((__aligned__(__alignof__(long double)))); +} max_align_t; + +-------------------------------------------------------------------------------- + +(translation_unit + (type_definition + (struct_specifier + (field_declaration_list + (field_declaration + (sized_type_specifier) + (field_identifier) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (alignof_expression + (type_descriptor + (sized_type_specifier)))))))) + (field_declaration + (sized_type_specifier + (primitive_type)) + (field_identifier) + (attribute_specifier + (argument_list + (call_expression + (identifier) + (argument_list + (alignof_expression + (type_descriptor + (sized_type_specifier + (primitive_type))))))))))) + (primitive_type))) + +================================================================================ +Offsetof expressions +================================================================================ + +int main() { + offsetof( struct x, a ); + offsetof( x, a ); + offsetof( x, a ) + 1; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (offsetof_expression + (type_descriptor + (struct_specifier + (type_identifier))) + (field_identifier))) + (expression_statement + (offsetof_expression + (type_descriptor + (type_identifier)) + (field_identifier))) + (expression_statement + (binary_expression + (offsetof_expression + (type_descriptor + (type_identifier)) + (field_identifier)) + 
(number_literal)))))) + +================================================================================ +Compound literals +================================================================================ + +int main() { + x = (SomeType) { + .f1.f2[f3] = 5, + .f4 = {} + }; + y = (struct SomeStruct) { + 7, + 8 + }; + z = (char const []) {'a', 'b'}; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (assignment_expression + (identifier) + (compound_literal_expression + (type_descriptor + (type_identifier)) + (initializer_list + (initializer_pair + (field_designator + (field_identifier)) + (field_designator + (field_identifier)) + (subscript_designator + (identifier)) + (number_literal)) + (initializer_pair + (field_designator + (field_identifier)) + (initializer_list)))))) + (expression_statement + (assignment_expression + (identifier) + (compound_literal_expression + (type_descriptor + (struct_specifier + (type_identifier))) + (initializer_list + (number_literal) + (number_literal))))) + (expression_statement + (assignment_expression + (identifier) + (compound_literal_expression + (type_descriptor + (primitive_type) + (type_qualifier) + (abstract_array_declarator)) + (initializer_list + (char_literal + (character)) + (char_literal + (character))))))))) + +================================================================================ +Compound literals with trailing commas +================================================================================ + +int main() { + y = (struct SomeStruct) { 7, 8, }; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + 
(assignment_expression + (identifier) + (compound_literal_expression + (type_descriptor + (struct_specifier + (type_identifier))) + (initializer_list + (number_literal) + (number_literal)))))))) + +================================================================================ +Comments with escaped newlines +================================================================================ + +// one \ + two + +-------------------------------------------------------------------------------- + +(translation_unit + (comment)) + +================================================================================ +Comments with escaped chars and newlines +================================================================================ + +// one \a \b \ + two +// one \c \d +-------------------------------------------------------------------------------- + +(translation_unit + (comment) + (comment)) + +================================================================================ +Generic Expressions +================================================================================ + +int main(int argc, char **argv) { + int a = 10; + float b = 3.14; + double c = 2.71828; + char d = 'A'; + + a = _Generic(d, int: 5, float: 0, char: 100); + b = _Generic(a, void *: 0, int: 4.0, float: 3.14, double: 2.71828, char: 1.0); + c = _Generic(b, void *: 0, int: 4.0, float: 3.14, double: 2.71828, char: 1.0); + d = _Generic(c, void *: '\0', int: '0', float: '3', double: '2', char: '1'); + + _Generic(a, int: printf("a is an int\n"), float: printf("a is a float\n"), double: printf("a is a double\n"), char: printf("a is a char\n")); + _Generic(b, int: printf("b is an int\n"), float: printf("b is a float\n"), double: printf("b is a double\n"), char: printf("b is a char\n")); + _Generic(c, int: printf("c is an int\n"), float: printf("c is a float\n"), double: printf("c is a double\n"), char: printf("c is a char\n")); + _Generic(d, int: printf("d is an int\n"), float: printf("d is a float\n"), 
double: printf("d is a double\n"), char: printf("d is a char\n")); + + return 0; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (identifier)) + (parameter_declaration + (primitive_type) + (pointer_declarator + (pointer_declarator + (identifier)))))) + (compound_statement + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (declaration + (primitive_type) + (init_declarator + (identifier) + (char_literal + (character)))) + (expression_statement + (assignment_expression + (identifier) + (generic_expression + (identifier) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal)))) + (expression_statement + (assignment_expression + (identifier) + (generic_expression + (identifier) + (type_descriptor + (primitive_type) + (abstract_pointer_declarator)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal)))) + (expression_statement + (assignment_expression + (identifier) + (generic_expression + (identifier) + (type_descriptor + (primitive_type) + (abstract_pointer_declarator)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal) + (type_descriptor + (primitive_type)) + (number_literal)))) + (expression_statement 
+ (assignment_expression + (identifier) + (generic_expression + (identifier) + (type_descriptor + (primitive_type) + (abstract_pointer_declarator)) + (char_literal + (escape_sequence)) + (type_descriptor + (primitive_type)) + (char_literal + (character)) + (type_descriptor + (primitive_type)) + (char_literal + (character)) + (type_descriptor + (primitive_type)) + (char_literal + (character)) + (type_descriptor + (primitive_type)) + (char_literal + (character))))) + (expression_statement + (generic_expression + (identifier) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))))) + (expression_statement + (generic_expression + (identifier) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))))) + (expression_statement + (generic_expression + (identifier) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + 
(escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))))) + (expression_statement + (generic_expression + (identifier) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))) + (type_descriptor + (primitive_type)) + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)))))) + (return_statement + (number_literal))))) + +================================================================================ +Noreturn Type Qualifier +================================================================================ + +_Noreturn void kill(void) { + printf("Killing the program\n"); + exit(0); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (type_qualifier) + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type)))) + (compound_statement + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence))))) + (expression_statement + (call_expression + (identifier) + (argument_list + (number_literal))))))) + 
+================================================================================ +Restrict Type Qualifier +================================================================================ + +void fn (int *__restrict__ rptr) { + int *ptr = rptr; + *ptr = 0; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (pointer_declarator + (type_qualifier) + (identifier))))) + (compound_statement + (declaration + (primitive_type) + (init_declarator + (pointer_declarator + (identifier)) + (identifier))) + (expression_statement + (assignment_expression + (pointer_expression + (identifier)) + (number_literal)))))) + +================================================================================ +Ternary +================================================================================ + +void f() { + 0 ? 1 : 2; + a = 0 ? 
1 : 2; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (expression_statement + (conditional_expression + (number_literal) + (number_literal) + (number_literal))) + (expression_statement + (assignment_expression + (identifier) + (conditional_expression + (number_literal) + (number_literal) + (number_literal))))))) diff --git a/c/test/corpus/microsoft.txt b/c/test/corpus/microsoft.txt new file mode 100644 index 0000000..93c0ae8 --- /dev/null +++ b/c/test/corpus/microsoft.txt @@ -0,0 +1,253 @@ +================================ +declaration specs +================================ + +struct __declspec(dllexport) s2 +{ +}; + +union __declspec(noinline) u2 { +}; + +--- + +(translation_unit + (struct_specifier + (ms_declspec_modifier + (identifier)) + name: (type_identifier) + body: (field_declaration_list)) + (union_specifier + (ms_declspec_modifier + (identifier)) + name: (type_identifier) + body: (field_declaration_list))) + +================================ +pointers +================================ + +struct s2 +{ + int * __restrict x; + int * __sptr psp; + int * __uptr pup; + int * __unaligned pup; +}; + +void sum2(int n, int * __restrict a, int * __restrict b, + int * c, int * d) { + int i; + for (i = 0; i < n; i++) { + a[i] = b[i] + c[i]; + c[i] = b[i] + d[i]; + } +} + +void MyFunction(char * __uptr myValue); + +--- + +(translation_unit + (struct_specifier + name: (type_identifier) + body: (field_declaration_list + (field_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_restrict_modifier)) + declarator: (field_identifier))) + (field_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_signed_ptr_modifier)) + declarator: (field_identifier))) + (field_declaration + type: (primitive_type) + 
declarator: (pointer_declarator + (ms_pointer_modifier + (ms_unsigned_ptr_modifier)) + declarator: (field_identifier))) + (field_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_unaligned_ptr_modifier)) + declarator: (field_identifier))))) + (function_definition + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list + (parameter_declaration + type: (primitive_type) + declarator: (identifier)) + (parameter_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_restrict_modifier)) + declarator: (identifier))) + (parameter_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_restrict_modifier)) + declarator: (identifier))) + (parameter_declaration + type: (primitive_type) + declarator: (pointer_declarator + declarator: (identifier))) + (parameter_declaration + type: (primitive_type) + declarator: (pointer_declarator + declarator: (identifier))))) + body: (compound_statement + (declaration + type: (primitive_type) + declarator: (identifier)) + (for_statement + initializer: (assignment_expression + left: (identifier) + right: (number_literal)) + condition: (binary_expression + left: (identifier) + right: (identifier)) + update: (update_expression + argument: (identifier)) + body: (compound_statement + (expression_statement + (assignment_expression + left: (subscript_expression + argument: (identifier) + index: (identifier)) + right: (binary_expression + left: (subscript_expression + argument: (identifier) + index: (identifier)) + right: (subscript_expression + argument: (identifier) + index: (identifier))))) + (expression_statement + (assignment_expression + left: (subscript_expression + argument: (identifier) + index: (identifier)) + right: (binary_expression + left: (subscript_expression + argument: (identifier) + index: (identifier)) + right: (subscript_expression + 
argument: (identifier) + index: (identifier))))))))) + (declaration + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list + (parameter_declaration + type: (primitive_type) + declarator: (pointer_declarator + (ms_pointer_modifier + (ms_unsigned_ptr_modifier)) + declarator: (identifier))))))) + +================================ +call modifiers +================================ + +__cdecl void mymethod(){ + return; +} + +__fastcall void mymethod(){ + return; +} + +--- + +(translation_unit + (function_definition + (ms_call_modifier) + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list)) + body: (compound_statement + (return_statement))) + (function_definition + (ms_call_modifier) + type: (primitive_type) + declarator: (function_declarator + declarator: (identifier) + parameters: (parameter_list)) + body: (compound_statement + (return_statement)))) + + +================================ +SEH exception handling +================================ + +int main() { + int arg; + __try { + __try { + arg = 1; + __leave; + } __except (-1) { + arg = 2; + } + __leave; + arg = 3; + } __finally { + printf("arg: %d\n", arg); + } +} + +--- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (declaration + (primitive_type) + (identifier)) + (seh_try_statement + (compound_statement + (seh_try_statement + (compound_statement + (expression_statement + (assignment_expression + (identifier) + (number_literal))) + (seh_leave_statement)) + (seh_except_clause + (parenthesized_expression + (number_literal)) + (compound_statement + (expression_statement + (assignment_expression + (identifier) + (number_literal)))))) + (seh_leave_statement) + (expression_statement + (assignment_expression + (identifier) + (number_literal)))) + (seh_finally_clause + (compound_statement + 
(expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content) + (escape_sequence)) + (identifier)))))))))) diff --git a/c/test/corpus/preprocessor.txt b/c/test/corpus/preprocessor.txt new file mode 100644 index 0000000..24e5381 --- /dev/null +++ b/c/test/corpus/preprocessor.txt @@ -0,0 +1,401 @@ +================================================================================ +Include directives +================================================================================ + +#include "some/path.h" +#include <stdint.h> +#include MACRO +#include MACRO(arg1, arg2) + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_include + path: (string_literal + (string_content))) + (preproc_include + path: (system_lib_string)) + (preproc_include + path: (identifier)) + (preproc_include + path: (call_expression + function: (identifier) + arguments: (argument_list + (identifier) + (identifier))))) + +================================================================================ +Object-like macro definitions +================================================================================ + +#define ONE + #define TWO int a = b; +#define THREE \ + c == d ? 
\ + e : \ + f +#define FOUR (mno * pq) +#define FIVE(a,b) x \ + + y +#define SIX(a, \ + b) x \ + + y +#define SEVEN 7/* seven has an + * annoying comment */ +#define EIGHT(x) do { \ + x = x + 1; \ + x = x / 2; \ + } while (x > 0); + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_def + name: (identifier)) + (preproc_def + name: (identifier) + value: (preproc_arg)) + (preproc_def + name: (identifier) + value: (preproc_arg)) + (preproc_def + name: (identifier) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier) + (identifier)) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier) + (identifier)) + value: (preproc_arg)) + (preproc_def + name: (identifier) + value: (preproc_arg) + (comment)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier)) + value: (preproc_arg))) + +================================================================================ +Function-like macro definitions +================================================================================ + +#define ONE() a +#define TWO(b) c +#define THREE(d, e) f +#define FOUR(...) g +#define FIVE(h, i, ...) 
j + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_function_def + name: (identifier) + parameters: (preproc_params) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier)) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier) + (identifier)) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params) + value: (preproc_arg)) + (preproc_function_def + name: (identifier) + parameters: (preproc_params + (identifier) + (identifier)) + value: (preproc_arg))) + +================================================================================ +Ifdefs +================================================================================ + +#ifndef DEFINE1 +int j; +#endif + +#ifdef DEFINE2 +ssize_t b; +#define c 32 +#elif defined DEFINE3 +#else +int b; +#define c 16 +#endif + +#ifdef DEFINE2 +#else +# ifdef DEFINE3 +# else +# endif +#endif + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_ifdef + name: (identifier) + (declaration + type: (primitive_type) + declarator: (identifier))) + (preproc_ifdef + name: (identifier) + (declaration + type: (primitive_type) + declarator: (identifier)) + (preproc_def + name: (identifier) + value: (preproc_arg)) + alternative: (preproc_elif + condition: (preproc_defined + (identifier)) + alternative: (preproc_else + (declaration + type: (primitive_type) + declarator: (identifier)) + (preproc_def + name: (identifier) + value: (preproc_arg))))) + (preproc_ifdef + name: (identifier) + alternative: (preproc_else + (preproc_ifdef + name: (identifier) + alternative: (preproc_else))))) + +================================================================================ +Elifdefs +================================================================================ + 
+#ifndef DEFINE1 +int j; +#elifndef DEFINE2 +int k; +#endif + +#ifdef DEFINE2 +ssize_t b; +#elifdef DEFINE3 +ssize_t c; +#else +int b; +#endif + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_ifdef + (identifier) + (declaration + (primitive_type) + (identifier)) + (preproc_elifdef + (identifier) + (declaration + (primitive_type) + (identifier)))) + (preproc_ifdef + (identifier) + (declaration + (primitive_type) + (identifier)) + (preproc_elifdef + (identifier) + (declaration + (primitive_type) + (identifier)) + (preproc_else + (declaration + (primitive_type) + (identifier)))))) + +================================================================================ +General if blocks +================================================================================ + +#if defined(__GNUC__) && defined(__PIC__) +#define inline inline __attribute__((always_inline)) +#elif defined(_WIN32) +#define something +#elif !defined(SOMETHING_ELSE) +#define SOMETHING_ELSE +#else +#include <something> +#endif + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_if + condition: (binary_expression + left: (preproc_defined + (identifier)) + right: (preproc_defined + (identifier))) + (preproc_def + name: (identifier) + value: (preproc_arg)) + alternative: (preproc_elif + condition: (preproc_defined + (identifier)) + (preproc_def + name: (identifier)) + alternative: (preproc_elif + condition: (unary_expression + argument: (preproc_defined + (identifier))) + (preproc_def + name: (identifier)) + alternative: (preproc_else + (preproc_include + path: (system_lib_string))))))) + +================================================================================ +Preprocessor conditionals in functions +================================================================================ + +int main() { + #if d + puts("1"); + #else + puts("2"); + #endif + + #if a + return 0; + 
#elif b + return 1; + #elif c + return 2; + #else + return 3; + #endif +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (preproc_if + (identifier) + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content))))) + (preproc_else + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content))))))) + (preproc_if + (identifier) + (return_statement + (number_literal)) + (preproc_elif + (identifier) + (return_statement + (number_literal)) + (preproc_elif + (identifier) + (return_statement + (number_literal)) + (preproc_else + (return_statement + (number_literal))))))))) + +================================================================================ +Preprocessor conditionals in struct/union bodies +================================================================================ + +struct S { +#ifdef _WIN32 + LONG f2; +#else + uint32_t f2; +#endif +}; + +-------------------------------------------------------------------------------- + +(translation_unit + (struct_specifier + (type_identifier) + (field_declaration_list + (preproc_ifdef + (identifier) + (field_declaration + (type_identifier) + (field_identifier)) + (preproc_else + (field_declaration + (primitive_type) + (field_identifier))))))) + +================================================================================ +Unknown preprocessor directives +================================================================================ + +#pragma mark - UIViewController + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_call + directive: (preproc_directive) + argument: (preproc_arg))) + +================================================================================ 
+Preprocessor expressions +================================================================================ + +#if A(B || C) && \ + !D(F) + +uint32_t a; + +#endif + +-------------------------------------------------------------------------------- + +(translation_unit + (preproc_if + (binary_expression + (call_expression + (identifier) + (argument_list + (binary_expression + (identifier) + (identifier)))) + (unary_expression + (call_expression + (identifier) + (argument_list + (identifier))))) + (declaration + (primitive_type) + (identifier)))) diff --git a/c/test/corpus/statements.txt b/c/test/corpus/statements.txt new file mode 100644 index 0000000..ef81ddb --- /dev/null +++ b/c/test/corpus/statements.txt @@ -0,0 +1,535 @@ +================================================================================ +If statements +================================================================================ + +int main() { + if (a) + 1; + + if (!a) { + 2; + } else { + 3; + } +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (if_statement + (parenthesized_expression + (identifier)) + (expression_statement + (number_literal))) + (if_statement + (parenthesized_expression + (unary_expression + (identifier))) + (compound_statement + (expression_statement + (number_literal))) + (else_clause + (compound_statement + (expression_statement + (number_literal)))))))) + +================================================================================ +For loops +================================================================================ + +int main() { + for (;;) + 1; + + for (int i = 0; i < 5; next(), i++) { + 2; + } + + for (start(); check(); step()) + 3; + + for (i = 0, j = 0, k = 0, l = 0; i < 1, j < 1; i++, j++, k++, l++) + 1; +} + 
+-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (for_statement + (expression_statement + (number_literal))) + (for_statement + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (binary_expression + (identifier) + (number_literal)) + (comma_expression + (call_expression + (identifier) + (argument_list)) + (update_expression + (identifier))) + (compound_statement + (expression_statement + (number_literal)))) + (for_statement + (call_expression + (identifier) + (argument_list)) + (call_expression + (identifier) + (argument_list)) + (call_expression + (identifier) + (argument_list)) + (expression_statement + (number_literal))) + (for_statement + (comma_expression + (assignment_expression + (identifier) + (number_literal)) + (comma_expression + (assignment_expression + (identifier) + (number_literal)) + (comma_expression + (assignment_expression + (identifier) + (number_literal)) + (assignment_expression + (identifier) + (number_literal))))) + (comma_expression + (binary_expression + (identifier) + (number_literal)) + (binary_expression + (identifier) + (number_literal))) + (comma_expression + (update_expression + (identifier)) + (comma_expression + (update_expression + (identifier)) + (comma_expression + (update_expression + (identifier)) + (update_expression + (identifier))))) + (expression_statement + (number_literal)))))) + +================================================================================ +While loops +================================================================================ + +int main() { + while (x) + printf("hi"); +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + 
(compound_statement + (while_statement + (parenthesized_expression + (identifier)) + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content))))))))) + +================================================================================ +Labeled statements +================================================================================ + +void foo(T *t) { +recur: + t = t->next(); + if (t) goto recur; +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (type_identifier) + (pointer_declarator + (identifier))))) + (compound_statement + (labeled_statement + (statement_identifier) + (expression_statement + (assignment_expression + (identifier) + (call_expression + (field_expression + (identifier) + (field_identifier)) + (argument_list))))) + (if_statement + (parenthesized_expression + (identifier)) + (goto_statement + (statement_identifier)))))) + +================================================================================ +Switch statements +================================================================================ + +void foo(int a) { + switch (a) { + puts("entered switch!"); + + case 3: + case 5: + if (b) { + c(); + } + break; + + default: + c(); + break; + } +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list + (parameter_declaration + (primitive_type) + (identifier)))) + (compound_statement + (switch_statement + (parenthesized_expression + (identifier)) + (compound_statement + (expression_statement + (call_expression + (identifier) + (argument_list + (string_literal + (string_content))))) + (case_statement + (number_literal)) + (case_statement + (number_literal) + 
(if_statement + (parenthesized_expression + (identifier)) + (compound_statement + (expression_statement + (call_expression + (identifier) + (argument_list))))) + (break_statement)) + (case_statement + (expression_statement + (call_expression + (identifier) + (argument_list))) + (break_statement))))))) + +================================================================================ +Case statements separate from switch statements +================================================================================ + +int main() { + switch (count % 8) { + case 0: + do { + *to = *from++; + case 2: *to = *from++; + case 1: *to = *from++; + } while (--n > 0); + } +} + +-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (switch_statement + (parenthesized_expression + (binary_expression + (identifier) + (number_literal))) + (compound_statement + (case_statement + (number_literal) + (do_statement + (compound_statement + (expression_statement + (assignment_expression + (pointer_expression + (identifier)) + (pointer_expression + (update_expression + (identifier))))) + (case_statement + (number_literal) + (expression_statement + (assignment_expression + (pointer_expression + (identifier)) + (pointer_expression + (update_expression + (identifier)))))) + (case_statement + (number_literal) + (expression_statement + (assignment_expression + (pointer_expression + (identifier)) + (pointer_expression + (update_expression + (identifier))))))) + (parenthesized_expression + (binary_expression + (update_expression + (identifier)) + (number_literal)))))))))) + +================================================================================ +Return statements +================================================================================ + +void foo() { + return; + return a; + return a, b; +} + 
+-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (return_statement) + (return_statement + (identifier)) + (return_statement + (comma_expression + (identifier) + (identifier)))))) + +================================================================================ +Comments with asterisks +================================================================================ + +/************************* + * odd number of asterisks + *************************/ +int a; + +/************************** + * even number of asterisks + **************************/ +int b; + +-------------------------------------------------------------------------------- + +(translation_unit + (comment) + (declaration + (primitive_type) + (identifier)) + (comment) + (declaration + (primitive_type) + (identifier))) + +================================================================================ +Comment with multiple backslashes +================================================================================ + +int a = 3; // Hello \\ +World + +-------------------------------------------------------------------------------- + +(translation_unit + (declaration + (primitive_type) + (init_declarator + (identifier) + (number_literal))) + (comment)) + +================================================================================ +Attributes +================================================================================ + +void f() { + [[a]] switch (b) { + [[c]] case 1: {} + case 2: + [[fallthrough]]; + default: + } + [[a]] while (true) {} + [[a]] if (true) {} + [[a]] for (;;) {} + [[a]] return; + [[a]] a; + [[a]]; + [[a]] label: {} + [[a]] goto label; + + // these are c++ specific, but their bind locations should be c-compatible + if (true) [[likely]] {} else [[unlikely]] {} + do [[likely]] {} while (true); +} + 
+-------------------------------------------------------------------------------- + +(translation_unit + (function_definition + (primitive_type) + (function_declarator + (identifier) + (parameter_list)) + (compound_statement + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (switch_statement + (parenthesized_expression + (identifier)) + (compound_statement + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (case_statement + (number_literal) + (compound_statement))) + (case_statement + (number_literal) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (expression_statement))) + (case_statement)))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (while_statement + (parenthesized_expression + (true)) + (compound_statement))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (if_statement + (parenthesized_expression + (true)) + (compound_statement))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (for_statement + (compound_statement))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (return_statement)) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (expression_statement + (identifier))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (expression_statement)) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (labeled_statement + (statement_identifier) + (compound_statement))) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (goto_statement + (statement_identifier))) + (comment) + (if_statement + (parenthesized_expression + (true)) + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (compound_statement)) + (else_clause + (attributed_statement + (attribute_declaration + (attribute + (identifier))) 
+ (compound_statement)))) + (do_statement + (attributed_statement + (attribute_declaration + (attribute + (identifier))) + (compound_statement)) + (parenthesized_expression + (true)))))) diff --git a/c/test/corpus/types.txt b/c/test/corpus/types.txt new file mode 100644 index 0000000..6d2d19a --- /dev/null +++ b/c/test/corpus/types.txt @@ -0,0 +1,80 @@ +======================================== +Primitive types +======================================== + +int a; +uint8_t a; +uint16_t a; +uint32_t a; +uint64_t a; +uintptr_t a; + +int8_t a; +int16_t a; +int32_t a; +int64_t a; +intptr_t a; + +char16_t a; +char32_t a; + +size_t a; +ssize_t a; + +--- + +(translation_unit + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier)) + (declaration (primitive_type) (identifier))) + +======================================== +Type modifiers +======================================== + +void f(unsigned); +void f(unsigned int); +void f(signed long int); +void f(unsigned v1); +void f(unsigned long v2); + +--- + +(translation_unit + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list (parameter_declaration (sized_type_specifier))))) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list (parameter_declaration (sized_type_specifier (primitive_type)))))) + (declaration + (primitive_type) + (function_declarator + (identifier) + 
(parameter_list (parameter_declaration (sized_type_specifier (primitive_type)))))) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list (parameter_declaration (sized_type_specifier) (identifier))))) + (declaration + (primitive_type) + (function_declarator + (identifier) + (parameter_list (parameter_declaration (sized_type_specifier) (identifier)))))) diff --git a/c/test/highlight/keywords.c b/c/test/highlight/keywords.c new file mode 100644 index 0000000..50d790c --- /dev/null +++ b/c/test/highlight/keywords.c @@ -0,0 +1,6 @@ +#include <stdlib.h> +// ^ keyword +// ^ string + +#include "something.h" +// ^ string diff --git a/c/test/highlight/names.c b/c/test/highlight/names.c new file mode 100644 index 0000000..efdd44c --- /dev/null +++ b/c/test/highlight/names.c @@ -0,0 +1,33 @@ +typedef struct { + // ^ keyword + // ^ keyword + a_t b; + // <- type + // ^ property + + unsigned c_t (*d)[2]; + // ^ type + // ^ type + // ^ property +}, T, V; +// ^ type +// ^ type + +int main(const char string[SIZE]) { +// <- type +// ^ function +// ^ keyword +// ^ type +// ^ variable +// ^ constant + + return foo.bar + foo.baz(); + // ^ keyword + // ^ variable + // ^ property + // ^ function + +error: + // <- label + return 0; +} diff --git a/nix/LICENSE b/nix/LICENSE new file mode 100644 index 0000000..cfa8a0f --- /dev/null +++ b/nix/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Charles Strahan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial 
portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/nix/grammar.js b/nix/grammar.js new file mode 100644 index 0000000..38af016 --- /dev/null +++ b/nix/grammar.js @@ -0,0 +1,411 @@ +const PREC = { + impl: 1, + or: 2, + and: 3, + eq: 4, + neq: 4, + "<": 5, + ">": 5, + leq: 5, + geq: 5, + update: 6, + not: 7, + "+": 8, + "-": 8, + "*": 9, + "/": 9, + concat: 10, + "?": 11, + negate: 12, +}; + +module.exports = grammar({ + name: "nix", + + extras: ($) => [/\s/, $.comment], + + supertypes: ($) => [$._expression], + + inline: ($) => [], + + externals: ($) => [ + $.string_fragment, + $._indented_string_fragment, + $._path_start, + $.path_fragment, + $.dollar_escape, + $._indented_dollar_escape, + ], + + word: ($) => $.keyword, + + conflicts: ($) => [], + + rules: { + source_code: ($) => optional(field("expression", $._expression)), + _expression: ($) => $._expr_function_expression, + + // Keywords go before identifiers to let them take precedence when both are expected. 
+ // Workaround before https://github.com/tree-sitter/tree-sitter/pull/246 + keyword: ($) => /if|then|else|let|inherit|in|rec|with|assert/, + identifier: ($) => /[a-zA-Z_][a-zA-Z0-9_\'\-]*/, + + variable_expression: ($) => field("name", $.identifier), + integer_expression: ($) => /[0-9]+/, + float_expression: ($) => + /(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?/, + + path_expression: ($) => + seq( + alias($._path_start, $.path_fragment), + repeat( + choice( + $.path_fragment, + alias($._immediate_interpolation, $.interpolation) + ) + ) + ), + + _hpath_start: ($) => /\~\/[a-zA-Z0-9\._\-\+\/]+/, + hpath_expression: ($) => + seq( + alias($._hpath_start, $.path_fragment), + repeat( + choice( + $.path_fragment, + alias($._immediate_interpolation, $.interpolation) + ) + ) + ), + + spath_expression: ($) => /<[a-zA-Z0-9\._\-\+]+(\/[a-zA-Z0-9\._\-\+]+)*>/, + uri_expression: ($) => + /[a-zA-Z][a-zA-Z0-9\+\-\.]*:[a-zA-Z0-9%\/\?:@\&=\+\$,\-_\.\!\~\*\']+/, + + _expr_function_expression: ($) => + choice( + $.function_expression, + $.assert_expression, + $.with_expression, + $.let_expression, + $._expr_if + ), + + function_expression: ($) => + choice( + seq( + field("universal", $.identifier), + ":", + field("body", $._expr_function_expression) + ), + seq( + field("formals", $.formals), + ":", + field("body", $._expr_function_expression) + ), + seq( + field("formals", $.formals), + "@", + field("universal", $.identifier), + ":", + field("body", $._expr_function_expression) + ), + seq( + field("universal", $.identifier), + "@", + field("formals", $.formals), + ":", + field("body", $._expr_function_expression) + ) + ), + + formals: ($) => + choice( + seq("{", "}"), + seq("{", commaSep1(field("formal", $.formal)), "}"), + seq( + "{", + commaSep1(field("formal", $.formal)), + ",", + field("ellipses", $.ellipses), + "}" + ), + seq("{", field("ellipses", $.ellipses), "}") + ), + formal: ($) => + seq( + field("name", $.identifier), + optional(seq("?", field("default", 
$._expression))) + ), + ellipses: ($) => "...", + + assert_expression: ($) => + seq( + "assert", + field("condition", $._expression), + ";", + field("body", $._expr_function_expression) + ), + with_expression: ($) => + seq( + "with", + field("environment", $._expression), + ";", + field("body", $._expr_function_expression) + ), + let_expression: ($) => + seq( + "let", + optional($.binding_set), + "in", + field("body", $._expr_function_expression) + ), + + _expr_if: ($) => choice($.if_expression, $._expr_op), + + if_expression: ($) => + seq( + "if", + field("condition", $._expression), + "then", + field("consequence", $._expression), + "else", + field("alternative", $._expression) + ), + + _expr_op: ($) => + choice( + $.has_attr_expression, + $.unary_expression, + $.binary_expression, + $._expr_apply_expression + ), + + // I choose to *not* have this among the binary operators because + // this is the sole exception that takes an attrpath (instead of expression) + // as its right operand. + // My gut feeling is that this is: + // 1) better in theory, and + // 2) will be easier to work with in practice. + has_attr_expression: ($) => + prec( + PREC["?"], + seq( + field("expression", $._expr_op), + field("operator", "?"), + field("attrpath", $.attrpath) + ) + ), + + unary_expression: ($) => + choice( + ...[ + ["!", PREC.not], + ["-", PREC.negate], + ].map(([operator, precedence]) => + prec( + precedence, + seq(field("operator", operator), field("argument", $._expr_op)) + ) + ) + ), + + binary_expression: ($) => + choice( + // left assoc. + ...[ + ["==", PREC.eq], + ["!=", PREC.neq], + ["<", PREC["<"]], + ["<=", PREC.leq], + [">", PREC[">"]], + [">=", PREC.geq], + ["&&", PREC.and], + ["||", PREC.or], + ["+", PREC["+"]], + ["-", PREC["-"]], + ["*", PREC["*"]], + ["/", PREC["/"]], + ].map(([operator, precedence]) => + prec.left( + precedence, + seq( + field("left", $._expr_op), + field("operator", operator), + field("right", $._expr_op) + ) + ) + ), + // right assoc. 
+ ...[ + ["->", PREC.impl], + ["//", PREC.update], + ["++", PREC.concat], + ].map(([operator, precedence]) => + prec.right( + precedence, + seq( + field("left", $._expr_op), + field("operator", operator), + field("right", $._expr_op) + ) + ) + ) + ), + + _expr_apply_expression: ($) => + choice($.apply_expression, $._expr_select_expression), + + apply_expression: ($) => + seq( + field("function", $._expr_apply_expression), + field("argument", $._expr_select_expression) + ), + + _expr_select_expression: ($) => choice($.select_expression, $._expr_simple), + + select_expression: ($) => + choice( + seq( + field("expression", $._expr_simple), + ".", + field("attrpath", $.attrpath) + ), + seq( + field("expression", $._expr_simple), + ".", + field("attrpath", $.attrpath), + "or", + field("default", $._expr_select_expression) + ) + ), + + _expr_simple: ($) => + choice( + $.variable_expression, + $.integer_expression, + $.float_expression, + $.string_expression, + $.indented_string_expression, + $.path_expression, + $.hpath_expression, + $.spath_expression, + $.uri_expression, + $.parenthesized_expression, + $.attrset_expression, + $.let_attrset_expression, + $.rec_attrset_expression, + $.list_expression + ), + + parenthesized_expression: ($) => + seq("(", field("expression", $._expression), ")"), + + attrset_expression: ($) => seq("{", optional($.binding_set), "}"), + let_attrset_expression: ($) => + seq("let", "{", optional($.binding_set), "}"), + rec_attrset_expression: ($) => + seq("rec", "{", optional($.binding_set), "}"), + + string_expression: ($) => + seq( + '"', + repeat( + choice( + $.string_fragment, + $.interpolation, + choice( + $.escape_sequence, + seq($.dollar_escape, alias("$", $.string_fragment)) + ) + ) + ), + '"' + ), + + escape_sequence: ($) => token.immediate(/\\([^$]|\s)/), // Can also escape newline. 
+ + indented_string_expression: ($) => + seq( + "''", + repeat( + choice( + alias($._indented_string_fragment, $.string_fragment), + $.interpolation, + choice( + alias($._indented_escape_sequence, $.escape_sequence), + seq( + alias($._indented_dollar_escape, $.dollar_escape), + alias("$", $.string_fragment) + ) + ) + ) + ), + "''" + ), + _indented_escape_sequence: ($) => token.immediate(/'''|''\\([^$]|\s)/), // Can also escape newline. + + binding_set: ($) => + repeat1(field("binding", choice($.binding, $.inherit, $.inherit_from))), + binding: ($) => + seq( + field("attrpath", $.attrpath), + "=", + field("expression", $._expression), + ";" + ), + inherit: ($) => seq("inherit", field("attrs", $.inherited_attrs), ";"), + inherit_from: ($) => + seq( + "inherit", + "(", + field("expression", $._expression), + ")", + field("attrs", $.inherited_attrs), + ";" + ), + + attrpath: ($) => + sep1( + field( + "attr", + choice($.identifier, $.string_expression, $.interpolation) + ), + "." + ), + + inherited_attrs: ($) => + repeat1( + field( + "attr", + choice($.identifier, $.string_expression, $.interpolation) + ) + ), + + _immediate_interpolation: ($) => + seq(token.immediate("${"), field("expression", $._expression), "}"), + interpolation: ($) => seq("${", field("expression", $._expression), "}"), + + list_expression: ($) => + seq("[", repeat(field("element", $._expr_select_expression)), "]"), + + comment: ($) => + token(choice(seq("#", /.*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"))), + }, +}); + +function sep(rule, separator) { + return optional(sep1(rule, separator)); +} + +function sep1(rule, separator) { + return seq(rule, repeat(seq(separator, rule))); +} + +function commaSep1(rule) { + return sep1(rule, ","); +} + +function commaSep(rule) { + return optional(commaSep1(rule)); +} diff --git a/nix/src/scanner.c b/nix/src/scanner.c new file mode 100644 index 0000000..f1ee051 --- /dev/null +++ b/nix/src/scanner.c @@ -0,0 +1,238 @@ +#include <tree_sitter/parser.h> + +enum 
TokenType { + STRING_FRAGMENT, + INDENTED_STRING_FRAGMENT, + PATH_START, + PATH_FRAGMENT, + DOLLAR_ESCAPE, + INDENTED_DOLLAR_ESCAPE, +}; + +static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static bool scan_dollar_escape(TSLexer *lexer) { + lexer->result_symbol = DOLLAR_ESCAPE; + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '$') { + return true; + } else { + return false; + } +} + +static bool scan_indented_dollar_escape(TSLexer *lexer) { + lexer->result_symbol = INDENTED_DOLLAR_ESCAPE; + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '$') { + return true; + } else { + if (lexer->lookahead == '\\') { + advance(lexer); + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + return true; + } + } + return false; + } +} + +// Here we only parse literal fragment inside a string. +// Delimiter, interpolation and escape sequence are handled by the parser and we +// simply stop at them. +// +// The implementation is inspired by tree-sitter-javascript: +// https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/src/scanner.c#L19 +static bool scan_string_fragment(TSLexer *lexer) { + lexer->result_symbol = STRING_FRAGMENT; + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '"': + case '\\': + return has_content; + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } else if (lexer->lookahead != '"' && lexer->lookahead != '\\') { + // Any char following '$' other than '"', '\\' and '{' (which was + // handled above) should be consumed as additional string content. This + // means `$${` doesn't start an interpolation, but `$$${` does. + advance(lexer); + } + break; + // Simply give up on EOF or '\0'. 
+ case '\0': + return false; + default: + advance(lexer); + } + } +} + +// See comments of scan_string_fragment. +static bool scan_indented_string_fragment(TSLexer *lexer) { + lexer->result_symbol = INDENTED_STRING_FRAGMENT; + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } else if (lexer->lookahead != '\'') { + // Any char following '$' other than '\'' and '{' (which was handled + // above) should be consumed as additional string content. This means + // `$${` doesn't start an interpolation, but `$$${` does. + advance(lexer); + } + break; + case '\'': + advance(lexer); + if (lexer->lookahead == '\'') { + // Two single quotes always stop current string fragment. + // It can be either an end delimiter '', or escape sequences ''', ''$, + // ''\<any> + return has_content; + } + break; + // Simply give up on EOF or '\0'. + case '\0': + return false; + default: + advance(lexer); + } + } +} + +static bool is_path_char(int32_t c) { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || c == '-' || c == '+' || c == '_' || + c == '.' || c == '/'; +} + +static bool scan_path_start(TSLexer *lexer) { + lexer->result_symbol = PATH_START; + + bool have_sep = false; + bool have_after_sep = false; + int32_t c = lexer->lookahead; + + // unlike string_fragments which which are preceded by initial token (i.e. + // '"') and thus will have all leading external whitespace consumed, we have + // no such luxury with the path_start token. + // + // so we must skip over any leading whitespace here. 
+ while (c == ' ' || c == '\n' || c == '\r' || c == '\t') { + skip(lexer); + c = lexer->lookahead; + } + + while (true) { + lexer->mark_end(lexer); + c = lexer->lookahead; + + if (c == '/') { + have_sep = true; + } else if (is_path_char(c)) { + if (have_sep) { + have_after_sep = true; + } + } else if (c == '$') { + // starting a interpolation, + // so we have a valid token as long as we've seen a separator. + // example: a/${x} + return have_sep; + } else { + // we have a valid token if we've consumed anything after a separator. + // example: a/b + return have_after_sep; + } + + advance(lexer); + } +} + +static bool scan_path_fragment(TSLexer *lexer) { + lexer->result_symbol = PATH_FRAGMENT; + + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + if (!is_path_char(lexer->lookahead)) { + return has_content; + } + advance(lexer); + } +} + +void *tree_sitter_nix_external_scanner_create() { return NULL; } + +bool tree_sitter_nix_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + // This never happens in valid grammar. Only during error recovery, everything + // becomes valid. See: https://github.com/tree-sitter/tree-sitter/issues/1259 + // + // We should not consume any content as string fragment during error recovery, + // or we'll break more valid grammar below. The test 'attrset typing field + // following string' covers this. 
+ if (valid_symbols[STRING_FRAGMENT] && + valid_symbols[INDENTED_STRING_FRAGMENT] && valid_symbols[PATH_START] && + valid_symbols[PATH_FRAGMENT] && valid_symbols[DOLLAR_ESCAPE] && + valid_symbols[INDENTED_DOLLAR_ESCAPE]) { + return false; + } else if (valid_symbols[STRING_FRAGMENT]) { + if (lexer->lookahead == '\\') { + return scan_dollar_escape(lexer); + } + return scan_string_fragment(lexer); + } else if (valid_symbols[INDENTED_STRING_FRAGMENT]) { + if (lexer->lookahead == '\'') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '\'') { + return scan_indented_dollar_escape(lexer); + } + } + return scan_indented_string_fragment(lexer); + } else if (valid_symbols[PATH_FRAGMENT] && is_path_char(lexer->lookahead)) { + // path_fragments should be scanned as immediate tokens, with no preceding + // extras. so we assert that the very first token is a path character, and + // otherwise we fall through to the case below. example: + // a/b${c} d/e${f} + // ^--- note that scanning for the path_fragment will start here. + // this *should* be parsed as a function application. + // so we want to fall through to the path_start case below, + // which will skip the whitespace and correctly scan the + // following path_start. + // + // also, we want this above path_start, because wherever there's ambiguity + // we want to parse another fragment instead of starting a new path. 
+ // example: + // a/b${c}d/e${f} + // if we swap the precedence, we'd effectively parse the above as the + // following function application: + // (a/b${c}) (d/e${f}) + return scan_path_fragment(lexer); + } else if (valid_symbols[PATH_START]) { + return scan_path_start(lexer); + } + + return false; +} + +unsigned tree_sitter_nix_external_scanner_serialize(void *payload, + char *buffer) { + return 0; +} + +void tree_sitter_nix_external_scanner_deserialize(void *payload, + const char *buffer, + unsigned length) {} + +void tree_sitter_nix_external_scanner_destroy(void *payload) {} diff --git a/nix/test/highlight/basic.nix b/nix/test/highlight/basic.nix new file mode 100644 index 0000000..54e61d6 --- /dev/null +++ b/nix/test/highlight/basic.nix @@ -0,0 +1,80 @@ +{ + or = { or = 1; }.or or 42; + # <- property + # ^ punctuation.delimiter + # ^ property + # ^ property + # ^ keyword + the-question = if builtins.true then "to be" else "not to be"; + # <- property + # ^ property + # ^ property + # ^ keyword + # ^ variable.builtin + # ^ property + # ^ keyword + # ^ string + # ^ keyword + # ^ string + null = if null then true else false; + # <- property + # ^ variable.builtin + # ^ variable.builtin + # ^ variable.builtin + pkgs' = { inherit (pkgs) stdenv lib; }; + # <- property + # ^ property + # ^ keyword + # ^ variable + # ^ property + # ^ property + thing' = + # <- property + let inherit (pkgs) stdenv lib; + # <- keyword + # ^ keyword + # ^ variable + # ^ property + # ^ property + in derivation rec { + # <- keyword + # ^ function.builtin + # ^ keyword + pname = "thing"; + # <- property + # ^ string + version = "v1.2.3"; + name = "${pname}-${version}"; + # <- property + # ^ string + # ^ punctuation.special + # ^ variable + # ^ punctuation.special + # ^ string + # ^ variable + # ^ string + buildInputs = with pkgs; [ thing_a thing_b ]; + # <- property + # ^ keyword + # ^ variable + # ^ variable + # ^ variable + }; + assert_bool = bool: assert lib.isBool bool; bool; + # <- 
property + # ^ variable.parameter + # ^ keyword + # ^ variable + # ^ function + # ^ variable + # ^ variable + import = import ./overlays.nix { inherit pkgs; }; + # <- property + # ^ function.builtin + # ^ string.special.path + # ^ keyword + # ^ property + uri = https://github.com; + # ^ string.special.uri + # ^ string.special.uri +} diff --git a/python/LICENSE b/python/LICENSE new file mode 100644 index 0000000..ff8ed93 --- /dev/null +++ b/python/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/python/examples/compound-statement-without-trailing-newline.py b/python/examples/compound-statement-without-trailing-newline.py new file mode 100644 index 0000000..69c6a27 --- /dev/null +++ b/python/examples/compound-statement-without-trailing-newline.py @@ -0,0 +1,3 @@ +class Foo: + def bar(): + print "hi" \ No newline at end of file diff --git a/python/examples/crlf-line-endings.py b/python/examples/crlf-line-endings.py new file mode 100644 index 0000000..f84766e --- /dev/null +++ b/python/examples/crlf-line-endings.py @@ -0,0 +1,6 @@ +print a + +if b: + if c: + d + e diff --git a/python/examples/mixed-spaces-tabs.py b/python/examples/mixed-spaces-tabs.py new file mode 100644 index 0000000..ddcbb5f --- /dev/null +++ b/python/examples/mixed-spaces-tabs.py @@ -0,0 +1,4 @@ +def main(): + print "hello" + # 1 tab = 8 spaces in Python 2 + return diff --git a/python/examples/multiple-newlines.py b/python/examples/multiple-newlines.py new file mode 100644 index 0000000..aeb5d8d --- /dev/null +++ b/python/examples/multiple-newlines.py @@ -0,0 +1,25 @@ +def hi(): + + + + print "hi" + + +def bye(): + print "bye" + + + + + + + + + + + + + + + + diff --git a/python/examples/python2-grammar-crlf.py b/python/examples/python2-grammar-crlf.py new file mode 100644 index 0000000..fe129fa --- /dev/null +++ b/python/examples/python2-grammar-crlf.py @@ -0,0 +1,973 @@ +# Python test set -- part 1, grammar. +# This just tests whether the parser accepts them all. + +# NOTE: When you run this test as a script from the command line, you +# get warnings about certain hex/oct constants. Since those are +# issued by the parser, you can't suppress them by adding a +# filterwarnings() call to this module. Therefore, to shut up the +# regression test, the filterwarnings() call has been added to +# regrtest.py. 
+ +from test.test_support import run_unittest, check_syntax_error +import unittest +import sys +# testing import * +from sys import * + +class TokenTests(unittest.TestCase): + + def testBackslash(self): + # Backslash means line continuation: + x = 1 \ + + 1 + self.assertEquals(x, 2, 'backslash for line continuation') + + # Backslash does not means continuation in comments :\ + x = 0 + self.assertEquals(x, 0, 'backslash ending comment') + + def testPlainIntegers(self): + self.assertEquals(0xff, 255) + self.assertEquals(0377, 255) + self.assertEquals(2147483647, 017777777777) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") + from sys import maxint + if maxint == 2147483647: + self.assertEquals(-2147483647-1, -020000000000) + # XXX -2147483648 + self.assert_(037777777777 > 0) + self.assert_(0xffffffff > 0) + for s in '2147483648', '040000000000', '0x100000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + elif maxint == 9223372036854775807: + self.assertEquals(-9223372036854775807-1, -01000000000000000000000) + self.assert_(01777777777777777777777 > 0) + self.assert_(0xffffffffffffffff > 0) + for s in '9223372036854775808', '02000000000000000000000','0x10000000000000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + else: + self.fail('Weird maxint value %r' % maxint) + + def testLongIntegers(self): + x = 0L + x = 0l + x = 0xffffffffffffffffL + x = 0xffffffffffffffffl + x = 077777777777777777L + x = 077777777777777777l + x = 123456789012345678901234567890L + x = 123456789012345678901234567890l + + def testFloats(self): + x = 3.14 + x = 314. 
+ x = 0.314 + # XXX x = 000.314 + x = .314 + x = 3e14 + x = 3E14 + x = 3e-14 + x = 3e+14 + x = 3.e14 + x = .3e14 + x = 3.1e4 + +class GrammarTests(unittest.TestCase): + + # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE + # XXX can't test in a script -- this rule is only used when interactive + + # file_input: (NEWLINE | stmt)* ENDMARKER + # Being tested as this very moment this very module + + # expr_input: testlist NEWLINE + # XXX Hard to test -- used only in calls to input() + + def testEvalInput(self): + # testlist ENDMARKER + x = eval('1, 0 or 1') + + def testFuncdef(self): + ### 'def' NAME parameters ':' suite + ### parameters: '(' [varargslist] ')' + ### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME] + ### | ('**'|'*' '*') NAME) + ### | fpdef ['=' test] (',' fpdef ['=' test])* [','] + ### fpdef: NAME | '(' fplist ')' + ### fplist: fpdef (',' fpdef)* [','] + ### arglist: (argument ',')* (argument | *' test [',' '**' test] | '**' test) + ### argument: [test '='] test # Really [keyword '='] test + def f1(): pass + f1() + f1(*()) + f1(*(), **{}) + def f2(one_argument): pass + def f3(two, arguments): pass + def f4(two, (compound, (argument, list))): pass + def f5((compound, first), two): pass + self.assertEquals(f2.func_code.co_varnames, ('one_argument',)) + self.assertEquals(f3.func_code.co_varnames, ('two', 'arguments')) + if sys.platform.startswith('java'): + self.assertEquals(f4.func_code.co_varnames, + ('two', '(compound, (argument, list))', 'compound', 'argument', + 'list',)) + self.assertEquals(f5.func_code.co_varnames, + ('(compound, first)', 'two', 'compound', 'first')) + else: + self.assertEquals(f4.func_code.co_varnames, + ('two', '.1', 'compound', 'argument', 'list')) + self.assertEquals(f5.func_code.co_varnames, + ('.0', 'two', 'compound', 'first')) + def a1(one_arg,): pass + def a2(two, args,): pass + def v0(*rest): pass + def v1(a, *rest): pass + def v2(a, b, *rest): pass + def v3(a, (b, c), *rest): return a, 
b, c, rest + + f1() + f2(1) + f2(1,) + f3(1, 2) + f3(1, 2,) + f4(1, (2, (3, 4))) + v0() + v0(1) + v0(1,) + v0(1,2) + v0(1,2,3,4,5,6,7,8,9,0) + v1(1) + v1(1,) + v1(1,2) + v1(1,2,3) + v1(1,2,3,4,5,6,7,8,9,0) + v2(1,2) + v2(1,2,3) + v2(1,2,3,4) + v2(1,2,3,4,5,6,7,8,9,0) + v3(1,(2,3)) + v3(1,(2,3),4) + v3(1,(2,3),4,5,6,7,8,9,0) + + # ceval unpacks the formal arguments into the first argcount names; + # thus, the names nested inside tuples must appear after these names. + if sys.platform.startswith('java'): + self.assertEquals(v3.func_code.co_varnames, ('a', '(b, c)', 'rest', 'b', 'c')) + else: + self.assertEquals(v3.func_code.co_varnames, ('a', '.1', 'rest', 'b', 'c')) + self.assertEquals(v3(1, (2, 3), 4), (1, 2, 3, (4,))) + def d01(a=1): pass + d01() + d01(1) + d01(*(1,)) + d01(**{'a':2}) + def d11(a, b=1): pass + d11(1) + d11(1, 2) + d11(1, **{'b':2}) + def d21(a, b, c=1): pass + d21(1, 2) + d21(1, 2, 3) + d21(*(1, 2, 3)) + d21(1, *(2, 3)) + d21(1, 2, *(3,)) + d21(1, 2, **{'c':3}) + def d02(a=1, b=2): pass + d02() + d02(1) + d02(1, 2) + d02(*(1, 2)) + d02(1, *(2,)) + d02(1, **{'b':2}) + d02(**{'a': 1, 'b': 2}) + def d12(a, b=1, c=2): pass + d12(1) + d12(1, 2) + d12(1, 2, 3) + def d22(a, b, c=1, d=2): pass + d22(1, 2) + d22(1, 2, 3) + d22(1, 2, 3, 4) + def d01v(a=1, *rest): pass + d01v() + d01v(1) + d01v(1, 2) + d01v(*(1, 2, 3, 4)) + d01v(*(1,)) + d01v(**{'a':2}) + def d11v(a, b=1, *rest): pass + d11v(1) + d11v(1, 2) + d11v(1, 2, 3) + def d21v(a, b, c=1, *rest): pass + d21v(1, 2) + d21v(1, 2, 3) + d21v(1, 2, 3, 4) + d21v(*(1, 2, 3, 4)) + d21v(1, 2, **{'c': 3}) + def d02v(a=1, b=2, *rest): pass + d02v() + d02v(1) + d02v(1, 2) + d02v(1, 2, 3) + d02v(1, *(2, 3, 4)) + d02v(**{'a': 1, 'b': 2}) + def d12v(a, b=1, c=2, *rest): pass + d12v(1) + d12v(1, 2) + d12v(1, 2, 3) + d12v(1, 2, 3, 4) + d12v(*(1, 2, 3, 4)) + d12v(1, 2, *(3, 4, 5)) + d12v(1, *(2,), **{'c': 3}) + def d22v(a, b, c=1, d=2, *rest): pass + d22v(1, 2) + d22v(1, 2, 3) + d22v(1, 2, 3, 4) + d22v(1, 2, 3, 4, 5) + 
d22v(*(1, 2, 3, 4)) + d22v(1, 2, *(3, 4, 5)) + d22v(1, *(2, 3), **{'d': 4}) + def d31v((x)): pass + d31v(1) + def d32v((x,)): pass + d32v((1,)) + + # keyword arguments after *arglist + def f(*args, **kwargs): + return args, kwargs + self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), + {'x':2, 'y':5})) + self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") + self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") + + # Check ast errors in *args and *kwargs + check_syntax_error(self, "f(*g(1=2))") + check_syntax_error(self, "f(**g(1=2))") + + def testLambdef(self): + ### lambdef: 'lambda' [varargslist] ':' test + l1 = lambda : 0 + self.assertEquals(l1(), 0) + l2 = lambda : a[d] # XXX just testing the expression + l3 = lambda : [2 < x for x in [-1, 3, 0L]] + self.assertEquals(l3(), [0, 1, 0]) + l4 = lambda x = lambda y = lambda z=1 : z : y() : x() + self.assertEquals(l4(), 1) + l5 = lambda x, y, z=2: x + y + z + self.assertEquals(l5(1, 2), 5) + self.assertEquals(l5(1, 2, 3), 6) + check_syntax_error(self, "lambda x: x = 2") + check_syntax_error(self, "lambda (None,): None") + + ### stmt: simple_stmt | compound_stmt + # Tested below + + def testSimpleStmt(self): + ### simple_stmt: small_stmt (';' small_stmt)* [';'] + x = 1; pass; del x + def foo(): + # verify statements that end with semi-colons + x = 1; pass; del x; + foo() + + ### small_stmt: expr_stmt | print_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt | exec_stmt + # Tested below + + def testExprStmt(self): + # (exprlist '=')* exprlist + 1 + 1, 2, 3 + x = 1 + x = 1, 2, 3 + x = y = z = 1, 2, 3 + x, y, z = 1, 2, 3 + abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) + + check_syntax_error(self, "x + 1 = 1") + check_syntax_error(self, "a + 1 = b + 2") + + def testPrintStmt(self): + # 'print' (test ',')* [test] + import StringIO + + # Can't test printing to real stdout without comparing output + # which is not available in unittest. 
+ save_stdout = sys.stdout + sys.stdout = StringIO.StringIO() + + print 1, 2, 3 + print 1, 2, 3, + print + print 0 or 1, 0 or 1, + print 0 or 1 + + # 'print' '>>' test ',' + print >> sys.stdout, 1, 2, 3 + print >> sys.stdout, 1, 2, 3, + print >> sys.stdout + print >> sys.stdout, 0 or 1, 0 or 1, + print >> sys.stdout, 0 or 1 + + # test printing to an instance + class Gulp: + def write(self, msg): pass + + gulp = Gulp() + print >> gulp, 1, 2, 3 + print >> gulp, 1, 2, 3, + print >> gulp + print >> gulp, 0 or 1, 0 or 1, + print >> gulp, 0 or 1 + + # test print >> None + def driver(): + oldstdout = sys.stdout + sys.stdout = Gulp() + try: + tellme(Gulp()) + tellme() + finally: + sys.stdout = oldstdout + + # we should see this once + def tellme(file=sys.stdout): + print >> file, 'hello world' + + driver() + + # we should not see this at all + def tellme(file=None): + print >> file, 'goodbye universe' + + driver() + + self.assertEqual(sys.stdout.getvalue(), '''\ +1 2 3 +1 2 3 +1 1 1 +1 2 3 +1 2 3 +1 1 1 +hello world +''') + sys.stdout = save_stdout + + # syntax errors + check_syntax_error(self, 'print ,') + check_syntax_error(self, 'print >> x,') + + def testDelStmt(self): + # 'del' exprlist + abc = [1,2,3] + x, y, z = abc + xyz = x, y, z + + del abc + del x, y, (z, xyz) + + def testPassStmt(self): + # 'pass' + pass + + # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt + # Tested below + + def testBreakStmt(self): + # 'break' + while 1: break + + def testContinueStmt(self): + # 'continue' + i = 1 + while i: i = 0; continue + + msg = "" + while not msg: + msg = "ok" + try: + continue + msg = "continue failed to continue inside try" + except: + msg = "continue inside try called except block" + if msg != "ok": + self.fail(msg) + + msg = "" + while not msg: + msg = "finally block not called" + try: + continue + finally: + msg = "ok" + if msg != "ok": + self.fail(msg) + + def test_break_continue_loop(self): + # This test warrants an explanation. 
It is a test specifically for SF bugs + # #463359 and #462937. The bug is that a 'break' statement executed or + # exception raised inside a try/except inside a loop, *after* a continue + # statement has been executed in that loop, will cause the wrong number of + # arguments to be popped off the stack and the instruction pointer reset to + # a very small number (usually 0.) Because of this, the following test + # *must* written as a function, and the tracking vars *must* be function + # arguments with default values. Otherwise, the test will loop and loop. + + def test_inner(extra_burning_oil = 1, count=0): + big_hippo = 2 + while big_hippo: + count += 1 + try: + if extra_burning_oil and big_hippo == 1: + extra_burning_oil -= 1 + break + big_hippo -= 1 + continue + except: + raise + if count > 2 or big_hippo <> 1: + self.fail("continue then break in try/except in loop broken!") + test_inner() + + def testReturn(self): + # 'return' [testlist] + def g1(): return + def g2(): return 1 + g1() + x = g2() + check_syntax_error(self, "class foo:return 1") + + def testYield(self): + check_syntax_error(self, "class foo:yield 1") + + def testRaise(self): + # 'raise' test [',' test] + try: raise RuntimeError, 'just testing' + except RuntimeError: pass + try: raise KeyboardInterrupt + except KeyboardInterrupt: pass + + def testImport(self): + # 'import' dotted_as_names + import sys + import time, sys + # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) + from time import time + from time import (time) + # not testable inside a function, but already done at top of the module + # from sys import * + from sys import path, argv + from sys import (path, argv) + from sys import (path, argv,) + + def testGlobal(self): + # 'global' NAME (',' NAME)* + global a + global a, b + global one, two, three, four, five, six, seven, eight, nine, ten + + def testExec(self): + # 'exec' expr ['in' expr [',' expr]] + z = None + del z + exec 'z=1+1\n' + if z != 2: 
self.fail('exec \'z=1+1\'\\n') + del z + exec 'z=1+1' + if z != 2: self.fail('exec \'z=1+1\'') + z = None + del z + import types + if hasattr(types, "UnicodeType"): + exec r"""if 1: + exec u'z=1+1\n' + if z != 2: self.fail('exec u\'z=1+1\'\\n') + del z + exec u'z=1+1' + if z != 2: self.fail('exec u\'z=1+1\'')""" + g = {} + exec 'z = 1' in g + if g.has_key('__builtins__'): del g['__builtins__'] + if g != {'z': 1}: self.fail('exec \'z = 1\' in g') + g = {} + l = {} + + import warnings + warnings.filterwarnings("ignore", "global statement", module="<string>") + exec 'global a; a = 1; b = 2' in g, l + if g.has_key('__builtins__'): del g['__builtins__'] + if l.has_key('__builtins__'): del l['__builtins__'] + if (g, l) != ({'a':1}, {'b':2}): + self.fail('exec ... in g (%s), l (%s)' %(g,l)) + + def testAssert(self): + # assert_stmt: 'assert' test [',' test] + assert 1 + assert 1, 1 + assert lambda x:x + assert 1, lambda x:x+1 + try: + assert 0, "msg" + except AssertionError, e: + self.assertEquals(e.args[0], "msg") + else: + if __debug__: + self.fail("AssertionError not raised by assert 0") + + ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef + # Tested below + + def testIf(self): + # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] + if 1: pass + if 1: pass + else: pass + if 0: pass + elif 0: pass + if 0: pass + elif 0: pass + elif 0: pass + elif 0: pass + else: pass + + def testWhile(self): + # 'while' test ':' suite ['else' ':' suite] + while 0: pass + while 0: pass + else: pass + + # Issue1920: "while 0" is optimized away, + # ensure that the "else" clause is still present. 
+ x = 0 + while 0: + x = 1 + else: + x = 2 + self.assertEquals(x, 2) + + def testFor(self): + # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] + for i in 1, 2, 3: pass + for i, j, k in (): pass + else: pass + class Squares: + def __init__(self, max): + self.max = max + self.sofar = [] + def __len__(self): return len(self.sofar) + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n = n+1 + return self.sofar[i] + n = 0 + for x in Squares(10): n = n+x + if n != 285: + self.fail('for over growing sequence') + + result = [] + for x, in [(1,), (2,), (3,)]: + result.append(x) + self.assertEqual(result, [1, 2, 3]) + + def testTry(self): + ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] + ### | 'try' ':' suite 'finally' ':' suite + ### except_clause: 'except' [expr [('as' | ',') expr]] + try: + 1/0 + except ZeroDivisionError: + pass + else: + pass + try: 1/0 + except EOFError: pass + except TypeError as msg: pass + except RuntimeError, msg: pass + except: pass + else: pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError): pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError), msg: pass + try: pass + finally: pass + + def testSuite(self): + # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT + if 1: pass + if 1: + pass + if 1: + # + # + # + pass + pass + # + pass + # + + def testTest(self): + ### and_test ('or' and_test)* + ### and_test: not_test ('and' not_test)* + ### not_test: 'not' not_test | comparison + if not 1: pass + if 1 and 1: pass + if 1 or 1: pass + if not not not 1: pass + if not 1 and 1 and 1: pass + if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass + + def testComparison(self): + ### comparison: expr (comp_op expr)* + ### comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + if 1: pass + x = (1 == 1) + if 1 == 1: pass + if 1 != 1: pass + if 1 <> 1: pass + if 1 < 1: pass + if 1 > 
1: pass + if 1 <= 1: pass + if 1 >= 1: pass + if 1 is 1: pass + if 1 is not 1: pass + if 1 in (): pass + if 1 not in (): pass + if 1 < 1 > 1 == 1 >= 1 <= 1 <> 1 != 1 in 1 not in 1 is 1 is not 1: pass + + def testBinaryMaskOps(self): + x = 1 & 1 + x = 1 ^ 1 + x = 1 | 1 + + def testShiftOps(self): + x = 1 << 1 + x = 1 >> 1 + x = 1 << 1 >> 1 + + def testAdditiveOps(self): + x = 1 + x = 1 + 1 + x = 1 - 1 - 1 + x = 1 - 1 + 1 - 1 + 1 + + def testMultiplicativeOps(self): + x = 1 * 1 + x = 1 / 1 + x = 1 % 1 + x = 1 / 1 * 1 % 1 + + def testUnaryOps(self): + x = +1 + x = -1 + x = ~1 + x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 + x = -1*1/1 + 1*1 - ---1*1 + + def testSelectors(self): + ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME + ### subscript: expr | [expr] ':' [expr] + + import sys, time + c = sys.path[0] + x = time.time() + x = sys.modules['time'].time() + a = '01234' + c = a[0] + c = a[-1] + s = a[0:5] + s = a[:5] + s = a[0:] + s = a[:] + s = a[-5:] + s = a[:-1] + s = a[-4:-3] + # A rough test of SF bug 1333982. http://python.org/sf/1333982 + # The testing here is fairly incomplete. 
+ # Test cases should include: commas with 1 and 2 colons + d = {} + d[1] = 1 + d[1,] = 2 + d[1,2] = 3 + d[1,2,3] = 4 + L = list(d) + L.sort() + self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') + + def testAtoms(self): + ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING + ### dictmaker: test ':' test (',' test ':' test)* [','] + + x = (1) + x = (1 or 2 or 3) + x = (1 or 2 or 3, 2, 3) + + x = [] + x = [1] + x = [1 or 2 or 3] + x = [1 or 2 or 3, 2, 3] + x = [] + + x = {} + x = {'one': 1} + x = {'one': 1,} + x = {'one' or 'two': 1 or 2} + x = {'one': 1, 'two': 2} + x = {'one': 1, 'two': 2,} + x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} + + x = `x` + x = `1 or 2 or 3` + self.assertEqual(`1,2`, '(1, 2)') + + x = x + x = 'x' + x = 123 + + ### exprlist: expr (',' expr)* [','] + ### testlist: test (',' test)* [','] + # These have been exercised enough above + + def testClassdef(self): + # 'class' NAME ['(' [testlist] ')'] ':' suite + class B: pass + class B2(): pass + class C1(B): pass + class C2(B): pass + class D(C1, C2, B): pass + class C: + def meth1(self): pass + def meth2(self, arg): pass + def meth3(self, a1, a2): pass + # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + # decorators: decorator+ + # decorated: decorators (classdef | funcdef) + def class_decorator(x): + x.decorated = True + return x + @class_decorator + class G: + pass + self.assertEqual(G.decorated, True) + + def testListcomps(self): + # list comprehension tests + nums = [1, 2, 3, 4, 5] + strs = ["Apple", "Banana", "Coconut"] + spcs = [" Apple", " Banana ", "Coco nut "] + + self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) + self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) + self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) + self.assertEqual([(i, s) for i in nums for s in strs], + [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), + (2, 'Apple'), (2, 
'Banana'), (2, 'Coconut'), + (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), + (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], + [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], + [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) + + def test_in_func(l): + return [None < x < 3 for x in l if x > 2] + + self.assertEqual(test_in_func(nums), [False, False, False]) + + def test_nested_front(): + self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], + [[1, 2], [3, 4], [5, 6]]) + + test_nested_front() + + check_syntax_error(self, "[i, s for i in nums for s in strs]") + check_syntax_error(self, "[x if y]") + + suppliers = [ + (1, "Boeing"), + (2, "Ford"), + (3, "Macdonalds") + ] + + parts = [ + (10, "Airliner"), + (20, "Engine"), + (30, "Cheeseburger") + ] + + suppart = [ + (1, 10), (1, 20), (2, 20), (3, 30) + ] + + x = [ + (sname, pname) + for (sno, sname) in suppliers + for (pno, pname) in parts + for (sp_sno, sp_pno) in suppart + if sno == sp_sno and pno == sp_pno + ] + + self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), + ('Macdonalds', 'Cheeseburger')]) + + def testGenexps(self): + # generator expression tests + g = ([x for x in range(10)] for x in range(1)) + self.assertEqual(g.next(), [x for x in range(10)]) + try: + g.next() + self.fail('should produce StopIteration exception') + except StopIteration: + pass + + a = 1 + try: + g = (a for d in a) + g.next() + self.fail('should produce TypeError') + except TypeError: + pass + + self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) + self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) 
for x in 'ab' for y in 'xy']) + + a = [x for x in range(10)] + b = (x for x in (y for y in a)) + self.assertEqual(sum(b), sum([x for x in range(10)])) + + self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) + self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) + self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) + check_syntax_error(self, "foo(x for x in range(10), 100)") + check_syntax_error(self, "foo(100, x for x in range(10))") + + def testComprehensionSpecials(self): + # test for outmost iterable precomputation + x = 10; g = (i for i in range(x)); x = 5 + self.assertEqual(len(list(g)), 10) + + # This should hold, since we're only precomputing outmost iterable. + x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) + x = 5; t = True; + self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) + + # Grammar allows multiple adjacent 'if's in listcomps and genexps, + # even though it's silly. Make sure it works (ifelse broke this.) + self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) + self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) + + # verify unpacking single element tuples in listcomp/genexp. 
+ self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) + self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) + + def test_with_statement(self): + class manager(object): + def __enter__(self): + return (1, 2) + def __exit__(self, *args): + pass + + with manager(): + pass + with manager() as x: + pass + with manager() as (x, y): + pass + with manager(), manager(): + pass + with manager() as x, manager() as y: + pass + with manager() as x, manager(): + pass + + def testIfElseExpr(self): + # Test ifelse expressions in various cases + def _checkeval(msg, ret): + "helper to check that evaluation of expressions is done correctly" + print x + return ret + + self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) + self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) + self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) + self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) + self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) + self.assertEqual((5 and 6 if 0 else 1), 1) + self.assertEqual(((5 and 6) if 0 else 1), 1) + self.assertEqual((5 and (6 if 1 else 1)), 6) + self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) + self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) + self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) + self.assertEqual((not 5 if 1 else 1), False) + self.assertEqual((not 5 if 0 else 1), 1) + self.assertEqual((6 + 1 if 1 else 2), 7) + self.assertEqual((6 - 1 if 1 else 2), 5) + self.assertEqual((6 * 2 if 1 else 4), 12) + self.assertEqual((6 / 2 if 1 else 3), 3) + self.assertEqual((6 < 4 if 0 else 2), 2) + + def testStringLiterals(self): + x = ''; y = ""; self.assert_(len(x) == 0 and x == y) + x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) + x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) 
+ x = "doesn't \"shrink\" does it" + y = 'doesn\'t "shrink" does it' + self.assert_(len(x) == 24 and x == y) + x = "does \"shrink\" doesn't it" + y = 'does "shrink" doesn\'t it' + self.assert_(len(x) == 24 and x == y) + x = """ +The "quick" +brown fox +jumps over +the 'lazy' dog. +""" + y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' + self.assertEquals(x, y) + y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +''' + self.assertEquals(x, y) + y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +" + self.assertEquals(x, y) + y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +' + self.assertEquals(x, y) + + + +def test_main(): + run_unittest(TokenTests, GrammarTests) + +if __name__ == '__main__': + test_main() diff --git a/python/examples/python2-grammar.py b/python/examples/python2-grammar.py new file mode 100644 index 0000000..d6822bd --- /dev/null +++ b/python/examples/python2-grammar.py @@ -0,0 +1,975 @@ +# Python test set -- part 1, grammar. +# This just tests whether the parser accepts them all. + +# NOTE: When you run this test as a script from the command line, you +# get warnings about certain hex/oct constants. Since those are +# issued by the parser, you can't suppress them by adding a +# filterwarnings() call to this module. Therefore, to shut up the +# regression test, the filterwarnings() call has been added to +# regrtest.py. 
+ +from test.test_support import run_unittest, check_syntax_error +import unittest +import sys +# testing import * +from sys import * + +class TokenTests(unittest.TestCase): + + def testBackslash(self): + # Backslash means line continuation: + x = 1 \ + + 1 + self.assertEquals(x, 2, 'backslash for line continuation') + + # Backslash does not means continuation in comments :\ + x = 0 + self.assertEquals(x, 0, 'backslash ending comment') + + def testPlainIntegers(self): + self.assertEquals(0xff, 255) + self.assertEquals(0377, 255) + self.assertEquals(2147483647, 017777777777) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") + from sys import maxint + if maxint == 2147483647: + self.assertEquals(-2147483647-1, -020000000000) + # XXX -2147483648 + self.assert_(037777777777 > 0) + self.assert_(0xffffffff > 0) + for s in '2147483648', '040000000000', '0x100000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + elif maxint == 9223372036854775807: + self.assertEquals(-9223372036854775807-1, -01000000000000000000000) + self.assert_(01777777777777777777777 > 0) + self.assert_(0xffffffffffffffff > 0) + for s in '9223372036854775808', '02000000000000000000000', \ + '0x10000000000000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + else: + self.fail('Weird maxint value %r' % maxint) + + def testLongIntegers(self): + x = 0L + x = 0l + x = 0xffffffffffffffffL + x = 0xffffffffffffffffl + x = 077777777777777777L + x = 077777777777777777l + x = 123456789012345678901234567890L + x = 123456789012345678901234567890l + + def testFloats(self): + x = 3.14 + x = 314. 
+ x = 0.314 + # XXX x = 000.314 + x = .314 + x = 3e14 + x = 3E14 + x = 3e-14 + x = 3e+14 + x = 3.e14 + x = .3e14 + x = 3.1e4 + +class GrammarTests(unittest.TestCase): + + # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE + # XXX can't test in a script -- this rule is only used when interactive + + # file_input: (NEWLINE | stmt)* ENDMARKER + # Being tested as this very moment this very module + + # expr_input: testlist NEWLINE + # XXX Hard to test -- used only in calls to input() + + def testEvalInput(self): + # testlist ENDMARKER + x = eval('1, 0 or 1') + + def testFuncdef(self): + ### 'def' NAME parameters ':' suite + ### parameters: '(' [varargslist] ')' + ### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME] + ### | ('**'|'*' '*') NAME) + ### | fpdef ['=' test] (',' fpdef ['=' test])* [','] + ### fpdef: NAME | '(' fplist ')' + ### fplist: fpdef (',' fpdef)* [','] + ### arglist: (argument ',')* (argument | *' test [',' '**' test] | '**' test) + ### argument: [test '='] test # Really [keyword '='] test + def f1(): pass + f1() + f1(*()) + f1(*(), **{}) + def f2(one_argument): pass + def f3(two, arguments): pass + def f4(two, (compound, (argument, list))): pass + def f5((compound, first), two): pass + self.assertEquals(f2.func_code.co_varnames, ('one_argument',)) + self.assertEquals(f3.func_code.co_varnames, ('two', 'arguments')) + if sys.platform.startswith('java'): + self.assertEquals(f4.func_code.co_varnames, + ('two', '(compound, (argument, list))', 'compound', 'argument', + 'list',)) + self.assertEquals(f5.func_code.co_varnames, + ('(compound, first)', 'two', 'compound', 'first')) + else: + self.assertEquals(f4.func_code.co_varnames, + ('two', '.1', 'compound', 'argument', 'list')) + self.assertEquals(f5.func_code.co_varnames, + ('.0', 'two', 'compound', 'first')) + def a1(one_arg,): pass + def a2(two, args,): pass + def v0(*rest): pass + def v1(a, *rest): pass + def v2(a, b, *rest): pass + def v3(a, (b, c), *rest): return a, 
b, c, rest + + f1() + f2(1) + f2(1,) + f3(1, 2) + f3(1, 2,) + f4(1, (2, (3, 4))) + v0() + v0(1) + v0(1,) + v0(1,2) + v0(1,2,3,4,5,6,7,8,9,0) + v1(1) + v1(1,) + v1(1,2) + v1(1,2,3) + v1(1,2,3,4,5,6,7,8,9,0) + v2(1,2) + v2(1,2,3) + v2(1,2,3,4) + v2(1,2,3,4,5,6,7,8,9,0) + v3(1,(2,3)) + v3(1,(2,3),4) + v3(1,(2,3),4,5,6,7,8,9,0) + + # ceval unpacks the formal arguments into the first argcount names; + # thus, the names nested inside tuples must appear after these names. + if sys.platform.startswith('java'): + self.assertEquals(v3.func_code.co_varnames, ('a', '(b, c)', 'rest', 'b', 'c')) + else: + self.assertEquals(v3.func_code.co_varnames, ('a', '.1', 'rest', 'b', 'c')) + self.assertEquals(v3(1, (2, 3), 4), (1, 2, 3, (4,))) + def d01(a=1): pass + d01() + d01(1) + d01(*(1,)) + d01(**{'a':2}) + def d11(a, b=1): pass + d11(1) + d11(1, 2) + d11(1, **{'b':2}) + def d21(a, b, c=1): pass + d21(1, 2) + d21(1, 2, 3) + d21(*(1, 2, 3)) + d21(1, *(2, 3)) + d21(1, 2, *(3,)) + d21(1, 2, **{'c':3}) + def d02(a=1, b=2): pass + d02() + d02(1) + d02(1, 2) + d02(*(1, 2)) + d02(1, *(2,)) + d02(1, **{'b':2}) + d02(**{'a': 1, 'b': 2}) + def d12(a, b=1, c=2): pass + d12(1) + d12(1, 2) + d12(1, 2, 3) + def d22(a, b, c=1, d=2): pass + d22(1, 2) + d22(1, 2, 3) + d22(1, 2, 3, 4) + def d01v(a=1, *rest): pass + d01v() + d01v(1) + d01v(1, 2) + d01v(*(1, 2, 3, 4)) + d01v(*(1,)) + d01v(**{'a':2}) + def d11v(a, b=1, *rest): pass + d11v(1) + d11v(1, 2) + d11v(1, 2, 3) + def d21v(a, b, c=1, *rest): pass + d21v(1, 2) + d21v(1, 2, 3) + d21v(1, 2, 3, 4) + d21v(*(1, 2, 3, 4)) + d21v(1, 2, **{'c': 3}) + def d02v(a=1, b=2, *rest): pass + d02v() + d02v(1) + d02v(1, 2) + d02v(1, 2, 3) + d02v(1, *(2, 3, 4)) + d02v(**{'a': 1, 'b': 2}) + def d12v(a, b=1, c=2, *rest): pass + d12v(1) + d12v(1, 2) + d12v(1, 2, 3) + d12v(1, 2, 3, 4) + d12v(*(1, 2, 3, 4)) + d12v(1, 2, *(3, 4, 5)) + d12v(1, *(2,), **{'c': 3}) + def d22v(a, b, c=1, d=2, *rest): pass + d22v(1, 2) + d22v(1, 2, 3) + d22v(1, 2, 3, 4) + d22v(1, 2, 3, 4, 5) + 
d22v(*(1, 2, 3, 4)) + d22v(1, 2, *(3, 4, 5)) + d22v(1, *(2, 3), **{'d': 4}) + def d31v((x)): pass + d31v(1) + def d32v((x,)): pass + d32v((1,)) + + # keyword arguments after *arglist + def f(*args, **kwargs): + return args, kwargs + self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), + {'x':2, 'y':5})) + self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") + self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") + + # Check ast errors in *args and *kwargs + check_syntax_error(self, "f(*g(1=2))") + check_syntax_error(self, "f(**g(1=2))") + + def testLambdef(self): + ### lambdef: 'lambda' [varargslist] ':' test + l1 = lambda : 0 + self.assertEquals(l1(), 0) + l2 = lambda : a[d] # XXX just testing the expression + l3 = lambda : [2 < x for x in [-1, 3, 0L]] + self.assertEquals(l3(), [0, 1, 0]) + l4 = lambda x = lambda y = lambda z=1 : z : y() : x() + self.assertEquals(l4(), 1) + l5 = lambda x, y, z=2: x + y + z + self.assertEquals(l5(1, 2), 5) + self.assertEquals(l5(1, 2, 3), 6) + check_syntax_error(self, "lambda x: x = 2") + check_syntax_error(self, "lambda (None,): None") + + ### stmt: simple_stmt | compound_stmt + # Tested below + + def testSimpleStmt(self): + ### simple_stmt: small_stmt (';' small_stmt)* [';'] + x = 1; pass; del x + def foo(): + # verify statements that end with semi-colons + x = 1; pass; del x; + foo() + + ### small_stmt: expr_stmt | print_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt | exec_stmt + # Tested below + + def testExprStmt(self): + # (exprlist '=')* exprlist + 1 + 1, 2, 3 + x = 1 + x = 1, 2, 3 + x = y = z = 1, 2, 3 + x, y, z = 1, 2, 3 + abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) + + check_syntax_error(self, "x + 1 = 1") + check_syntax_error(self, "a + 1 = b + 2") + + def testPrintStmt(self): + # 'print' (test ',')* [test] + import StringIO + + # Can't test printing to real stdout without comparing output + # which is not available in unittest. 
+ save_stdout = sys.stdout + sys.stdout = StringIO.StringIO() + + print 1, 2, 3 + print 1, 2, 3, + print + print 0 or 1, 0 or 1, + print 0 or 1 + + # 'print' '>>' test ',' + print >> sys.stdout, 1, 2, 3 + print >> sys.stdout, 1, 2, 3, + print >> sys.stdout + print >> sys.stdout, 0 or 1, 0 or 1, + print >> sys.stdout, 0 or 1 + + # test printing to an instance + class Gulp: + def write(self, msg): pass + + gulp = Gulp() + print >> gulp, 1, 2, 3 + print >> gulp, 1, 2, 3, + print >> gulp + print >> gulp, 0 or 1, 0 or 1, + print >> gulp, 0 or 1 + + # test print >> None + def driver(): + oldstdout = sys.stdout + sys.stdout = Gulp() + try: + tellme(Gulp()) + tellme() + finally: + sys.stdout = oldstdout + + # we should see this once + def tellme(file=sys.stdout): + print >> file, 'hello world' + + driver() + + # we should not see this at all + def tellme(file=None): + print >> file, 'goodbye universe' + + driver() + + self.assertEqual(sys.stdout.getvalue(), '''\ +1 2 3 +1 2 3 +1 1 1 +1 2 3 +1 2 3 +1 1 1 +hello world +''') + sys.stdout = save_stdout + + # syntax errors + check_syntax_error(self, 'print ,') + check_syntax_error(self, 'print >> x,') + + def testDelStmt(self): + # 'del' exprlist + abc = [1,2,3] + x, y, z = abc + xyz = x, y, z + + del abc + del x, y, (z, xyz) + + def testPassStmt(self): + # 'pass' + pass + + # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt + # Tested below + + def testBreakStmt(self): + # 'break' + while 1: break + + def testContinueStmt(self): + # 'continue' + i = 1 + while i: i = 0; continue + + msg = "" + while not msg: + msg = "ok" + try: + continue + msg = "continue failed to continue inside try" + except: + msg = "continue inside try called except block" + if msg != "ok": + self.fail(msg) + + msg = "" + while not msg: + msg = "finally block not called" + try: + continue + finally: + msg = "ok" + if msg != "ok": + self.fail(msg) + + def test_break_continue_loop(self): + # This test warrants an explanation. 
It is a test specifically for SF bugs + # #463359 and #462937. The bug is that a 'break' statement executed or + # exception raised inside a try/except inside a loop, *after* a continue + # statement has been executed in that loop, will cause the wrong number of + # arguments to be popped off the stack and the instruction pointer reset to + # a very small number (usually 0.) Because of this, the following test + # *must* written as a function, and the tracking vars *must* be function + # arguments with default values. Otherwise, the test will loop and loop. + + def test_inner(extra_burning_oil = 1, count=0): + big_hippo = 2 + while big_hippo: + count += 1 + try: + if extra_burning_oil and big_hippo == 1: + extra_burning_oil -= 1 + break + big_hippo -= 1 + continue + except: + raise + if count > 2 or big_hippo <> 1: + self.fail("continue then break in try/except in loop broken!") + test_inner() + + def testReturn(self): + # 'return' [testlist] + def g1(): return + def g2(): return 1 + g1() + x = g2() + check_syntax_error(self, "class foo:return 1") + + def testYield(self): + check_syntax_error(self, "class foo:yield 1") + + def testRaise(self): + # 'raise' test [',' test] + try: raise RuntimeError, 'just testing' + except RuntimeError: pass + try: raise KeyboardInterrupt + except KeyboardInterrupt: pass + + def testImport(self): + # 'import' dotted_as_names + import sys + import time, sys + # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) + from time import time + from time import (time) + # not testable inside a function, but already done at top of the module + # from sys import * + from sys import path, argv + from sys import (path, argv) + from sys import (path, argv,) + + def testGlobal(self): + # 'global' NAME (',' NAME)* + global a + global a, b + global one, two, three, four, five, six, seven, eight, nine, ten + + def testExec(self): + # 'exec' expr ['in' expr [',' expr]] + z = None + del z + exec 'z=1+1\n' + if z != 2: 
self.fail('exec \'z=1+1\'\\n') + del z + exec 'z=1+1' + if z != 2: self.fail('exec \'z=1+1\'') + z = None + del z + import types + if hasattr(types, "UnicodeType"): + exec r"""if 1: + exec u'z=1+1\n' + if z != 2: self.fail('exec u\'z=1+1\'\\n') + del z + exec u'z=1+1' + if z != 2: self.fail('exec u\'z=1+1\'')""" + g = {} + exec 'z = 1' in g + if g.has_key('__builtins__'): del g['__builtins__'] + if g != {'z': 1}: self.fail('exec \'z = 1\' in g') + g = {} + l = {} + + import warnings + warnings.filterwarnings("ignore", "global statement", module="<string>") + exec 'global a; a = 1; b = 2' in g, l + if g.has_key('__builtins__'): del g['__builtins__'] + if l.has_key('__builtins__'): del l['__builtins__'] + if (g, l) != ({'a':1}, {'b':2}): + self.fail('exec ... in g (%s), l (%s)' %(g,l)) + + def testAssert(self): + # assert_stmt: 'assert' test [',' test] + assert 1 + assert 1, 1 + assert lambda x:x + assert 1, lambda x:x+1 + try: + assert 0, "msg" + except AssertionError, e: + self.assertEquals(e.args[0], "msg") + else: + if __debug__: + self.fail("AssertionError not raised by assert 0") + + ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef + # Tested below + + def testIf(self): + # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] + if 1: pass + if 1: pass + else: pass + if 0: pass + elif 0: pass + if 0: pass + elif 0: pass + elif 0: pass + elif 0: pass + else: pass + + def testWhile(self): + # 'while' test ':' suite ['else' ':' suite] + while 0: pass + while 0: pass + else: pass + + # Issue1920: "while 0" is optimized away, + # ensure that the "else" clause is still present. 
+ x = 0 + while 0: + x = 1 + else: + x = 2 + self.assertEquals(x, 2) + + def testFor(self): + # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] + for i in 1, 2, 3: pass + for i, j, k in (): pass + else: pass + class Squares: + def __init__(self, max): + self.max = max + self.sofar = [] + def __len__(self): return len(self.sofar) + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n = n+1 + return self.sofar[i] + n = 0 + for x in Squares(10): n = n+x + if n != 285: + self.fail('for over growing sequence') + + result = [] + for x, in [(1,), (2,), (3,)]: + result.append(x) + self.assertEqual(result, [1, 2, 3]) + + def testTry(self): + ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] + ### | 'try' ':' suite 'finally' ':' suite + ### except_clause: 'except' [expr [('as' | ',') expr]] + try: + 1/0 + except ZeroDivisionError: + pass + else: + pass + try: 1/0 + except EOFError: pass + except TypeError as msg: pass + except RuntimeError, msg: pass + except: pass + else: pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError): pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError), msg: pass + try: pass + finally: pass + + def testSuite(self): + # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT + if 1: pass + if 1: + pass + if 1: + # + # + # + pass + pass + # + pass + # + + def testTest(self): + ### and_test ('or' and_test)* + ### and_test: not_test ('and' not_test)* + ### not_test: 'not' not_test | comparison + if not 1: pass + if 1 and 1: pass + if 1 or 1: pass + if not not not 1: pass + if not 1 and 1 and 1: pass + if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass + + def testComparison(self): + ### comparison: expr (comp_op expr)* + ### comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + if 1: pass + x = (1 == 1) + if 1 == 1: pass + if 1 != 1: pass + if 1 <> 1: pass + if 1 < 1: pass + if 1 > 
1: pass + if 1 <= 1: pass + if 1 >= 1: pass + if 1 is 1: pass + if 1 is not 1: pass + if 1 in (): pass + if 1 not in (): pass + if 1 < 1 > 1 == 1 >= 1 <= 1 <> 1 != 1 in 1 not in 1 is 1 is not 1: pass + + def testBinaryMaskOps(self): + x = 1 & 1 + x = 1 ^ 1 + x = 1 | 1 + + def testShiftOps(self): + x = 1 << 1 + x = 1 >> 1 + x = 1 << 1 >> 1 + + def testAdditiveOps(self): + x = 1 + x = 1 + 1 + x = 1 - 1 - 1 + x = 1 - 1 + 1 - 1 + 1 + + def testMultiplicativeOps(self): + x = 1 * 1 + x = 1 / 1 + x = 1 % 1 + x = 1 / 1 * 1 % 1 + + def testUnaryOps(self): + x = +1 + x = -1 + x = ~1 + x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 + x = -1*1/1 + 1*1 - ---1*1 + + def testSelectors(self): + ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME + ### subscript: expr | [expr] ':' [expr] + + import sys, time + c = sys.path[0] + x = time.time() + x = sys.modules['time'].time() + a = '01234' + c = a[0] + c = a[-1] + s = a[0:5] + s = a[:5] + s = a[0:] + s = a[:] + s = a[-5:] + s = a[:-1] + s = a[-4:-3] + # A rough test of SF bug 1333982. http://python.org/sf/1333982 + # The testing here is fairly incomplete. 
+ # Test cases should include: commas with 1 and 2 colons + d = {} + d[1] = 1 + d[1,] = 2 + d[1,2] = 3 + d[1,2,3] = 4 + L = list(d) + L.sort() + self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') + + def testAtoms(self): + ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING + ### dictmaker: test ':' test (',' test ':' test)* [','] + + x = (1) + x = (1 or 2 or 3) + x = (1 or 2 or 3, 2, 3) + + x = [] + x = [1] + x = [1 or 2 or 3] + x = [1 or 2 or 3, 2, 3] + x = [] + + x = {} + x = {'one': 1} + x = {'one': 1,} + x = {'one' or 'two': 1 or 2} + x = {'one': 1, 'two': 2} + x = {'one': 1, 'two': 2,} + x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} + + x = `x` + x = `1 or 2 or 3` + self.assertEqual(`1,2`, '(1, 2)') + + x = x + x = 'x' + x = 123 + + ### exprlist: expr (',' expr)* [','] + ### testlist: test (',' test)* [','] + # These have been exercised enough above + + def testClassdef(self): + # 'class' NAME ['(' [testlist] ')'] ':' suite + class B: pass + class B2(): pass + class C1(B): pass + class C2(B): pass + class D(C1, C2, B): pass + class C: + def meth1(self): pass + def meth2(self, arg): pass + def meth3(self, a1, a2): pass + # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + # decorators: decorator+ + # decorated: decorators (classdef | funcdef) + def class_decorator(x): + x.decorated = True + return x + @class_decorator + class G: + pass + self.assertEqual(G.decorated, True) + + def testListcomps(self): + # list comprehension tests + nums = [1, 2, 3, 4, 5] + strs = ["Apple", "Banana", "Coconut"] + spcs = [" Apple", " Banana ", "Coco nut "] + + self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) + self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) + self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) + self.assertEqual([(i, s) for i in nums for s in strs], + [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), + (2, 'Apple'), (2, 
'Banana'), (2, 'Coconut'), + (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), + (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], + [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], + [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) + + def test_in_func(l): + return [None < x < 3 for x in l if x > 2] + + self.assertEqual(test_in_func(nums), [False, False, False]) + + def test_nested_front(): + self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], + [[1, 2], [3, 4], [5, 6]]) + + test_nested_front() + + check_syntax_error(self, "[i, s for i in nums for s in strs]") + check_syntax_error(self, "[x if y]") + + suppliers = [ + (1, "Boeing"), + (2, "Ford"), + (3, "Macdonalds") + ] + + parts = [ + (10, "Airliner"), + (20, "Engine"), + (30, "Cheeseburger") + ] + + suppart = [ + (1, 10), (1, 20), (2, 20), (3, 30) + ] + + x = [ + (sname, pname) + for (sno, sname) in suppliers + for (pno, pname) in parts + for (sp_sno, sp_pno) in suppart + if sno == sp_sno and pno == sp_pno + ] + + self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), + ('Macdonalds', 'Cheeseburger')]) + + def testGenexps(self): + # generator expression tests + g = ([x for x in range(10)] for x in range(1)) + self.assertEqual(g.next(), [x for x in range(10)]) + try: + g.next() + self.fail('should produce StopIteration exception') + except StopIteration: + pass + + a = 1 + try: + g = (a for d in a) + g.next() + self.fail('should produce TypeError') + except TypeError: + pass + + self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) + self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) 
for x in 'ab' for y in 'xy']) + + a = [x for x in range(10)] + b = (x for x in (y for y in a)) + self.assertEqual(sum(b), sum([x for x in range(10)])) + + self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) + self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) + self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) + check_syntax_error(self, "foo(x for x in range(10), 100)") + check_syntax_error(self, "foo(100, x for x in range(10))") + + def testComprehensionSpecials(self): + # test for outmost iterable precomputation + x = 10; g = (i for i in range(x)); x = 5 + self.assertEqual(len(list(g)), 10) + + # This should hold, since we're only precomputing outmost iterable. + x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) + x = 5; t = True; + self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) + + # Grammar allows multiple adjacent 'if's in listcomps and genexps, + # even though it's silly. Make sure it works (ifelse broke this.) + self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) + self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) + + # verify unpacking single element tuples in listcomp/genexp. 
+ self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) + self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) + + def test_with_statement(self): + class manager(object): + def __enter__(self): + return (1, 2) + def __exit__(self, *args): + pass + + with manager(): + pass + with manager() as x: + pass + with manager() as (x, y): + pass + with manager(), manager(): + pass + with manager() as x, manager() as y: + pass + with manager() as x, manager(): + pass + + def testIfElseExpr(self): + # Test ifelse expressions in various cases + def _checkeval(msg, ret): + "helper to check that evaluation of expressions is done correctly" + print x + return ret + + self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) + self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) + self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) + self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) + self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) + self.assertEqual((5 and 6 if 0 else 1), 1) + self.assertEqual(((5 and 6) if 0 else 1), 1) + self.assertEqual((5 and (6 if 1 else 1)), 6) + self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) + self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) + self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) + self.assertEqual((not 5 if 1 else 1), False) + self.assertEqual((not 5 if 0 else 1), 1) + self.assertEqual((6 + 1 if 1 else 2), 7) + self.assertEqual((6 - 1 if 1 else 2), 5) + self.assertEqual((6 * 2 if 1 else 4), 12) + self.assertEqual((6 / 2 if 1 else 3), 3) + self.assertEqual((6 < 4 if 0 else 2), 2) + + def testStringLiterals(self): + x = ''; y = ""; self.assert_(len(x) == 0 and x == y) + x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) + x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) 
+ x = "doesn't \"shrink\" does it" + y = 'doesn\'t "shrink" does it' + self.assert_(len(x) == 24 and x == y) + x = "does \"shrink\" doesn't it" + y = 'does "shrink" doesn\'t it' + self.assert_(len(x) == 24 and x == y) + x = """ +The "quick" +brown fox +jumps over +the 'lazy' dog. +""" + y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' + self.assertEquals(x, y) + y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +''' + self.assertEquals(x, y) + y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +" + self.assertEquals(x, y) + y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +' + self.assertEquals(x, y) + + + +def test_main(): + run_unittest(TokenTests, GrammarTests) + +if __name__ == '__main__': + test_main() + diff --git a/python/examples/python3-grammar-crlf.py b/python/examples/python3-grammar-crlf.py new file mode 100644 index 0000000..225cee2 --- /dev/null +++ b/python/examples/python3-grammar-crlf.py @@ -0,0 +1,945 @@ +# Python test set -- part 1, grammar. +# This just tests whether the parser accepts them all. + +# NOTE: When you run this test as a script from the command line, you +# get warnings about certain hex/oct constants. Since those are +# issued by the parser, you can't suppress them by adding a +# filterwarnings() call to this module. Therefore, to shut up the +# regression test, the filterwarnings() call has been added to +# regrtest.py. 
+ +from test.support import run_unittest, check_syntax_error +import unittest +import sys +# testing import * +from sys import * + +class TokenTests(unittest.TestCase): + + def testBackslash(self): + # Backslash means line continuation: + x = 1 \ + + 1 + self.assertEquals(x, 2, 'backslash for line continuation') + + # Backslash does not means continuation in comments :\ + x = 0 + self.assertEquals(x, 0, 'backslash ending comment') + + def testPlainIntegers(self): + self.assertEquals(type(000), type(0)) + self.assertEquals(0xff, 255) + self.assertEquals(0o377, 255) + self.assertEquals(2147483647, 0o17777777777) + self.assertEquals(0b1001, 9) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") + from sys import maxsize + if maxsize == 2147483647: + self.assertEquals(-2147483647-1, -0o20000000000) + # XXX -2147483648 + self.assert_(0o37777777777 > 0) + self.assert_(0xffffffff > 0) + self.assert_(0b1111111111111111111111111111111 > 0) + for s in ('2147483648', '0o40000000000', '0x100000000', + '0b10000000000000000000000000000000'): + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + elif maxsize == 9223372036854775807: + self.assertEquals(-9223372036854775807-1, -0o1000000000000000000000) + self.assert_(0o1777777777777777777777 > 0) + self.assert_(0xffffffffffffffff > 0) + self.assert_(0b11111111111111111111111111111111111111111111111111111111111111 > 0) + for s in '9223372036854775808', '0o2000000000000000000000', \ + '0x10000000000000000', \ + '0b100000000000000000000000000000000000000000000000000000000000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + else: + self.fail('Weird maxsize value %r' % maxsize) + + def testLongIntegers(self): + x = 0 + x = 0xffffffffffffffff + x = 0Xffffffffffffffff + x = 0o77777777777777777 + x = 0O77777777777777777 + x = 123456789012345678901234567890 + x = 
0b100000000000000000000000000000000000000000000000000000000000000000000 + x = 0B111111111111111111111111111111111111111111111111111111111111111111111 + + def testUnderscoresInNumbers(self): + # Integers + x = 1_0 + x = 123_456_7_89 + x = 0xabc_123_4_5 + x = 0X_abc_123 + x = 0B11_01 + x = 0b_11_01 + x = 0o45_67 + x = 0O_45_67 + + # Floats + x = 3_1.4 + x = 03_1.4 + x = 3_1. + x = .3_1 + x = 3.1_4 + x = 0_3.1_4 + x = 3e1_4 + x = 3_1e+4_1 + x = 3_1E-4_1 + + def testFloats(self): + x = 3.14 + x = 314. + x = 0.314 + # XXX x = 000.314 + x = .314 + x = 3e14 + x = 3E14 + x = 3e-14 + x = 3e+14 + x = 3.e14 + x = .3e14 + x = 3.1e4 + + def testEllipsis(self): + x = ... + self.assert_(x is Ellipsis) + self.assertRaises(SyntaxError, eval, ".. .") + +class GrammarTests(unittest.TestCase): + + # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE + # XXX can't test in a script -- this rule is only used when interactive + + # file_input: (NEWLINE | stmt)* ENDMARKER + # Being tested as this very moment this very module + + # expr_input: testlist NEWLINE + # XXX Hard to test -- used only in calls to input() + + def testEvalInput(self): + # testlist ENDMARKER + x = eval('1, 0 or 1') + + def testFuncdef(self): + ### [decorators] 'def' NAME parameters ['->' test] ':' suite + ### decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + ### decorators: decorator+ + ### parameters: '(' [typedargslist] ')' + ### typedargslist: ((tfpdef ['=' test] ',')* + ### ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) + ### | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + ### tfpdef: NAME [':' test] + ### varargslist: ((vfpdef ['=' test] ',')* + ### ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) + ### | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + ### vfpdef: NAME + def f1(): pass + f1() + f1(*()) + f1(*(), **{}) + def f2(one_argument): pass + def f3(two, arguments): pass + self.assertEquals(f2.__code__.co_varnames, 
('one_argument',)) + self.assertEquals(f3.__code__.co_varnames, ('two', 'arguments')) + def a1(one_arg,): pass + def a2(two, args,): pass + def v0(*rest): pass + def v1(a, *rest): pass + def v2(a, b, *rest): pass + + f1() + f2(1) + f2(1,) + f3(1, 2) + f3(1, 2,) + v0() + v0(1) + v0(1,) + v0(1,2) + v0(1,2,3,4,5,6,7,8,9,0) + v1(1) + v1(1,) + v1(1,2) + v1(1,2,3) + v1(1,2,3,4,5,6,7,8,9,0) + v2(1,2) + v2(1,2,3) + v2(1,2,3,4) + v2(1,2,3,4,5,6,7,8,9,0) + + def d01(a=1): pass + d01() + d01(1) + d01(*(1,)) + d01(**{'a':2}) + def d11(a, b=1): pass + d11(1) + d11(1, 2) + d11(1, **{'b':2}) + def d21(a, b, c=1): pass + d21(1, 2) + d21(1, 2, 3) + d21(*(1, 2, 3)) + d21(1, *(2, 3)) + d21(1, 2, *(3,)) + d21(1, 2, **{'c':3}) + def d02(a=1, b=2): pass + d02() + d02(1) + d02(1, 2) + d02(*(1, 2)) + d02(1, *(2,)) + d02(1, **{'b':2}) + d02(**{'a': 1, 'b': 2}) + def d12(a, b=1, c=2): pass + d12(1) + d12(1, 2) + d12(1, 2, 3) + def d22(a, b, c=1, d=2): pass + d22(1, 2) + d22(1, 2, 3) + d22(1, 2, 3, 4) + def d01v(a=1, *rest): pass + d01v() + d01v(1) + d01v(1, 2) + d01v(*(1, 2, 3, 4)) + d01v(*(1,)) + d01v(**{'a':2}) + def d11v(a, b=1, *rest): pass + d11v(1) + d11v(1, 2) + d11v(1, 2, 3) + def d21v(a, b, c=1, *rest): pass + d21v(1, 2) + d21v(1, 2, 3) + d21v(1, 2, 3, 4) + d21v(*(1, 2, 3, 4)) + d21v(1, 2, **{'c': 3}) + def d02v(a=1, b=2, *rest): pass + d02v() + d02v(1) + d02v(1, 2) + d02v(1, 2, 3) + d02v(1, *(2, 3, 4)) + d02v(**{'a': 1, 'b': 2}) + def d12v(a, b=1, c=2, *rest): pass + d12v(1) + d12v(1, 2) + d12v(1, 2, 3) + d12v(1, 2, 3, 4) + d12v(*(1, 2, 3, 4)) + d12v(1, 2, *(3, 4, 5)) + d12v(1, *(2,), **{'c': 3}) + def d22v(a, b, c=1, d=2, *rest): pass + d22v(1, 2) + d22v(1, 2, 3) + d22v(1, 2, 3, 4) + d22v(1, 2, 3, 4, 5) + d22v(*(1, 2, 3, 4)) + d22v(1, 2, *(3, 4, 5)) + d22v(1, *(2, 3), **{'d': 4}) + + # keyword argument type tests + try: + str('x', **{b'foo':1 }) + except TypeError: + pass + else: + self.fail('Bytes should not work as keyword argument names') + # keyword only argument tests + def 
pos0key1(*, key): return key + pos0key1(key=100) + def pos2key2(p1, p2, *, k1, k2=100): return p1,p2,k1,k2 + pos2key2(1, 2, k1=100) + pos2key2(1, 2, k1=100, k2=200) + pos2key2(1, 2, k2=100, k1=200) + def pos2key2dict(p1, p2, *, k1=100, k2, **kwarg): return p1,p2,k1,k2,kwarg + pos2key2dict(1,2,k2=100,tokwarg1=100,tokwarg2=200) + pos2key2dict(1,2,tokwarg1=100,tokwarg2=200, k2=100) + + # keyword arguments after *arglist + def f(*args, **kwargs): + return args, kwargs + self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), + {'x':2, 'y':5})) + self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") + self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") + + # argument annotation tests + def f(x) -> list: pass + self.assertEquals(f.__annotations__, {'return': list}) + def f(x:int): pass + self.assertEquals(f.__annotations__, {'x': int}) + def f(*x:str): pass + self.assertEquals(f.__annotations__, {'x': str}) + def f(**x:float): pass + self.assertEquals(f.__annotations__, {'x': float}) + def f(x, y:1+2): pass + self.assertEquals(f.__annotations__, {'y': 3}) + def f(a, b:1, c:2, d): pass + self.assertEquals(f.__annotations__, {'b': 1, 'c': 2}) + def f(a, b:1, c:2, d, e:3=4, f=5, *g:6): pass + self.assertEquals(f.__annotations__, + {'b': 1, 'c': 2, 'e': 3, 'g': 6}) + def f(a, b:1, c:2, d, e:3=4, f=5, *g:6, h:7, i=8, j:9=10, + **k:11) -> 12: pass + self.assertEquals(f.__annotations__, + {'b': 1, 'c': 2, 'e': 3, 'g': 6, 'h': 7, 'j': 9, + 'k': 11, 'return': 12}) + # Check for SF Bug #1697248 - mixing decorators and a return annotation + def null(x): return x + @null + def f(x) -> list: pass + self.assertEquals(f.__annotations__, {'return': list}) + + # test closures with a variety of oparg's + closure = 1 + def f(): return closure + def f(x=1): return closure + def f(*, k=1): return closure + def f() -> int: return closure + + # Check ast errors in *args and *kwargs + check_syntax_error(self, "f(*g(1=2))") + check_syntax_error(self, "f(**g(1=2))") + + def 
testLambdef(self): + ### lambdef: 'lambda' [varargslist] ':' test + l1 = lambda : 0 + self.assertEquals(l1(), 0) + l2 = lambda : a[d] # XXX just testing the expression + l3 = lambda : [2 < x for x in [-1, 3, 0]] + self.assertEquals(l3(), [0, 1, 0]) + l4 = lambda x = lambda y = lambda z=1 : z : y() : x() + self.assertEquals(l4(), 1) + l5 = lambda x, y, z=2: x + y + z + self.assertEquals(l5(1, 2), 5) + self.assertEquals(l5(1, 2, 3), 6) + check_syntax_error(self, "lambda x: x = 2") + check_syntax_error(self, "lambda (None,): None") + l6 = lambda x, y, *, k=20: x+y+k + self.assertEquals(l6(1,2), 1+2+20) + self.assertEquals(l6(1,2,k=10), 1+2+10) + + + ### stmt: simple_stmt | compound_stmt + # Tested below + + def testSimpleStmt(self): + ### simple_stmt: small_stmt (';' small_stmt)* [';'] + x = 1; pass; del x + def foo(): + # verify statements that end with semi-colons + x = 1; pass; del x; + foo() + + ### small_stmt: expr_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt + # Tested below + + def testExprStmt(self): + # (exprlist '=')* exprlist + 1 + 1, 2, 3 + x = 1 + x = 1, 2, 3 + x = y = z = 1, 2, 3 + x, y, z = 1, 2, 3 + abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) + + check_syntax_error(self, "x + 1 = 1") + check_syntax_error(self, "a + 1 = b + 2") + + def testDelStmt(self): + # 'del' exprlist + abc = [1,2,3] + x, y, z = abc + xyz = x, y, z + + del abc + del x, y, (z, xyz) + + def testPassStmt(self): + # 'pass' + pass + + # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt + # Tested below + + def testBreakStmt(self): + # 'break' + while 1: break + + def testContinueStmt(self): + # 'continue' + i = 1 + while i: i = 0; continue + + msg = "" + while not msg: + msg = "ok" + try: + continue + msg = "continue failed to continue inside try" + except: + msg = "continue inside try called except block" + if msg != "ok": + self.fail(msg) + + msg = "" + while not msg: + msg = "finally block not called" + try: + continue + finally: + 
msg = "ok" + if msg != "ok": + self.fail(msg) + + def test_break_continue_loop(self): + # This test warrants an explanation. It is a test specifically for SF bugs + # #463359 and #462937. The bug is that a 'break' statement executed or + # exception raised inside a try/except inside a loop, *after* a continue + # statement has been executed in that loop, will cause the wrong number of + # arguments to be popped off the stack and the instruction pointer reset to + # a very small number (usually 0.) Because of this, the following test + # *must* written as a function, and the tracking vars *must* be function + # arguments with default values. Otherwise, the test will loop and loop. + + def test_inner(extra_burning_oil = 1, count=0): + big_hippo = 2 + while big_hippo: + count += 1 + try: + if extra_burning_oil and big_hippo == 1: + extra_burning_oil -= 1 + break + big_hippo -= 1 + continue + except: + raise + if count > 2 or big_hippo != 1: + self.fail("continue then break in try/except in loop broken!") + test_inner() + + def testReturn(self): + # 'return' [testlist] + def g1(): return + def g2(): return 1 + g1() + x = g2() + check_syntax_error(self, "class foo:return 1") + + def testYield(self): + check_syntax_error(self, "class foo:yield 1") + + def testRaise(self): + # 'raise' test [',' test] + try: raise RuntimeError('just testing') + except RuntimeError: pass + try: raise KeyboardInterrupt + except KeyboardInterrupt: pass + + def testImport(self): + # 'import' dotted_as_names + import sys + import time, sys + # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) + from time import time + from time import (time) + # not testable inside a function, but already done at top of the module + # from sys import * + from sys import path, argv + from sys import (path, argv) + from sys import (path, argv,) + + def testGlobal(self): + # 'global' NAME (',' NAME)* + global a + global a, b + global one, two, three, four, five, six, seven, eight, nine, 
ten + + def testNonlocal(self): + # 'nonlocal' NAME (',' NAME)* + x = 0 + y = 0 + def f(): + nonlocal x + nonlocal x, y + + def testAssert(self): + # assert_stmt: 'assert' test [',' test] + assert 1 + assert 1, 1 + assert lambda x:x + assert 1, lambda x:x+1 + try: + assert 0, "msg" + except AssertionError as e: + self.assertEquals(e.args[0], "msg") + else: + if __debug__: + self.fail("AssertionError not raised by assert 0") + + ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef + # Tested below + + def testIf(self): + # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] + if 1: pass + if 1: pass + else: pass + if 0: pass + elif 0: pass + if 0: pass + elif 0: pass + elif 0: pass + elif 0: pass + else: pass + + def testWhile(self): + # 'while' test ':' suite ['else' ':' suite] + while 0: pass + while 0: pass + else: pass + + # Issue1920: "while 0" is optimized away, + # ensure that the "else" clause is still present. + x = 0 + while 0: + x = 1 + else: + x = 2 + self.assertEquals(x, 2) + + def testFor(self): + # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] + for i in 1, 2, 3: pass + for i, j, k in (): pass + else: pass + class Squares: + def __init__(self, max): + self.max = max + self.sofar = [] + def __len__(self): return len(self.sofar) + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n = n+1 + return self.sofar[i] + n = 0 + for x in Squares(10): n = n+x + if n != 285: + self.fail('for over growing sequence') + + result = [] + for x, in [(1,), (2,), (3,)]: + result.append(x) + self.assertEqual(result, [1, 2, 3]) + + def testTry(self): + ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] + ### | 'try' ':' suite 'finally' ':' suite + ### except_clause: 'except' [expr ['as' expr]] + try: + 1/0 + except ZeroDivisionError: + pass + else: + pass + try: 1/0 + except EOFError: pass + except TypeError as 
msg: pass + except RuntimeError as msg: pass + except: pass + else: pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError): pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError) as msg: pass + try: pass + finally: pass + + def testSuite(self): + # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT + if 1: pass + if 1: + pass + if 1: + # + # + # + pass + pass + # + pass + # + + def testTest(self): + ### and_test ('or' and_test)* + ### and_test: not_test ('and' not_test)* + ### not_test: 'not' not_test | comparison + if not 1: pass + if 1 and 1: pass + if 1 or 1: pass + if not not not 1: pass + if not 1 and 1 and 1: pass + if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass + + def testComparison(self): + ### comparison: expr (comp_op expr)* + ### comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' + if 1: pass + x = (1 == 1) + if 1 == 1: pass + if 1 != 1: pass + if 1 < 1: pass + if 1 > 1: pass + if 1 <= 1: pass + if 1 >= 1: pass + if 1 is 1: pass + if 1 is not 1: pass + if 1 in (): pass + if 1 not in (): pass + if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 in 1 not in 1 is 1 is not 1: pass + + def testBinaryMaskOps(self): + x = 1 & 1 + x = 1 ^ 1 + x = 1 | 1 + + def testShiftOps(self): + x = 1 << 1 + x = 1 >> 1 + x = 1 << 1 >> 1 + + def testAdditiveOps(self): + x = 1 + x = 1 + 1 + x = 1 - 1 - 1 + x = 1 - 1 + 1 - 1 + 1 + + def testMultiplicativeOps(self): + x = 1 * 1 + x = 1 / 1 + x = 1 % 1 + x = 1 / 1 * 1 % 1 + + def testUnaryOps(self): + x = +1 + x = -1 + x = ~1 + x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 + x = -1*1/1 + 1*1 - ---1*1 + + def testSelectors(self): + ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME + ### subscript: expr | [expr] ':' [expr] + + import sys, time + c = sys.path[0] + x = time.time() + x = sys.modules['time'].time() + a = '01234' + c = a[0] + c = a[-1] + s = a[0:5] + s = a[:5] + s = a[0:] + s = a[:] + s = a[-5:] + s = a[:-1] + s = a[-4:-3] + # A rough test of SF bug 1333982. 
http://python.org/sf/1333982 + # The testing here is fairly incomplete. + # Test cases should include: commas with 1 and 2 colons + d = {} + d[1] = 1 + d[1,] = 2 + d[1,2] = 3 + d[1,2,3] = 4 + L = list(d) + L.sort(key=lambda x: x if isinstance(x, tuple) else ()) + self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') + + def testAtoms(self): + ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictsetmaker] '}' | NAME | NUMBER | STRING + ### dictsetmaker: (test ':' test (',' test ':' test)* [',']) | (test (',' test)* [',']) + + x = (1) + x = (1 or 2 or 3) + x = (1 or 2 or 3, 2, 3) + + x = [] + x = [1] + x = [1 or 2 or 3] + x = [1 or 2 or 3, 2, 3] + x = [] + + x = {} + x = {'one': 1} + x = {'one': 1,} + x = {'one' or 'two': 1 or 2} + x = {'one': 1, 'two': 2} + x = {'one': 1, 'two': 2,} + x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} + + x = {'one'} + x = {'one', 1,} + x = {'one', 'two', 'three'} + x = {2, 3, 4,} + + x = x + x = 'x' + x = 123 + + ### exprlist: expr (',' expr)* [','] + ### testlist: test (',' test)* [','] + # These have been exercised enough above + + def testClassdef(self): + # 'class' NAME ['(' [testlist] ')'] ':' suite + class B: pass + class B2(): pass + class C1(B): pass + class C2(B): pass + class D(C1, C2, B): pass + class C: + def meth1(self): pass + def meth2(self, arg): pass + def meth3(self, a1, a2): pass + + # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + # decorators: decorator+ + # decorated: decorators (classdef | funcdef) + def class_decorator(x): return x + @class_decorator + class G: pass + + def testDictcomps(self): + # dictorsetmaker: ( (test ':' test (comp_for | + # (',' test ':' test)* [','])) | + # (test (comp_for | (',' test)* [','])) ) + nums = [1, 2, 3] + self.assertEqual({i:i+1 for i in nums}, {1: 2, 2: 3, 3: 4}) + + def testListcomps(self): + # list comprehension tests + nums = [1, 2, 3, 4, 5] + strs = ["Apple", "Banana", "Coconut"] + spcs = [" Apple", " Banana ", "Coco nut "] + 
+ self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) + self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) + self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) + self.assertEqual([(i, s) for i in nums for s in strs], + [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), + (2, 'Apple'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), + (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], + [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], + [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) + + def test_in_func(l): + return [0 < x < 3 for x in l if x > 2] + + self.assertEqual(test_in_func(nums), [False, False, False]) + + def test_nested_front(): + self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], + [[1, 2], [3, 4], [5, 6]]) + + test_nested_front() + + check_syntax_error(self, "[i, s for i in nums for s in strs]") + check_syntax_error(self, "[x if y]") + + suppliers = [ + (1, "Boeing"), + (2, "Ford"), + (3, "Macdonalds") + ] + + parts = [ + (10, "Airliner"), + (20, "Engine"), + (30, "Cheeseburger") + ] + + suppart = [ + (1, 10), (1, 20), (2, 20), (3, 30) + ] + + x = [ + (sname, pname) + for (sno, sname) in suppliers + for (pno, pname) in parts + for (sp_sno, sp_pno) in suppart + if sno == sp_sno and pno == sp_pno + ] + + self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), + ('Macdonalds', 'Cheeseburger')]) + + def testGenexps(self): + # generator expression tests + g = ([x for x in range(10)] for x in range(1)) + self.assertEqual(next(g), [x for x in range(10)]) + try: + next(g) + self.fail('should produce StopIteration exception') + except 
StopIteration: + pass + + a = 1 + try: + g = (a for d in a) + next(g) + self.fail('should produce TypeError') + except TypeError: + pass + + self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) + self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) for x in 'ab' for y in 'xy']) + + a = [x for x in range(10)] + b = (x for x in (y for y in a)) + self.assertEqual(sum(b), sum([x for x in range(10)])) + + self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) + self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) + self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) + check_syntax_error(self, "foo(x for x in range(10), 100)") + check_syntax_error(self, "foo(100, x for x in range(10))") + + def testComprehensionSpecials(self): + # test for outmost iterable precomputation + x = 10; g = (i for i in range(x)); x = 5 + self.assertEqual(len(list(g)), 10) + + # This should hold, since we're only precomputing outmost iterable. + x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) + x = 5; t = True; + self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) + + # Grammar allows multiple adjacent 'if's in listcomps and genexps, + # even though it's silly. Make sure it works (ifelse broke this.) 
+ self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) + self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) + + # verify unpacking single element tuples in listcomp/genexp. + self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) + self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) + + def test_with_statement(self): + class manager(object): + def __enter__(self): + return (1, 2) + def __exit__(self, *args): + pass + + with manager(): + pass + with manager() as x: + pass + with manager() as (x, y): + pass + with manager(), manager(): + pass + with manager() as x, manager() as y: + pass + with manager() as x, manager(): + pass + + def testIfElseExpr(self): + # Test ifelse expressions in various cases + def _checkeval(msg, ret): + "helper to check that evaluation of expressions is done correctly" + print(x) + return ret + + # the next line is not allowed anymore + #self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) + self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) + self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) + self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) + self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) + self.assertEqual((5 and 6 if 0 else 1), 1) + self.assertEqual(((5 and 6) if 0 else 1), 1) + self.assertEqual((5 and (6 if 1 else 1)), 6) + self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) + self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) + self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) + self.assertEqual((not 5 if 1 else 1), False) + self.assertEqual((not 5 if 0 else 1), 1) + self.assertEqual((6 + 1 if 1 else 2), 7) + self.assertEqual((6 - 1 if 1 else 2), 5) + self.assertEqual((6 * 2 if 1 else 4), 12) + self.assertEqual((6 / 2 if 1 else 3), 3) + self.assertEqual((6 < 
4 if 0 else 2), 2) + + def testStringLiterals(self): + x = ''; y = ""; self.assert_(len(x) == 0 and x == y) + x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) + x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) + x = "doesn't \"shrink\" does it" + y = 'doesn\'t "shrink" does it' + self.assert_(len(x) == 24 and x == y) + x = "does \"shrink\" doesn't it" + y = 'does "shrink" doesn\'t it' + self.assert_(len(x) == 24 and x == y) + x = """ +The "quick" +brown fox +jumps over +the 'lazy' dog. +""" + y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' + self.assertEquals(x, y) + y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +''' + self.assertEquals(x, y) + y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +" + self.assertEquals(x, y) + y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +' + self.assertEquals(x, y) + + +def test_main(): + run_unittest(TokenTests, GrammarTests) + +if __name__ == '__main__': + test_main() diff --git a/python/examples/python3-grammar.py b/python/examples/python3-grammar.py new file mode 100644 index 0000000..1afbf30 --- /dev/null +++ b/python/examples/python3-grammar.py @@ -0,0 +1,945 @@ +# Python test set -- part 1, grammar. +# This just tests whether the parser accepts them all. + +# NOTE: When you run this test as a script from the command line, you +# get warnings about certain hex/oct constants. Since those are +# issued by the parser, you can't suppress them by adding a +# filterwarnings() call to this module. Therefore, to shut up the +# regression test, the filterwarnings() call has been added to +# regrtest.py. 
+ +from test.support import run_unittest, check_syntax_error +import unittest +import sys +# testing import * +from sys import * + +class TokenTests(unittest.TestCase): + + def testBackslash(self): + # Backslash means line continuation: + x = 1 \ + + 1 + self.assertEquals(x, 2, 'backslash for line continuation') + + # Backslash does not means continuation in comments :\ + x = 0 + self.assertEquals(x, 0, 'backslash ending comment') + + def testPlainIntegers(self): + self.assertEquals(type(000), type(0)) + self.assertEquals(0xff, 255) + self.assertEquals(0o377, 255) + self.assertEquals(2147483647, 0o17777777777) + self.assertEquals(0b1001, 9) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") + from sys import maxsize + if maxsize == 2147483647: + self.assertEquals(-2147483647-1, -0o20000000000) + # XXX -2147483648 + self.assert_(0o37777777777 > 0) + self.assert_(0xffffffff > 0) + self.assert_(0b1111111111111111111111111111111 > 0) + for s in ('2147483648', '0o40000000000', '0x100000000', + '0b10000000000000000000000000000000'): + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + elif maxsize == 9223372036854775807: + self.assertEquals(-9223372036854775807-1, -0o1000000000000000000000) + self.assert_(0o1777777777777777777777 > 0) + self.assert_(0xffffffffffffffff > 0) + self.assert_(0b11111111111111111111111111111111111111111111111111111111111111 > 0) + for s in '9223372036854775808', '0o2000000000000000000000', \ + '0x10000000000000000', \ + '0b100000000000000000000000000000000000000000000000000000000000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + else: + self.fail('Weird maxsize value %r' % maxsize) + + def testLongIntegers(self): + x = 0 + x = 0xffffffffffffffff + x = 0Xffffffffffffffff + x = 0o77777777777777777 + x = 0O77777777777777777 + x = 123456789012345678901234567890 + x = 
0b100000000000000000000000000000000000000000000000000000000000000000000 + x = 0B111111111111111111111111111111111111111111111111111111111111111111111 + + def testUnderscoresInNumbers(self): + # Integers + x = 1_0 + x = 123_456_7_89 + x = 0xabc_123_4_5 + x = 0X_abc_123 + x = 0B11_01 + x = 0b_11_01 + x = 0o45_67 + x = 0O_45_67 + + # Floats + x = 3_1.4 + x = 03_1.4 + x = 3_1. + x = .3_1 + x = 3.1_4 + x = 0_3.1_4 + x = 3e1_4 + x = 3_1e+4_1 + x = 3_1E-4_1 + + def testFloats(self): + x = 3.14 + x = 314. + x = 0.314 + # XXX x = 000.314 + x = .314 + x = 3e14 + x = 3E14 + x = 3e-14 + x = 3e+14 + x = 3.e14 + x = .3e14 + x = 3.1e4 + + def testEllipsis(self): + x = ... + self.assert_(x is Ellipsis) + self.assertRaises(SyntaxError, eval, ".. .") + +class GrammarTests(unittest.TestCase): + + # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE + # XXX can't test in a script -- this rule is only used when interactive + + # file_input: (NEWLINE | stmt)* ENDMARKER + # Being tested as this very moment this very module + + # expr_input: testlist NEWLINE + # XXX Hard to test -- used only in calls to input() + + def testEvalInput(self): + # testlist ENDMARKER + x = eval('1, 0 or 1') + + def testFuncdef(self): + ### [decorators] 'def' NAME parameters ['->' test] ':' suite + ### decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + ### decorators: decorator+ + ### parameters: '(' [typedargslist] ')' + ### typedargslist: ((tfpdef ['=' test] ',')* + ### ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) + ### | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + ### tfpdef: NAME [':' test] + ### varargslist: ((vfpdef ['=' test] ',')* + ### ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) + ### | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + ### vfpdef: NAME + def f1(): pass + f1() + f1(*()) + f1(*(), **{}) + def f2(one_argument): pass + def f3(two, arguments): pass + self.assertEquals(f2.__code__.co_varnames, 
('one_argument',)) + self.assertEquals(f3.__code__.co_varnames, ('two', 'arguments')) + def a1(one_arg,): pass + def a2(two, args,): pass + def v0(*rest): pass + def v1(a, *rest): pass + def v2(a, b, *rest): pass + + f1() + f2(1) + f2(1,) + f3(1, 2) + f3(1, 2,) + v0() + v0(1) + v0(1,) + v0(1,2) + v0(1,2,3,4,5,6,7,8,9,0) + v1(1) + v1(1,) + v1(1,2) + v1(1,2,3) + v1(1,2,3,4,5,6,7,8,9,0) + v2(1,2) + v2(1,2,3) + v2(1,2,3,4) + v2(1,2,3,4,5,6,7,8,9,0) + + def d01(a=1): pass + d01() + d01(1) + d01(*(1,)) + d01(**{'a':2}) + def d11(a, b=1): pass + d11(1) + d11(1, 2) + d11(1, **{'b':2}) + def d21(a, b, c=1): pass + d21(1, 2) + d21(1, 2, 3) + d21(*(1, 2, 3)) + d21(1, *(2, 3)) + d21(1, 2, *(3,)) + d21(1, 2, **{'c':3}) + def d02(a=1, b=2): pass + d02() + d02(1) + d02(1, 2) + d02(*(1, 2)) + d02(1, *(2,)) + d02(1, **{'b':2}) + d02(**{'a': 1, 'b': 2}) + def d12(a, b=1, c=2): pass + d12(1) + d12(1, 2) + d12(1, 2, 3) + def d22(a, b, c=1, d=2): pass + d22(1, 2) + d22(1, 2, 3) + d22(1, 2, 3, 4) + def d01v(a=1, *rest): pass + d01v() + d01v(1) + d01v(1, 2) + d01v(*(1, 2, 3, 4)) + d01v(*(1,)) + d01v(**{'a':2}) + def d11v(a, b=1, *rest): pass + d11v(1) + d11v(1, 2) + d11v(1, 2, 3) + def d21v(a, b, c=1, *rest): pass + d21v(1, 2) + d21v(1, 2, 3) + d21v(1, 2, 3, 4) + d21v(*(1, 2, 3, 4)) + d21v(1, 2, **{'c': 3}) + def d02v(a=1, b=2, *rest): pass + d02v() + d02v(1) + d02v(1, 2) + d02v(1, 2, 3) + d02v(1, *(2, 3, 4)) + d02v(**{'a': 1, 'b': 2}) + def d12v(a, b=1, c=2, *rest): pass + d12v(1) + d12v(1, 2) + d12v(1, 2, 3) + d12v(1, 2, 3, 4) + d12v(*(1, 2, 3, 4)) + d12v(1, 2, *(3, 4, 5)) + d12v(1, *(2,), **{'c': 3}) + def d22v(a, b, c=1, d=2, *rest): pass + d22v(1, 2) + d22v(1, 2, 3) + d22v(1, 2, 3, 4) + d22v(1, 2, 3, 4, 5) + d22v(*(1, 2, 3, 4)) + d22v(1, 2, *(3, 4, 5)) + d22v(1, *(2, 3), **{'d': 4}) + + # keyword argument type tests + try: + str('x', **{b'foo':1 }) + except TypeError: + pass + else: + self.fail('Bytes should not work as keyword argument names') + # keyword only argument tests + def 
pos0key1(*, key): return key + pos0key1(key=100) + def pos2key2(p1, p2, *, k1, k2=100): return p1,p2,k1,k2 + pos2key2(1, 2, k1=100) + pos2key2(1, 2, k1=100, k2=200) + pos2key2(1, 2, k2=100, k1=200) + def pos2key2dict(p1, p2, *, k1=100, k2, **kwarg): return p1,p2,k1,k2,kwarg + pos2key2dict(1,2,k2=100,tokwarg1=100,tokwarg2=200) + pos2key2dict(1,2,tokwarg1=100,tokwarg2=200, k2=100) + + # keyword arguments after *arglist + def f(*args, **kwargs): + return args, kwargs + self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), + {'x':2, 'y':5})) + self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") + self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") + + # argument annotation tests + def f(x) -> list: pass + self.assertEquals(f.__annotations__, {'return': list}) + def f(x:int): pass + self.assertEquals(f.__annotations__, {'x': int}) + def f(*x:str): pass + self.assertEquals(f.__annotations__, {'x': str}) + def f(**x:float): pass + self.assertEquals(f.__annotations__, {'x': float}) + def f(x, y:1+2): pass + self.assertEquals(f.__annotations__, {'y': 3}) + def f(a, b:1, c:2, d): pass + self.assertEquals(f.__annotations__, {'b': 1, 'c': 2}) + def f(a, b:1, c:2, d, e:3=4, f=5, *g:6): pass + self.assertEquals(f.__annotations__, + {'b': 1, 'c': 2, 'e': 3, 'g': 6}) + def f(a, b:1, c:2, d, e:3=4, f=5, *g:6, h:7, i=8, j:9=10, + **k:11) -> 12: pass + self.assertEquals(f.__annotations__, + {'b': 1, 'c': 2, 'e': 3, 'g': 6, 'h': 7, 'j': 9, + 'k': 11, 'return': 12}) + # Check for SF Bug #1697248 - mixing decorators and a return annotation + def null(x): return x + @null + def f(x) -> list: pass + self.assertEquals(f.__annotations__, {'return': list}) + + # test closures with a variety of oparg's + closure = 1 + def f(): return closure + def f(x=1): return closure + def f(*, k=1): return closure + def f() -> int: return closure + + # Check ast errors in *args and *kwargs + check_syntax_error(self, "f(*g(1=2))") + check_syntax_error(self, "f(**g(1=2))") + + def 
testLambdef(self): + ### lambdef: 'lambda' [varargslist] ':' test + l1 = lambda : 0 + self.assertEquals(l1(), 0) + l2 = lambda : a[d] # XXX just testing the expression + l3 = lambda : [2 < x for x in [-1, 3, 0]] + self.assertEquals(l3(), [0, 1, 0]) + l4 = lambda x = lambda y = lambda z=1 : z : y() : x() + self.assertEquals(l4(), 1) + l5 = lambda x, y, z=2: x + y + z + self.assertEquals(l5(1, 2), 5) + self.assertEquals(l5(1, 2, 3), 6) + check_syntax_error(self, "lambda x: x = 2") + check_syntax_error(self, "lambda (None,): None") + l6 = lambda x, y, *, k=20: x+y+k + self.assertEquals(l6(1,2), 1+2+20) + self.assertEquals(l6(1,2,k=10), 1+2+10) + + + ### stmt: simple_stmt | compound_stmt + # Tested below + + def testSimpleStmt(self): + ### simple_stmt: small_stmt (';' small_stmt)* [';'] + x = 1; pass; del x + def foo(): + # verify statements that end with semi-colons + x = 1; pass; del x; + foo() + + ### small_stmt: expr_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt + # Tested below + + def testExprStmt(self): + # (exprlist '=')* exprlist + 1 + 1, 2, 3 + x = 1 + x = 1, 2, 3 + x = y = z = 1, 2, 3 + x, y, z = 1, 2, 3 + abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) + + check_syntax_error(self, "x + 1 = 1") + check_syntax_error(self, "a + 1 = b + 2") + + def testDelStmt(self): + # 'del' exprlist + abc = [1,2,3] + x, y, z = abc + xyz = x, y, z + + del abc + del x, y, (z, xyz) + + def testPassStmt(self): + # 'pass' + pass + + # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt + # Tested below + + def testBreakStmt(self): + # 'break' + while 1: break + + def testContinueStmt(self): + # 'continue' + i = 1 + while i: i = 0; continue + + msg = "" + while not msg: + msg = "ok" + try: + continue + msg = "continue failed to continue inside try" + except: + msg = "continue inside try called except block" + if msg != "ok": + self.fail(msg) + + msg = "" + while not msg: + msg = "finally block not called" + try: + continue + finally: + 
msg = "ok" + if msg != "ok": + self.fail(msg) + + def test_break_continue_loop(self): + # This test warrants an explanation. It is a test specifically for SF bugs + # #463359 and #462937. The bug is that a 'break' statement executed or + # exception raised inside a try/except inside a loop, *after* a continue + # statement has been executed in that loop, will cause the wrong number of + # arguments to be popped off the stack and the instruction pointer reset to + # a very small number (usually 0.) Because of this, the following test + # *must* written as a function, and the tracking vars *must* be function + # arguments with default values. Otherwise, the test will loop and loop. + + def test_inner(extra_burning_oil = 1, count=0): + big_hippo = 2 + while big_hippo: + count += 1 + try: + if extra_burning_oil and big_hippo == 1: + extra_burning_oil -= 1 + break + big_hippo -= 1 + continue + except: + raise + if count > 2 or big_hippo != 1: + self.fail("continue then break in try/except in loop broken!") + test_inner() + + def testReturn(self): + # 'return' [testlist] + def g1(): return + def g2(): return 1 + g1() + x = g2() + check_syntax_error(self, "class foo:return 1") + + def testYield(self): + check_syntax_error(self, "class foo:yield 1") + + def testRaise(self): + # 'raise' test [',' test] + try: raise RuntimeError('just testing') + except RuntimeError: pass + try: raise KeyboardInterrupt + except KeyboardInterrupt: pass + + def testImport(self): + # 'import' dotted_as_names + import sys + import time, sys + # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) + from time import time + from time import (time) + # not testable inside a function, but already done at top of the module + # from sys import * + from sys import path, argv + from sys import (path, argv) + from sys import (path, argv,) + + def testGlobal(self): + # 'global' NAME (',' NAME)* + global a + global a, b + global one, two, three, four, five, six, seven, eight, nine, 
ten + + def testNonlocal(self): + # 'nonlocal' NAME (',' NAME)* + x = 0 + y = 0 + def f(): + nonlocal x + nonlocal x, y + + def testAssert(self): + # assert_stmt: 'assert' test [',' test] + assert 1 + assert 1, 1 + assert lambda x:x + assert 1, lambda x:x+1 + try: + assert 0, "msg" + except AssertionError as e: + self.assertEquals(e.args[0], "msg") + else: + if __debug__: + self.fail("AssertionError not raised by assert 0") + + ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef + # Tested below + + def testIf(self): + # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] + if 1: pass + if 1: pass + else: pass + if 0: pass + elif 0: pass + if 0: pass + elif 0: pass + elif 0: pass + elif 0: pass + else: pass + + def testWhile(self): + # 'while' test ':' suite ['else' ':' suite] + while 0: pass + while 0: pass + else: pass + + # Issue1920: "while 0" is optimized away, + # ensure that the "else" clause is still present. + x = 0 + while 0: + x = 1 + else: + x = 2 + self.assertEquals(x, 2) + + def testFor(self): + # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] + for i in 1, 2, 3: pass + for i, j, k in (): pass + else: pass + class Squares: + def __init__(self, max): + self.max = max + self.sofar = [] + def __len__(self): return len(self.sofar) + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n = n+1 + return self.sofar[i] + n = 0 + for x in Squares(10): n = n+x + if n != 285: + self.fail('for over growing sequence') + + result = [] + for x, in [(1,), (2,), (3,)]: + result.append(x) + self.assertEqual(result, [1, 2, 3]) + + def testTry(self): + ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] + ### | 'try' ':' suite 'finally' ':' suite + ### except_clause: 'except' [expr ['as' expr]] + try: + 1/0 + except ZeroDivisionError: + pass + else: + pass + try: 1/0 + except EOFError: pass + except TypeError as 
msg: pass + except RuntimeError as msg: pass + except: pass + else: pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError): pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError) as msg: pass + try: pass + finally: pass + + def testSuite(self): + # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT + if 1: pass + if 1: + pass + if 1: + # + # + # + pass + pass + # + pass + # + + def testTest(self): + ### and_test ('or' and_test)* + ### and_test: not_test ('and' not_test)* + ### not_test: 'not' not_test | comparison + if not 1: pass + if 1 and 1: pass + if 1 or 1: pass + if not not not 1: pass + if not 1 and 1 and 1: pass + if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass + + def testComparison(self): + ### comparison: expr (comp_op expr)* + ### comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' + if 1: pass + x = (1 == 1) + if 1 == 1: pass + if 1 != 1: pass + if 1 < 1: pass + if 1 > 1: pass + if 1 <= 1: pass + if 1 >= 1: pass + if 1 is 1: pass + if 1 is not 1: pass + if 1 in (): pass + if 1 not in (): pass + if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 in 1 not in 1 is 1 is not 1: pass + + def testBinaryMaskOps(self): + x = 1 & 1 + x = 1 ^ 1 + x = 1 | 1 + + def testShiftOps(self): + x = 1 << 1 + x = 1 >> 1 + x = 1 << 1 >> 1 + + def testAdditiveOps(self): + x = 1 + x = 1 + 1 + x = 1 - 1 - 1 + x = 1 - 1 + 1 - 1 + 1 + + def testMultiplicativeOps(self): + x = 1 * 1 + x = 1 / 1 + x = 1 % 1 + x = 1 / 1 * 1 % 1 + + def testUnaryOps(self): + x = +1 + x = -1 + x = ~1 + x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 + x = -1*1/1 + 1*1 - ---1*1 + + def testSelectors(self): + ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME + ### subscript: expr | [expr] ':' [expr] + + import sys, time + c = sys.path[0] + x = time.time() + x = sys.modules['time'].time() + a = '01234' + c = a[0] + c = a[-1] + s = a[0:5] + s = a[:5] + s = a[0:] + s = a[:] + s = a[-5:] + s = a[:-1] + s = a[-4:-3] + # A rough test of SF bug 1333982. 
http://python.org/sf/1333982 + # The testing here is fairly incomplete. + # Test cases should include: commas with 1 and 2 colons + d = {} + d[1] = 1 + d[1,] = 2 + d[1,2] = 3 + d[1,2,3] = 4 + L = list(d) + L.sort(key=lambda x: x if isinstance(x, tuple) else ()) + self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') + + def testAtoms(self): + ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictsetmaker] '}' | NAME | NUMBER | STRING + ### dictsetmaker: (test ':' test (',' test ':' test)* [',']) | (test (',' test)* [',']) + + x = (1) + x = (1 or 2 or 3) + x = (1 or 2 or 3, 2, 3) + + x = [] + x = [1] + x = [1 or 2 or 3] + x = [1 or 2 or 3, 2, 3] + x = [] + + x = {} + x = {'one': 1} + x = {'one': 1,} + x = {'one' or 'two': 1 or 2} + x = {'one': 1, 'two': 2} + x = {'one': 1, 'two': 2,} + x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} + + x = {'one'} + x = {'one', 1,} + x = {'one', 'two', 'three'} + x = {2, 3, 4,} + + x = x + x = 'x' + x = 123 + + ### exprlist: expr (',' expr)* [','] + ### testlist: test (',' test)* [','] + # These have been exercised enough above + + def testClassdef(self): + # 'class' NAME ['(' [testlist] ')'] ':' suite + class B: pass + class B2(): pass + class C1(B): pass + class C2(B): pass + class D(C1, C2, B): pass + class C: + def meth1(self): pass + def meth2(self, arg): pass + def meth3(self, a1, a2): pass + + # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + # decorators: decorator+ + # decorated: decorators (classdef | funcdef) + def class_decorator(x): return x + @class_decorator + class G: pass + + def testDictcomps(self): + # dictorsetmaker: ( (test ':' test (comp_for | + # (',' test ':' test)* [','])) | + # (test (comp_for | (',' test)* [','])) ) + nums = [1, 2, 3] + self.assertEqual({i:i+1 for i in nums}, {1: 2, 2: 3, 3: 4}) + + def testListcomps(self): + # list comprehension tests + nums = [1, 2, 3, 4, 5] + strs = ["Apple", "Banana", "Coconut"] + spcs = [" Apple", " Banana ", "Coco nut "] + 
+ self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) + self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) + self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) + self.assertEqual([(i, s) for i in nums for s in strs], + [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), + (2, 'Apple'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), + (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], + [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], + [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) + + def test_in_func(l): + return [0 < x < 3 for x in l if x > 2] + + self.assertEqual(test_in_func(nums), [False, False, False]) + + def test_nested_front(): + self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], + [[1, 2], [3, 4], [5, 6]]) + + test_nested_front() + + check_syntax_error(self, "[i, s for i in nums for s in strs]") + check_syntax_error(self, "[x if y]") + + suppliers = [ + (1, "Boeing"), + (2, "Ford"), + (3, "Macdonalds") + ] + + parts = [ + (10, "Airliner"), + (20, "Engine"), + (30, "Cheeseburger") + ] + + suppart = [ + (1, 10), (1, 20), (2, 20), (3, 30) + ] + + x = [ + (sname, pname) + for (sno, sname) in suppliers + for (pno, pname) in parts + for (sp_sno, sp_pno) in suppart + if sno == sp_sno and pno == sp_pno + ] + + self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), + ('Macdonalds', 'Cheeseburger')]) + + def testGenexps(self): + # generator expression tests + g = ([x for x in range(10)] for x in range(1)) + self.assertEqual(next(g), [x for x in range(10)]) + try: + next(g) + self.fail('should produce StopIteration exception') + except 
StopIteration: + pass + + a = 1 + try: + g = (a for d in a) + next(g) + self.fail('should produce TypeError') + except TypeError: + pass + + self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) + self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) for x in 'ab' for y in 'xy']) + + a = [x for x in range(10)] + b = (x for x in (y for y in a)) + self.assertEqual(sum(b), sum([x for x in range(10)])) + + self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) + self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) + self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) + check_syntax_error(self, "foo(x for x in range(10), 100)") + check_syntax_error(self, "foo(100, x for x in range(10))") + + def testComprehensionSpecials(self): + # test for outmost iterable precomputation + x = 10; g = (i for i in range(x)); x = 5 + self.assertEqual(len(list(g)), 10) + + # This should hold, since we're only precomputing outmost iterable. + x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) + x = 5; t = True; + self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) + + # Grammar allows multiple adjacent 'if's in listcomps and genexps, + # even though it's silly. Make sure it works (ifelse broke this.) 
+ self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) + self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) + + # verify unpacking single element tuples in listcomp/genexp. + self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) + self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) + + def test_with_statement(self): + class manager(object): + def __enter__(self): + return (1, 2) + def __exit__(self, *args): + pass + + with manager(): + pass + with manager() as x: + pass + with manager() as (x, y): + pass + with manager(), manager(): + pass + with manager() as x, manager() as y: + pass + with manager() as x, manager(): + pass + + def testIfElseExpr(self): + # Test ifelse expressions in various cases + def _checkeval(msg, ret): + "helper to check that evaluation of expressions is done correctly" + print(x) + return ret + + # the next line is not allowed anymore + #self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) + self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) + self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) + self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) + self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) + self.assertEqual((5 and 6 if 0 else 1), 1) + self.assertEqual(((5 and 6) if 0 else 1), 1) + self.assertEqual((5 and (6 if 1 else 1)), 6) + self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) + self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) + self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) + self.assertEqual((not 5 if 1 else 1), False) + self.assertEqual((not 5 if 0 else 1), 1) + self.assertEqual((6 + 1 if 1 else 2), 7) + self.assertEqual((6 - 1 if 1 else 2), 5) + self.assertEqual((6 * 2 if 1 else 4), 12) + self.assertEqual((6 / 2 if 1 else 3), 3) + self.assertEqual((6 < 
4 if 0 else 2), 2) + + def testStringLiterals(self): + x = ''; y = ""; self.assert_(len(x) == 0 and x == y) + x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) + x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) + x = "doesn't \"shrink\" does it" + y = 'doesn\'t "shrink" does it' + self.assert_(len(x) == 24 and x == y) + x = "does \"shrink\" doesn't it" + y = 'does "shrink" doesn\'t it' + self.assert_(len(x) == 24 and x == y) + x = f""" +The "quick" +brown fo{ok()}x +jumps over +the 'lazy' dog. +""" + y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' + self.assertEquals(x, y) + y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +''' + self.assertEquals(x, y) + y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +" + self.assertEquals(x, y) + y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +' + self.assertEquals(x, y) + + +def test_main(): + run_unittest(TokenTests, GrammarTests) + +if __name__ == '__main__': + test_main() diff --git a/python/examples/python3.8_grammar.py b/python/examples/python3.8_grammar.py new file mode 100644 index 0000000..6bde90a --- /dev/null +++ b/python/examples/python3.8_grammar.py @@ -0,0 +1,1572 @@ +# Python test set -- part 1, grammar. +# This just tests whether the parser accepts them all. + +from test.support import check_syntax_error +import inspect +import unittest +import sys +# testing import * +from sys import * + +# different import patterns to check that __annotations__ does not interfere +# with import machinery +import test.ann_module as ann_module +import typing +from collections import ChainMap +from test import ann_module2 +import test + +# These are shared with test_tokenize and other test modules. +# +# Note: since several test cases filter out floats by looking for "e" and ".", +# don't add hexadecimal literals that contain "e" or "E". 
+VALID_UNDERSCORE_LITERALS = [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xffff_ffff', + '0o5_7_7', + '1_00_00.5', + '1_00_00.5e5', + '1_00_00e5_1', + '1e1_0', + '.1_4', + '.1_4e1', + '0b_0', + '0x_f', + '0o_5', + '1_00_00j', + '1_00_00.5j', + '1_00_00e5_1j', + '.1_4j', + '(1_2.5+3_3j)', + '(.5_6j)', +] +INVALID_UNDERSCORE_LITERALS = [ + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + '0 if 1_Else 1', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '0_7', + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0.1__4', + '0.1__4j', + '0b1001__0100', + '0xffff__ffff', + '0x___', + '0o5__77', + '1e1__0', + '1e1__0j', + # Underscore right before a dot: + '1_.4', + '1_.4j', + # Underscore right after a dot: + '1._4', + '1._4j', + '._5', + '._5j', + # Underscore right after a sign: + '1.0e+_1', + '1.0e+_1j', + # Underscore right before j: + '1.4_j', + '1.4e5_j', + # Underscore right before e: + '1_e1', + '1.4_e1', + '1.4_e1j', + # Underscore right after e: + '1e_1', + '1.4e_1', + '1.4e_1j', + # Complex cases with parens: + '(1+1.5_j_)', + '(1+1.5_j)', +] + + +class TokenTests(unittest.TestCase): + + def test_backslash(self): + # Backslash means line continuation: + x = 1 \ + + 1 + self.assertEqual(x, 2, 'backslash for line continuation') + + # Backslash does not means continuation in comments :\ + x = 0 + self.assertEqual(x, 0, 'backslash ending comment') + + def test_plain_integers(self): + self.assertEqual(type(000), type(0)) + self.assertEqual(0xff, 255) + self.assertEqual(0o377, 255) + self.assertEqual(2147483647, 0o17777777777) + self.assertEqual(0b1001, 9) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") + from sys import maxsize + if maxsize == 2147483647: + self.assertEqual(-2147483647-1, -0o20000000000) + # XXX -2147483648 + self.assertTrue(0o37777777777 > 0) + self.assertTrue(0xffffffff > 0) + 
self.assertTrue(0b1111111111111111111111111111111 > 0) + for s in ('2147483648', '0o40000000000', '0x100000000', + '0b10000000000000000000000000000000'): + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + elif maxsize == 9223372036854775807: + self.assertEqual(-9223372036854775807-1, -0o1000000000000000000000) + self.assertTrue(0o1777777777777777777777 > 0) + self.assertTrue(0xffffffffffffffff > 0) + self.assertTrue(0b11111111111111111111111111111111111111111111111111111111111111 > 0) + for s in '9223372036854775808', '0o2000000000000000000000', \ + '0x10000000000000000', \ + '0b100000000000000000000000000000000000000000000000000000000000000': + try: + x = eval(s) + except OverflowError: + self.fail("OverflowError on huge integer literal %r" % s) + else: + self.fail('Weird maxsize value %r' % maxsize) + + def test_long_integers(self): + x = 0 + x = 0xffffffffffffffff + x = 0Xffffffffffffffff + x = 0o77777777777777777 + x = 0O77777777777777777 + x = 123456789012345678901234567890 + x = 0b100000000000000000000000000000000000000000000000000000000000000000000 + x = 0B111111111111111111111111111111111111111111111111111111111111111111111 + + def test_floats(self): + x = 3.14 + x = 314. + x = 0.314 + # XXX x = 000.314 + x = .314 + x = 3e14 + x = 3E14 + x = 3e-14 + x = 3e+14 + x = 3.e14 + x = .3e14 + x = 3.1e4 + + def test_float_exponent_tokenization(self): + # See issue 21642. 
+ self.assertEqual(1 if 1else 0, 1) + self.assertEqual(1 if 0else 0, 0) + self.assertRaises(SyntaxError, eval, "0 if 1Else 0") + + def test_underscore_literals(self): + for lit in VALID_UNDERSCORE_LITERALS: + self.assertEqual(eval(lit), eval(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + self.assertRaises(SyntaxError, eval, lit) + # Sanity check: no literal begins with an underscore + self.assertRaises(NameError, eval, "_0") + + def test_string_literals(self): + x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) + x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39) + x = '"'; y = "\""; self.assertTrue(len(x) == 1 and x == y and ord(x) == 34) + x = "doesn't \"shrink\" does it" + y = 'doesn\'t "shrink" does it' + self.assertTrue(len(x) == 24 and x == y) + x = "does \"shrink\" doesn't it" + y = 'does "shrink" doesn\'t it' + self.assertTrue(len(x) == 24 and x == y) + x = """ +The "quick" +brown fox +jumps over +the 'lazy' dog. +""" + y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' + self.assertEqual(x, y) + y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +''' + self.assertEqual(x, y) + y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +" + self.assertEqual(x, y) + y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +' + self.assertEqual(x, y) + + def test_ellipsis(self): + x = ... + self.assertTrue(x is Ellipsis) + self.assertRaises(SyntaxError, eval, ".. 
.") + + def test_eof_error(self): + samples = ("def foo(", "\ndef foo(", "def foo(\n") + for s in samples: + with self.assertRaises(SyntaxError) as cm: + compile(s, "<test>", "exec") + self.assertIn("unexpected EOF", str(cm.exception)) + +# var_annot_global: int # a global annotated is necessary for test_var_annot + +# custom namespace for testing __annotations__ + +class CNS: + def __init__(self): + self._dct = {} + def __setitem__(self, item, value): + self._dct[item.lower()] = value + def __getitem__(self, item): + return self._dct[item] + + +class GrammarTests(unittest.TestCase): + + check_syntax_error = check_syntax_error + + # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE + # XXX can't test in a script -- this rule is only used when interactive + + # file_input: (NEWLINE | stmt)* ENDMARKER + # Being tested as this very moment this very module + + # expr_input: testlist NEWLINE + # XXX Hard to test -- used only in calls to input() + + def test_eval_input(self): + # testlist ENDMARKER + x = eval('1, 0 or 1') + + def test_var_annot_basics(self): + # all these should be allowed + var1: int = 5 + # var2: [int, str] + my_lst = [42] + def one(): + return 1 + # int.new_attr: int + # [list][0]: type + my_lst[one()-1]: int = 5 + self.assertEqual(my_lst, [5]) + + def test_var_annot_syntax_errors(self): + # parser pass + check_syntax_error(self, "def f: int") + check_syntax_error(self, "x: int: str") + check_syntax_error(self, "def f():\n" + " nonlocal x: int\n") + # AST pass + check_syntax_error(self, "[x, 0]: int\n") + check_syntax_error(self, "f(): int\n") + check_syntax_error(self, "(x,): int") + check_syntax_error(self, "def f():\n" + " (x, y): int = (1, 2)\n") + # symtable pass + check_syntax_error(self, "def f():\n" + " x: int\n" + " global x\n") + check_syntax_error(self, "def f():\n" + " global x\n" + " x: int\n") + + def test_var_annot_basic_semantics(self): + # execution order + with self.assertRaises(ZeroDivisionError): + 
no_name[does_not_exist]: no_name_again = 1/0 + with self.assertRaises(NameError): + no_name[does_not_exist]: 1/0 = 0 + global var_annot_global + + # function semantics + def f(): + st: str = "Hello" + a.b: int = (1, 2) + return st + self.assertEqual(f.__annotations__, {}) + def f_OK(): + # x: 1/0 + f_OK() + def fbad(): + # x: int + print(x) + with self.assertRaises(UnboundLocalError): + fbad() + def f2bad(): + # (no_such_global): int + print(no_such_global) + try: + f2bad() + except Exception as e: + self.assertIs(type(e), NameError) + + # class semantics + class C: + # __foo: int + s: str = "attr" + z = 2 + def __init__(self, x): + self.x: int = x + self.assertEqual(C.__annotations__, {'_C__foo': int, 's': str}) + with self.assertRaises(NameError): + class CBad: + no_such_name_defined.attr: int = 0 + with self.assertRaises(NameError): + class Cbad2(C): + # x: int + x.y: list = [] + + def test_var_annot_metaclass_semantics(self): + class CMeta(type): + @classmethod + def __prepare__(metacls, name, bases, **kwds): + return {'__annotations__': CNS()} + class CC(metaclass=CMeta): + # XX: 'ANNOT' + self.assertEqual(CC.__annotations__['xx'], 'ANNOT') + + def test_var_annot_module_semantics(self): + with self.assertRaises(AttributeError): + print(test.__annotations__) + self.assertEqual(ann_module.__annotations__, + {1: 2, 'x': int, 'y': str, 'f': typing.Tuple[int, int]}) + self.assertEqual(ann_module.M.__annotations__, + {'123': 123, 'o': type}) + self.assertEqual(ann_module2.__annotations__, {}) + + def test_var_annot_in_module(self): + # check that functions fail the same way when executed + # outside of module where they were defined + from test.ann_module3 import f_bad_ann, g_bad_ann, D_bad_ann + with self.assertRaises(NameError): + f_bad_ann() + with self.assertRaises(NameError): + g_bad_ann() + with self.assertRaises(NameError): + D_bad_ann(5) + + def test_var_annot_simple_exec(self): + gns = {}; lns= {} + exec("'docstring'\n" + "__annotations__[1] = 2\n" + "x: 
int = 5\n", gns, lns) + self.assertEqual(lns["__annotations__"], {1: 2, 'x': int}) + with self.assertRaises(KeyError): + gns['__annotations__'] + + def test_var_annot_custom_maps(self): + # tests with custom locals() and __annotations__ + ns = {'__annotations__': CNS()} + exec('X: int; Z: str = "Z"; (w): complex = 1j', ns) + self.assertEqual(ns['__annotations__']['x'], int) + self.assertEqual(ns['__annotations__']['z'], str) + with self.assertRaises(KeyError): + ns['__annotations__']['w'] + nonloc_ns = {} + class CNS2: + def __init__(self): + self._dct = {} + def __setitem__(self, item, value): + nonlocal nonloc_ns + self._dct[item] = value + nonloc_ns[item] = value + def __getitem__(self, item): + return self._dct[item] + exec('x: int = 1', {}, CNS2()) + self.assertEqual(nonloc_ns['__annotations__']['x'], int) + + def test_var_annot_refleak(self): + # complex case: custom locals plus custom __annotations__ + # this was causing refleak + cns = CNS() + nonloc_ns = {'__annotations__': cns} + class CNS2: + def __init__(self): + self._dct = {'__annotations__': cns} + def __setitem__(self, item, value): + nonlocal nonloc_ns + self._dct[item] = value + nonloc_ns[item] = value + def __getitem__(self, item): + return self._dct[item] + exec('X: str', {}, CNS2()) + self.assertEqual(nonloc_ns['__annotations__']['x'], str) + + def test_funcdef(self): + ### [decorators] 'def' NAME parameters ['->' test] ':' suite + ### decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + ### decorators: decorator+ + ### parameters: '(' [typedargslist] ')' + ### typedargslist: ((tfpdef ['=' test] ',')* + ### ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) + ### | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + ### tfpdef: NAME [':' test] + ### varargslist: ((vfpdef ['=' test] ',')* + ### ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) + ### | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + ### vfpdef: NAME + def f1(): pass + f1() + 
f1(*()) + f1(*(), **{}) + def f2(one_argument): pass + def f3(two, arguments): pass + self.assertEqual(f2.__code__.co_varnames, ('one_argument',)) + self.assertEqual(f3.__code__.co_varnames, ('two', 'arguments')) + def a1(one_arg,): pass + def a2(two, args,): pass + def v0(*rest): pass + def v1(a, *rest): pass + def v2(a, b, *rest): pass + + f1() + f2(1) + f2(1,) + f3(1, 2) + f3(1, 2,) + v0() + v0(1) + v0(1,) + v0(1,2) + v0(1,2,3,4,5,6,7,8,9,0) + v1(1) + v1(1,) + v1(1,2) + v1(1,2,3) + v1(1,2,3,4,5,6,7,8,9,0) + v2(1,2) + v2(1,2,3) + v2(1,2,3,4) + v2(1,2,3,4,5,6,7,8,9,0) + + def d01(a=1): pass + d01() + d01(1) + d01(*(1,)) + d01(*[] or [2]) + d01(*() or (), *{} and (), **() or {}) + d01(**{'a':2}) + d01(**{'a':2} or {}) + def d11(a, b=1): pass + d11(1) + d11(1, 2) + d11(1, **{'b':2}) + def d21(a, b, c=1): pass + d21(1, 2) + d21(1, 2, 3) + d21(*(1, 2, 3)) + d21(1, *(2, 3)) + d21(1, 2, *(3,)) + d21(1, 2, **{'c':3}) + def d02(a=1, b=2): pass + d02() + d02(1) + d02(1, 2) + d02(*(1, 2)) + d02(1, *(2,)) + d02(1, **{'b':2}) + d02(**{'a': 1, 'b': 2}) + def d12(a, b=1, c=2): pass + d12(1) + d12(1, 2) + d12(1, 2, 3) + def d22(a, b, c=1, d=2): pass + d22(1, 2) + d22(1, 2, 3) + d22(1, 2, 3, 4) + def d01v(a=1, *rest): pass + d01v() + d01v(1) + d01v(1, 2) + d01v(*(1, 2, 3, 4)) + d01v(*(1,)) + d01v(**{'a':2}) + def d11v(a, b=1, *rest): pass + d11v(1) + d11v(1, 2) + d11v(1, 2, 3) + def d21v(a, b, c=1, *rest): pass + d21v(1, 2) + d21v(1, 2, 3) + d21v(1, 2, 3, 4) + d21v(*(1, 2, 3, 4)) + d21v(1, 2, **{'c': 3}) + def d02v(a=1, b=2, *rest): pass + d02v() + d02v(1) + d02v(1, 2) + d02v(1, 2, 3) + d02v(1, *(2, 3, 4)) + d02v(**{'a': 1, 'b': 2}) + def d12v(a, b=1, c=2, *rest): pass + d12v(1) + d12v(1, 2) + d12v(1, 2, 3) + d12v(1, 2, 3, 4) + d12v(*(1, 2, 3, 4)) + d12v(1, 2, *(3, 4, 5)) + d12v(1, *(2,), **{'c': 3}) + def d22v(a, b, c=1, d=2, *rest): pass + d22v(1, 2) + d22v(1, 2, 3) + d22v(1, 2, 3, 4) + d22v(1, 2, 3, 4, 5) + d22v(*(1, 2, 3, 4)) + d22v(1, 2, *(3, 4, 5)) + d22v(1, *(2, 3), 
**{'d': 4}) + + # keyword argument type tests + try: + str('x', **{b'foo':1 }) + except TypeError: + pass + else: + self.fail('Bytes should not work as keyword argument names') + # keyword only argument tests + def pos0key1(*, key): return key + pos0key1(key=100) + def pos2key2(p1, p2, *, k1, k2=100): return p1,p2,k1,k2 + pos2key2(1, 2, k1=100) + pos2key2(1, 2, k1=100, k2=200) + pos2key2(1, 2, k2=100, k1=200) + def pos2key2dict(p1, p2, *, k1=100, k2, **kwarg): return p1,p2,k1,k2,kwarg + pos2key2dict(1,2,k2=100,tokwarg1=100,tokwarg2=200) + pos2key2dict(1,2,tokwarg1=100,tokwarg2=200, k2=100) + + self.assertRaises(SyntaxError, eval, "def f(*): pass") + self.assertRaises(SyntaxError, eval, "def f(*,): pass") + self.assertRaises(SyntaxError, eval, "def f(*, **kwds): pass") + + # keyword arguments after *arglist + def f(*args, **kwargs): + return args, kwargs + self.assertEqual(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), + {'x':2, 'y':5})) + self.assertEqual(f(1, *(2,3), 4), ((1, 2, 3, 4), {})) + self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") + self.assertEqual(f(**{'eggs':'scrambled', 'spam':'fried'}), + ((), {'eggs':'scrambled', 'spam':'fried'})) + self.assertEqual(f(spam='fried', **{'eggs':'scrambled'}), + ((), {'eggs':'scrambled', 'spam':'fried'})) + + # Check ast errors in *args and *kwargs + check_syntax_error(self, "f(*g(1=2))") + check_syntax_error(self, "f(**g(1=2))") + + # argument annotation tests + def f(x) -> list: pass + self.assertEqual(f.__annotations__, {'return': list}) + def f(x: int): pass + self.assertEqual(f.__annotations__, {'x': int}) + def f(*x: str): pass + self.assertEqual(f.__annotations__, {'x': str}) + def f(**x: float): pass + self.assertEqual(f.__annotations__, {'x': float}) + def f(x, y: 1+2): pass + self.assertEqual(f.__annotations__, {'y': 3}) + def f(a, b: 1, c: 2, d): pass + self.assertEqual(f.__annotations__, {'b': 1, 'c': 2}) + def f(a, b: 1, c: 2, d, e: 3 = 4, f=5, *g: 6): pass + self.assertEqual(f.__annotations__, + {'b': 
1, 'c': 2, 'e': 3, 'g': 6}) + def f(a, b: 1, c: 2, d, e: 3 = 4, f=5, *g: 6, h: 7, i=8, j: 9 = 10, + **k: 11) -> 12: pass + self.assertEqual(f.__annotations__, + {'b': 1, 'c': 2, 'e': 3, 'g': 6, 'h': 7, 'j': 9, + 'k': 11, 'return': 12}) + # Check for issue #20625 -- annotations mangling + class Spam: + def f(self, *, __kw: 1): + pass + class Ham(Spam): pass + self.assertEqual(Spam.f.__annotations__, {'_Spam__kw': 1}) + self.assertEqual(Ham.f.__annotations__, {'_Spam__kw': 1}) + # Check for SF Bug #1697248 - mixing decorators and a return annotation + def null(x): return x + @null + def f(x) -> list: pass + self.assertEqual(f.__annotations__, {'return': list}) + + # test closures with a variety of opargs + closure = 1 + def f(): return closure + def f(x=1): return closure + def f(*, k=1): return closure + def f() -> int: return closure + + # Check trailing commas are permitted in funcdef argument list + def f(a,): pass + def f(*args,): pass + def f(**kwds,): pass + def f(a, *args,): pass + def f(a, **kwds,): pass + def f(*args, b,): pass + def f(*, b,): pass + def f(*args, **kwds,): pass + def f(a, *args, b,): pass + def f(a, *, b,): pass + def f(a, *args, **kwds,): pass + def f(*args, b, **kwds,): pass + def f(*, b, **kwds,): pass + def f(a, *args, b, **kwds,): pass + def f(a, *, b, **kwds,): pass + + def test_lambdef(self): + ### lambdef: 'lambda' [varargslist] ':' test + l1 = lambda : 0 + self.assertEqual(l1(), 0) + l2 = lambda : a[d] # XXX just testing the expression + l3 = lambda : [2 < x for x in [-1, 3, 0]] + self.assertEqual(l3(), [0, 1, 0]) + l4 = lambda x = lambda y = lambda z=1 : z : y() : x() + self.assertEqual(l4(), 1) + l5 = lambda x, y, z=2: x + y + z + self.assertEqual(l5(1, 2), 5) + self.assertEqual(l5(1, 2, 3), 6) + check_syntax_error(self, "lambda x: x = 2") + check_syntax_error(self, "lambda (None,): None") + l6 = lambda x, y, *, k=20: x+y+k + self.assertEqual(l6(1,2), 1+2+20) + self.assertEqual(l6(1,2,k=10), 1+2+10) + + # check that trailing 
commas are permitted + l10 = lambda a,: 0 + l11 = lambda *args,: 0 + l12 = lambda **kwds,: 0 + l13 = lambda a, *args,: 0 + l14 = lambda a, **kwds,: 0 + l15 = lambda *args, b,: 0 + l16 = lambda *, b,: 0 + l17 = lambda *args, **kwds,: 0 + l18 = lambda a, *args, b,: 0 + l19 = lambda a, *, b,: 0 + l20 = lambda a, *args, **kwds,: 0 + l21 = lambda *args, b, **kwds,: 0 + l22 = lambda *, b, **kwds,: 0 + l23 = lambda a, *args, b, **kwds,: 0 + l24 = lambda a, *, b, **kwds,: 0 + + + ### stmt: simple_stmt | compound_stmt + # Tested below + + def test_simple_stmt(self): + ### simple_stmt: small_stmt (';' small_stmt)* [';'] + x = 1; pass; del x + def foo(): + # verify statements that end with semi-colons + x = 1; pass; del x; + foo() + + ### small_stmt: expr_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt + # Tested below + + def test_expr_stmt(self): + # (exprlist '=')* exprlist + 1 + 1, 2, 3 + x = 1 + x = 1, 2, 3 + x = y = z = 1, 2, 3 + x, y, z = 1, 2, 3 + abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) + + check_syntax_error(self, "x + 1 = 1") + check_syntax_error(self, "a + 1 = b + 2") + + # Check the heuristic for print & exec covers significant cases + # As well as placing some limits on false positives + def test_former_statements_refer_to_builtins(self): + keywords = "print", "exec" + # Cases where we want the custom error + cases = [ + "{} foo", + "{} {{1:foo}}", + "if 1: {} foo", + "if 1: {} {{1:foo}}", + "if 1:\n {} foo", + "if 1:\n {} {{1:foo}}", + ] + for keyword in keywords: + custom_msg = "call to '{}'".format(keyword) + for case in cases: + source = case.format(keyword) + with self.subTest(source=source): + with self.assertRaisesRegex(SyntaxError, custom_msg): + exec(source) + source = source.replace("foo", "(foo.)") + with self.subTest(source=source): + with self.assertRaisesRegex(SyntaxError, "invalid syntax"): + exec(source) + + def test_del_stmt(self): + # 'del' exprlist + abc = [1,2,3] + x, y, z = abc + xyz = x, y, z + + del 
abc + del x, y, (z, xyz) + + def test_pass_stmt(self): + # 'pass' + pass + + # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt + # Tested below + + def test_break_stmt(self): + # 'break' + while 1: break + + def test_continue_stmt(self): + # 'continue' + i = 1 + while i: i = 0; continue + + msg = "" + while not msg: + msg = "ok" + try: + continue + msg = "continue failed to continue inside try" + except: + msg = "continue inside try called except block" + if msg != "ok": + self.fail(msg) + + msg = "" + while not msg: + msg = "finally block not called" + try: + continue + finally: + msg = "ok" + if msg != "ok": + self.fail(msg) + + def test_break_continue_loop(self): + # This test warrants an explanation. It is a test specifically for SF bugs + # #463359 and #462937. The bug is that a 'break' statement executed or + # exception raised inside a try/except inside a loop, *after* a continue + # statement has been executed in that loop, will cause the wrong number of + # arguments to be popped off the stack and the instruction pointer reset to + # a very small number (usually 0.) Because of this, the following test + # *must* written as a function, and the tracking vars *must* be function + # arguments with default values. Otherwise, the test will loop and loop. 
+ + def test_inner(extra_burning_oil = 1, count=0): + big_hippo = 2 + while big_hippo: + count += 1 + try: + if extra_burning_oil and big_hippo == 1: + extra_burning_oil -= 1 + break + big_hippo -= 1 + continue + except: + raise + if count > 2 or big_hippo != 1: + self.fail("continue then break in try/except in loop broken!") + test_inner() + + def test_return(self): + # 'return' [testlist] + def g1(): return + def g2(): return 1 + g1() + x = g2() + check_syntax_error(self, "class foo:return 1") + + def test_break_in_finally(self): + count = 0 + while count < 2: + count += 1 + try: + pass + finally: + break + self.assertEqual(count, 1) + + count = 0 + while count < 2: + count += 1 + try: + continue + finally: + break + self.assertEqual(count, 1) + + count = 0 + while count < 2: + count += 1 + try: + 1/0 + finally: + break + self.assertEqual(count, 1) + + for count in [0, 1]: + self.assertEqual(count, 0) + try: + pass + finally: + break + self.assertEqual(count, 0) + + for count in [0, 1]: + self.assertEqual(count, 0) + try: + continue + finally: + break + self.assertEqual(count, 0) + + for count in [0, 1]: + self.assertEqual(count, 0) + try: + 1/0 + finally: + break + self.assertEqual(count, 0) + + def test_continue_in_finally(self): + count = 0 + while count < 2: + count += 1 + try: + pass + finally: + continue + break + self.assertEqual(count, 2) + + count = 0 + while count < 2: + count += 1 + try: + break + finally: + continue + self.assertEqual(count, 2) + + count = 0 + while count < 2: + count += 1 + try: + 1/0 + finally: + continue + break + self.assertEqual(count, 2) + + for count in [0, 1]: + try: + pass + finally: + continue + break + self.assertEqual(count, 1) + + for count in [0, 1]: + try: + break + finally: + continue + self.assertEqual(count, 1) + + for count in [0, 1]: + try: + 1/0 + finally: + continue + break + self.assertEqual(count, 1) + + def test_return_in_finally(self): + def g1(): + try: + pass + finally: + return 1 + self.assertEqual(g1(), 
1) + + def g2(): + try: + return 2 + finally: + return 3 + self.assertEqual(g2(), 3) + + def g3(): + try: + 1/0 + finally: + return 4 + self.assertEqual(g3(), 4) + + def test_yield(self): + # Allowed as standalone statement + def g(): yield 1 + def g(): yield from () + # Allowed as RHS of assignment + def g(): x = yield 1 + def g(): x = yield from () + # Ordinary yield accepts implicit tuples + def g(): yield 1, 1 + def g(): x = yield 1, 1 + # 'yield from' does not + check_syntax_error(self, "def g(): yield from (), 1") + check_syntax_error(self, "def g(): x = yield from (), 1") + # Requires parentheses as subexpression + def g(): 1, (yield 1) + def g(): 1, (yield from ()) + check_syntax_error(self, "def g(): 1, yield 1") + check_syntax_error(self, "def g(): 1, yield from ()") + # Requires parentheses as call argument + def g(): f((yield 1)) + def g(): f((yield 1), 1) + def g(): f((yield from ())) + def g(): f((yield from ()), 1) + check_syntax_error(self, "def g(): f(yield 1)") + check_syntax_error(self, "def g(): f(yield 1, 1)") + check_syntax_error(self, "def g(): f(yield from ())") + check_syntax_error(self, "def g(): f(yield from (), 1)") + # Not allowed at top level + check_syntax_error(self, "yield") + check_syntax_error(self, "yield from") + # Not allowed at class scope + check_syntax_error(self, "class foo:yield 1") + check_syntax_error(self, "class foo:yield from ()") + # Check annotation refleak on SyntaxError + check_syntax_error(self, "def g(a:(yield)): pass") + + def test_yield_in_comprehensions(self): + # Check yield in comprehensions + def g(): [x for x in [(yield 1)]] + def g(): [x for x in [(yield from ())]] + + check = self.check_syntax_error + check("def g(): [(yield x) for x in ()]", + "'yield' inside list comprehension") + check("def g(): [x for x in () if not (yield x)]", + "'yield' inside list comprehension") + check("def g(): [y for x in () for y in [(yield x)]]", + "'yield' inside list comprehension") + check("def g(): {(yield x) for x in 
()}", + "'yield' inside set comprehension") + check("def g(): {(yield x): x for x in ()}", + "'yield' inside dict comprehension") + check("def g(): {x: (yield x) for x in ()}", + "'yield' inside dict comprehension") + check("def g(): ((yield x) for x in ())", + "'yield' inside generator expression") + check("def g(): [(yield from x) for x in ()]", + "'yield' inside list comprehension") + check("class C: [(yield x) for x in ()]", + "'yield' inside list comprehension") + check("[(yield x) for x in ()]", + "'yield' inside list comprehension") + + def test_raise(self): + # 'raise' test [',' test] + try: raise RuntimeError('just testing') + except RuntimeError: pass + try: raise KeyboardInterrupt + except KeyboardInterrupt: pass + + def test_import(self): + # 'import' dotted_as_names + import sys + import time, sys + # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) + from time import time + from time import (time) + # not testable inside a function, but already done at top of the module + # from sys import * + from sys import path, argv + from sys import (path, argv) + from sys import (path, argv,) + + def test_global(self): + # 'global' NAME (',' NAME)* + global a + global a, b + global one, two, three, four, five, six, seven, eight, nine, ten + + def test_nonlocal(self): + # 'nonlocal' NAME (',' NAME)* + x = 0 + y = 0 + def f(): + nonlocal x + nonlocal x, y + + def test_assert(self): + # assertTruestmt: 'assert' test [',' test] + assert 1 + assert 1, 1 + assert lambda x:x + assert 1, lambda x:x+1 + + try: + assert True + except AssertionError as e: + self.fail("'assert True' should not have raised an AssertionError") + + try: + assert True, 'this should always pass' + except AssertionError as e: + self.fail("'assert True, msg' should not have " + "raised an AssertionError") + + # these tests fail if python is run with -O, so check __debug__ + @unittest.skipUnless(__debug__, "Won't work if __debug__ is False") + def testAssert2(self): + 
try: + assert 0, "msg" + except AssertionError as e: + self.assertEqual(e.args[0], "msg") + else: + self.fail("AssertionError not raised by assert 0") + + try: + assert False + except AssertionError as e: + self.assertEqual(len(e.args), 0) + else: + self.fail("AssertionError not raised by 'assert False'") + + + ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef + # Tested below + + def test_if(self): + # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] + if 1: pass + if 1: pass + else: pass + if 0: pass + elif 0: pass + if 0: pass + elif 0: pass + elif 0: pass + elif 0: pass + else: pass + + def test_while(self): + # 'while' test ':' suite ['else' ':' suite] + while 0: pass + while 0: pass + else: pass + + # Issue1920: "while 0" is optimized away, + # ensure that the "else" clause is still present. + x = 0 + while 0: + x = 1 + else: + x = 2 + self.assertEqual(x, 2) + + def test_for(self): + # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] + for i in 1, 2, 3: pass + for i, j, k in (): pass + else: pass + class Squares: + def __init__(self, max): + self.max = max + self.sofar = [] + def __len__(self): return len(self.sofar) + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n = n+1 + return self.sofar[i] + n = 0 + for x in Squares(10): n = n+x + if n != 285: + self.fail('for over growing sequence') + + result = [] + for x, in [(1,), (2,), (3,)]: + result.append(x) + self.assertEqual(result, [1, 2, 3]) + + def test_try(self): + ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] + ### | 'try' ':' suite 'finally' ':' suite + ### except_clause: 'except' [expr ['as' expr]] + try: + 1/0 + except ZeroDivisionError: + pass + else: + pass + try: 1/0 + except EOFError: pass + except TypeError as msg: pass + except: pass + else: pass + try: 1/0 + except (EOFError, TypeError, ZeroDivisionError): pass + try: 1/0 
+ except (EOFError, TypeError, ZeroDivisionError) as msg: pass + try: pass + finally: pass + + def test_suite(self): + # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT + if 1: pass + if 1: + pass + if 1: + # + # + # + pass + pass + # + pass + # + + def test_test(self): + ### and_test ('or' and_test)* + ### and_test: not_test ('and' not_test)* + ### not_test: 'not' not_test | comparison + if not 1: pass + if 1 and 1: pass + if 1 or 1: pass + if not not not 1: pass + if not 1 and 1 and 1: pass + if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass + + def test_comparison(self): + ### comparison: expr (comp_op expr)* + ### comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' + if 1: pass + x = (1 == 1) + if 1 == 1: pass + if 1 != 1: pass + if 1 < 1: pass + if 1 > 1: pass + if 1 <= 1: pass + if 1 >= 1: pass + if 1 is 1: pass + if 1 is not 1: pass + if 1 in (): pass + if 1 not in (): pass + if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 in 1 not in 1 is 1 is not 1: pass + + def test_binary_mask_ops(self): + x = 1 & 1 + x = 1 ^ 1 + x = 1 | 1 + + def test_shift_ops(self): + x = 1 << 1 + x = 1 >> 1 + x = 1 << 1 >> 1 + + def test_additive_ops(self): + x = 1 + x = 1 + 1 + x = 1 - 1 - 1 + x = 1 - 1 + 1 - 1 + 1 + + def test_multiplicative_ops(self): + x = 1 * 1 + x = 1 / 1 + x = 1 % 1 + x = 1 / 1 * 1 % 1 + + def test_unary_ops(self): + x = +1 + x = -1 + x = ~1 + x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 + x = -1*1/1 + 1*1 - ---1*1 + + def test_selectors(self): + ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME + ### subscript: expr | [expr] ':' [expr] + + import sys, time + c = sys.path[0] + x = time.time() + x = sys.modules['time'].time() + a = '01234' + c = a[0] + c = a[-1] + s = a[0:5] + s = a[:5] + s = a[0:] + s = a[:] + s = a[-5:] + s = a[:-1] + s = a[-4:-3] + # A rough test of SF bug 1333982. http://python.org/sf/1333982 + # The testing here is fairly incomplete. 
+ # Test cases should include: commas with 1 and 2 colons + d = {} + d[1] = 1 + d[1,] = 2 + d[1,2] = 3 + d[1,2,3] = 4 + L = list(d) + L.sort(key=lambda x: (type(x).__name__, x)) + self.assertEqual(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') + + def test_atoms(self): + ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictsetmaker] '}' | NAME | NUMBER | STRING + ### dictsetmaker: (test ':' test (',' test ':' test)* [',']) | (test (',' test)* [',']) + + x = (1) + x = (1 or 2 or 3) + x = (1 or 2 or 3, 2, 3) + + x = [] + x = [1] + x = [1 or 2 or 3] + x = [1 or 2 or 3, 2, 3] + x = [] + + x = {} + x = {'one': 1} + x = {'one': 1,} + x = {'one' or 'two': 1 or 2} + x = {'one': 1, 'two': 2} + x = {'one': 1, 'two': 2,} + x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} + + x = {'one'} + x = {'one', 1,} + x = {'one', 'two', 'three'} + x = {2, 3, 4,} + + x = x + x = 'x' + x = 123 + + ### exprlist: expr (',' expr)* [','] + ### testlist: test (',' test)* [','] + # These have been exercised enough above + + def test_classdef(self): + # 'class' NAME ['(' [testlist] ')'] ':' suite + class B: pass + class B2(): pass + class C1(B): pass + class C2(B): pass + class D(C1, C2, B): pass + class C: + def meth1(self): pass + def meth2(self, arg): pass + def meth3(self, a1, a2): pass + + # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + # decorators: decorator+ + # decorated: decorators (classdef | funcdef) + def class_decorator(x): return x + @class_decorator + class G: pass + + def test_dictcomps(self): + # dictorsetmaker: ( (test ':' test (comp_for | + # (',' test ':' test)* [','])) | + # (test (comp_for | (',' test)* [','])) ) + nums = [1, 2, 3] + self.assertEqual({i:i+1 for i in nums}, {1: 2, 2: 3, 3: 4}) + + def test_listcomps(self): + # list comprehension tests + nums = [1, 2, 3, 4, 5] + strs = ["Apple", "Banana", "Coconut"] + spcs = [" Apple", " Banana ", "Coco nut "] + + self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) + 
self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) + self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) + self.assertEqual([(i, s) for i in nums for s in strs], + [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), + (2, 'Apple'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), + (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], + [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), + (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), + (5, 'Banana'), (5, 'Coconut')]) + self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], + [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) + + def test_in_func(l): + return [0 < x < 3 for x in l if x > 2] + + self.assertEqual(test_in_func(nums), [False, False, False]) + + def test_nested_front(): + self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], + [[1, 2], [3, 4], [5, 6]]) + + test_nested_front() + + check_syntax_error(self, "[i, s for i in nums for s in strs]") + check_syntax_error(self, "[x if y]") + + suppliers = [ + (1, "Boeing"), + (2, "Ford"), + (3, "Macdonalds") + ] + + parts = [ + (10, "Airliner"), + (20, "Engine"), + (30, "Cheeseburger") + ] + + suppart = [ + (1, 10), (1, 20), (2, 20), (3, 30) + ] + + x = [ + (sname, pname) + for (sno, sname) in suppliers + for (pno, pname) in parts + for (sp_sno, sp_pno) in suppart + if sno == sp_sno and pno == sp_pno + ] + + self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), + ('Macdonalds', 'Cheeseburger')]) + + def test_genexps(self): + # generator expression tests + g = ([x for x in range(10)] for x in range(1)) + self.assertEqual(next(g), [x for x in range(10)]) + try: + next(g) + self.fail('should produce StopIteration exception') + except StopIteration: + pass + + a = 1 + try: + g = (a for d in a) + next(g) + 
self.fail('should produce TypeError') + except TypeError: + pass + + self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) + self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) for x in 'ab' for y in 'xy']) + + a = [x for x in range(10)] + b = (x for x in (y for y in a)) + self.assertEqual(sum(b), sum([x for x in range(10)])) + + self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) + self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) + self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) + self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) + check_syntax_error(self, "foo(x for x in range(10), 100)") + check_syntax_error(self, "foo(100, x for x in range(10))") + + def test_comprehension_specials(self): + # test for outmost iterable precomputation + x = 10; g = (i for i in range(x)); x = 5 + self.assertEqual(len(list(g)), 10) + + # This should hold, since we're only precomputing outmost iterable. + x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) + x = 5; t = True; + self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) + + # Grammar allows multiple adjacent 'if's in listcomps and genexps, + # even though it's silly. Make sure it works (ifelse broke this.) + self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) + self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) + + # verify unpacking single element tuples in listcomp/genexp. 
+ self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) + self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) + + def test_with_statement(self): + class manager(object): + def __enter__(self): + return (1, 2) + def __exit__(self, *args): + pass + + with manager(): + pass + with manager() as x: + pass + with manager() as (x, y): + pass + with manager(), manager(): + pass + with manager() as x, manager() as y: + pass + with manager() as x, manager(): + pass + + def test_if_else_expr(self): + # Test ifelse expressions in various cases + def _checkeval(msg, ret): + "helper to check that evaluation of expressions is done correctly" + print(msg) + return ret + + # the next line is not allowed anymore + #self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) + self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) + self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) + self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) + self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) + self.assertEqual((5 and 6 if 0 else 1), 1) + self.assertEqual(((5 and 6) if 0 else 1), 1) + self.assertEqual((5 and (6 if 1 else 1)), 6) + self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) + self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) + self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) + self.assertEqual((not 5 if 1 else 1), False) + self.assertEqual((not 5 if 0 else 1), 1) + self.assertEqual((6 + 1 if 1 else 2), 7) + self.assertEqual((6 - 1 if 1 else 2), 5) + self.assertEqual((6 * 2 if 1 else 4), 12) + self.assertEqual((6 / 2 if 1 else 3), 3) + self.assertEqual((6 < 4 if 0 else 2), 2) + + def test_paren_evaluation(self): + self.assertEqual(16 // (4 // 2), 8) + self.assertEqual((16 // 4) // 2, 2) + self.assertEqual(16 // 4 // 2, 2) + self.assertTrue(False is (2 is 3)) + 
self.assertFalse((False is 2) is 3) + self.assertFalse(False is 2 is 3) + + def test_matrix_mul(self): + # This is not intended to be a comprehensive test, rather just to be few + # samples of the @ operator in test_grammar.py. + class M: + def __matmul__(self, o): + return 4 + def __imatmul__(self, o): + self.other = o + return self + m = M() + self.assertEqual(m @ m, 4) + m @= 42 + self.assertEqual(m.other, 42) + + def test_async_await(self): + async def test(): + def sum(): + pass + if 1: + await someobj() + + self.assertEqual(test.__name__, 'test') + self.assertTrue(bool(test.__code__.co_flags & inspect.CO_COROUTINE)) + + def decorator(func): + setattr(func, '_marked', True) + return func + + @decorator + async def test2(): + return 22 + self.assertTrue(test2._marked) + self.assertEqual(test2.__name__, 'test2') + self.assertTrue(bool(test2.__code__.co_flags & inspect.CO_COROUTINE)) + + def test_async_for(self): + class Done(Exception): pass + + class AIter: + def __aiter__(self): + return self + async def __anext__(self): + raise StopAsyncIteration + + async def foo(): + async for i in AIter(): + pass + async for i, j in AIter(): + pass + async for i in AIter(): + pass + else: + pass + raise Done + + with self.assertRaises(Done): + foo().send(None) + + def test_async_with(self): + class Done(Exception): pass + + class manager: + async def __aenter__(self): + return (1, 2) + async def __aexit__(self, *exc): + return False + + async def foo(): + async with manager(): + pass + async with manager() as x: + pass + async with manager() as (x, y): + pass + async with manager(), manager(): + pass + async with manager() as x, manager() as y: + pass + async with manager() as x, manager(): + pass + raise Done + + with self.assertRaises(Done): + foo().send(None) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/examples/simple-statements-without-trailing-newline.py b/python/examples/simple-statements-without-trailing-newline.py new file mode 100644 
index 0000000..73df4bf --- /dev/null +++ b/python/examples/simple-statements-without-trailing-newline.py @@ -0,0 +1 @@ +pass; print "hi" \ No newline at end of file diff --git a/python/examples/tabs.py b/python/examples/tabs.py new file mode 100644 index 0000000..4479f5c --- /dev/null +++ b/python/examples/tabs.py @@ -0,0 +1,32 @@ +def set_password(args): + password = args.password + while not password : + password1 = getpass("" if args.quiet else "Provide password: ") + password_repeat = getpass("" if args.quiet else "Repeat password: ") + if password1 != password_repeat: + print("Passwords do not match, try again") + elif len(password1) < 4: + print("Please provide at least 4 characters") + else: + password = password1 + + password_hash = passwd(password) + cfg = BaseJSONConfigManager(config_dir=jupyter_config_dir()) + cfg.update('jupyter_notebook_config', { + 'NotebookApp': { + 'password': password_hash, + } + }) + if not args.quiet: + print("password stored in config dir: %s" % jupyter_config_dir()) + +def main(argv): + parser = argparse.ArgumentParser(argv[0]) + subparsers = parser.add_subparsers() + parser_password = subparsers.add_parser('password', help='sets a password for your notebook server') + parser_password.add_argument("password", help="password to set, if not given, a password will be queried for (NOTE: this may not be safe)", + nargs="?") + parser_password.add_argument("--quiet", help="suppress messages", action="store_true") + parser_password.set_defaults(function=set_password) + args = parser.parse_args(argv[1:]) + args.function(args) diff --git a/python/examples/trailing-whitespace.py b/python/examples/trailing-whitespace.py new file mode 100644 index 0000000..0e24d64 --- /dev/null +++ b/python/examples/trailing-whitespace.py @@ -0,0 +1,6 @@ +print a + +if b: + if c: + d + e diff --git a/python/grammar.js b/python/grammar.js new file mode 100644 index 0000000..529ac27 --- /dev/null +++ b/python/grammar.js @@ -0,0 +1,1224 @@ +/** + * @file 
Python grammar for tree-sitter + * @author Max Brunsfeld <maxbrunsfeld@gmail.com> + * @license MIT + * @see {@link https://docs.python.org/2/reference/grammar.html|Python 2 grammar} + * @see {@link https://docs.python.org/3/reference/grammar.html|Python 3 grammar} + */ + +/* eslint-disable arrow-parens */ +/* eslint-disable camelcase */ +/* eslint-disable-next-line spaced-comment */ +/// <reference types="tree-sitter-cli/dsl" /> +// @ts-check + +const PREC = { + // this resolves a conflict between the usage of ':' in a lambda vs in a + // typed parameter. In the case of a lambda, we don't allow typed parameters. + lambda: -2, + typed_parameter: -1, + conditional: -1, + + parenthesized_expression: 1, + parenthesized_list_splat: 1, + or: 10, + and: 11, + not: 12, + compare: 13, + bitwise_or: 14, + bitwise_and: 15, + xor: 16, + shift: 17, + plus: 18, + times: 19, + unary: 20, + power: 21, + call: 22, +}; + +const SEMICOLON = ';'; + +module.exports = grammar({ + name: 'python', + + extras: $ => [ + $.comment, + /[\s\f\uFEFF\u2060\u200B]|\r?\n/, + $.line_continuation, + ], + + conflicts: $ => [ + [$.primary_expression, $.pattern], + [$.primary_expression, $.list_splat_pattern], + [$.tuple, $.tuple_pattern], + [$.list, $.list_pattern], + [$.with_item, $._collection_elements], + [$.named_expression, $.as_pattern], + [$.print_statement, $.primary_expression], + [$.type_alias_statement, $.primary_expression], + ], + + supertypes: $ => [ + $._simple_statement, + $._compound_statement, + $.expression, + $.primary_expression, + $.pattern, + $.parameter, + ], + + externals: $ => [ + $._newline, + $._indent, + $._dedent, + $.string_start, + $._string_content, + $.escape_interpolation, + $.string_end, + + // Mark comments as external tokens so that the external scanner is always + // invoked, even if no external token is expected. 
This allows for better + // error recovery, because the external scanner can maintain the overall + // structure by returning dedent tokens whenever a dedent occurs, even + // if no dedent is expected. + $.comment, + + // Allow the external scanner to check for the validity of closing brackets + // so that it can avoid returning dedent tokens between brackets. + ']', + ')', + '}', + 'except', + ], + + inline: $ => [ + $._simple_statement, + $._compound_statement, + $._suite, + $._expressions, + $._left_hand_side, + $.keyword_identifier, + ], + + word: $ => $.identifier, + + rules: { + module: $ => repeat($._statement), + + _statement: $ => choice( + $._simple_statements, + $._compound_statement, + ), + + // Simple statements + + _simple_statements: $ => seq( + sep1($._simple_statement, SEMICOLON), + optional(SEMICOLON), + $._newline, + ), + + _simple_statement: $ => choice( + $.future_import_statement, + $.import_statement, + $.import_from_statement, + $.print_statement, + $.assert_statement, + $.expression_statement, + $.return_statement, + $.delete_statement, + $.raise_statement, + $.pass_statement, + $.break_statement, + $.continue_statement, + $.global_statement, + $.nonlocal_statement, + $.exec_statement, + $.type_alias_statement, + ), + + import_statement: $ => seq( + 'import', + $._import_list, + ), + + import_prefix: _ => repeat1('.'), + + relative_import: $ => seq( + $.import_prefix, + optional($.dotted_name), + ), + + future_import_statement: $ => seq( + 'from', + '__future__', + 'import', + choice( + $._import_list, + seq('(', $._import_list, ')'), + ), + ), + + import_from_statement: $ => seq( + 'from', + field('module_name', choice( + $.relative_import, + $.dotted_name, + )), + 'import', + choice( + $.wildcard_import, + $._import_list, + seq('(', $._import_list, ')'), + ), + ), + + _import_list: $ => seq( + commaSep1(field('name', choice( + $.dotted_name, + $.aliased_import, + ))), + optional(','), + ), + + aliased_import: $ => seq( + field('name', 
$.dotted_name), + 'as', + field('alias', $.identifier), + ), + + wildcard_import: _ => '*', + + print_statement: $ => choice( + prec(1, seq( + 'print', + $.chevron, + repeat(seq(',', field('argument', $.expression))), + optional(',')), + ), + prec(-3, prec.dynamic(-1, seq( + 'print', + commaSep1(field('argument', $.expression)), + optional(','), + ))), + ), + + chevron: $ => seq( + '>>', + $.expression, + ), + + assert_statement: $ => seq( + 'assert', + commaSep1($.expression), + ), + + expression_statement: $ => choice( + $.expression, + seq(commaSep1($.expression), optional(',')), + $.assignment, + $.augmented_assignment, + $.yield, + ), + + named_expression: $ => seq( + field('name', $._named_expression_lhs), + ':=', + field('value', $.expression), + ), + + _named_expression_lhs: $ => choice( + $.identifier, + $.keyword_identifier, + ), + + return_statement: $ => seq( + 'return', + optional($._expressions), + ), + + delete_statement: $ => seq( + 'del', + $._expressions, + ), + + _expressions: $ => choice( + $.expression, + $.expression_list, + ), + + raise_statement: $ => seq( + 'raise', + optional($._expressions), + optional(seq('from', field('cause', $.expression))), + ), + + pass_statement: _ => prec.left('pass'), + break_statement: _ => prec.left('break'), + continue_statement: _ => prec.left('continue'), + + // Compound statements + + _compound_statement: $ => choice( + $.if_statement, + $.for_statement, + $.while_statement, + $.try_statement, + $.with_statement, + $.function_definition, + $.class_definition, + $.decorated_definition, + $.match_statement, + ), + + if_statement: $ => seq( + 'if', + field('condition', $.expression), + ':', + field('consequence', $._suite), + repeat(field('alternative', $.elif_clause)), + optional(field('alternative', $.else_clause)), + ), + + elif_clause: $ => seq( + 'elif', + field('condition', $.expression), + ':', + field('consequence', $._suite), + ), + + else_clause: $ => seq( + 'else', + ':', + field('body', $._suite), 
+ ), + + match_statement: $ => seq( + 'match', + commaSep1(field('subject', $.expression)), + optional(','), + ':', + field('body', alias($._match_block, $.block)), + ), + + _match_block: $ => choice( + seq( + $._indent, + repeat(field('alternative', $.case_clause)), + $._dedent, + ), + $._newline, + ), + + case_clause: $ => seq( + 'case', + commaSep1($.case_pattern), + optional(','), + optional(field('guard', $.if_clause)), + ':', + field('consequence', $._suite), + ), + + for_statement: $ => seq( + optional('async'), + 'for', + field('left', $._left_hand_side), + 'in', + field('right', $._expressions), + ':', + field('body', $._suite), + field('alternative', optional($.else_clause)), + ), + + while_statement: $ => seq( + 'while', + field('condition', $.expression), + ':', + field('body', $._suite), + optional(field('alternative', $.else_clause)), + ), + + try_statement: $ => seq( + 'try', + ':', + field('body', $._suite), + choice( + seq( + repeat1($.except_clause), + optional($.else_clause), + optional($.finally_clause), + ), + seq( + repeat1($.except_group_clause), + optional($.else_clause), + optional($.finally_clause), + ), + $.finally_clause, + ), + ), + + except_clause: $ => seq( + 'except', + optional(seq( + $.expression, + optional(seq( + choice('as', ','), + $.expression, + )), + )), + ':', + $._suite, + ), + + except_group_clause: $ => seq( + 'except*', + seq( + $.expression, + optional(seq( + 'as', + $.expression, + )), + ), + ':', + $._suite, + ), + + finally_clause: $ => seq( + 'finally', + ':', + $._suite, + ), + + with_statement: $ => seq( + optional('async'), + 'with', + $.with_clause, + ':', + field('body', $._suite), + ), + + with_clause: $ => choice( + seq(commaSep1($.with_item), optional(',')), + seq('(', commaSep1($.with_item), optional(','), ')'), + ), + + with_item: $ => prec.dynamic(1, seq( + field('value', $.expression), + )), + + function_definition: $ => seq( + optional('async'), + 'def', + field('name', $.identifier), + 
field('type_parameters', optional($.type_parameter)), + field('parameters', $.parameters), + optional( + seq( + '->', + field('return_type', $.type), + ), + ), + ':', + field('body', $._suite), + ), + + parameters: $ => seq( + '(', + optional($._parameters), + ')', + ), + + lambda_parameters: $ => $._parameters, + + list_splat: $ => seq( + '*', + $.expression, + ), + + dictionary_splat: $ => seq( + '**', + $.expression, + ), + + global_statement: $ => seq( + 'global', + commaSep1($.identifier), + ), + + nonlocal_statement: $ => seq( + 'nonlocal', + commaSep1($.identifier), + ), + + exec_statement: $ => seq( + 'exec', + field('code', choice($.string, $.identifier)), + optional( + seq( + 'in', + commaSep1($.expression), + ), + ), + ), + + type_alias_statement: $ => prec.dynamic(1, seq( + 'type', + $.type, + '=', + $.type, + )), + + class_definition: $ => seq( + 'class', + field('name', $.identifier), + field('type_parameters', optional($.type_parameter)), + field('superclasses', optional($.argument_list)), + ':', + field('body', $._suite), + ), + type_parameter: $ => seq( + '[', + commaSep1($.type), + ']', + ), + + parenthesized_list_splat: $ => prec(PREC.parenthesized_list_splat, seq( + '(', + choice( + alias($.parenthesized_list_splat, $.parenthesized_expression), + $.list_splat, + ), + ')', + )), + + argument_list: $ => seq( + '(', + optional(commaSep1( + choice( + $.expression, + $.list_splat, + $.dictionary_splat, + alias($.parenthesized_list_splat, $.parenthesized_expression), + $.keyword_argument, + ), + )), + optional(','), + ')', + ), + + decorated_definition: $ => seq( + repeat1($.decorator), + field('definition', choice( + $.class_definition, + $.function_definition, + )), + ), + + decorator: $ => seq( + '@', + $.expression, + $._newline, + ), + + _suite: $ => choice( + alias($._simple_statements, $.block), + seq($._indent, $.block), + alias($._newline, $.block), + ), + + block: $ => seq( + repeat($._statement), + $._dedent, + ), + + expression_list: $ => 
prec.right(seq( + $.expression, + choice( + ',', + seq( + repeat1(seq( + ',', + $.expression, + )), + optional(','), + ), + ), + )), + + dotted_name: $ => prec(1, sep1($.identifier, '.')), + + // Match cases + + case_pattern: $ => prec(1, choice( + alias($._as_pattern, $.as_pattern), + $.keyword_pattern, + $._simple_pattern, + )), + + _simple_pattern: $ => prec(1, choice( + $.class_pattern, + $.splat_pattern, + $.union_pattern, + alias($._list_pattern, $.list_pattern), + alias($._tuple_pattern, $.tuple_pattern), + $.dict_pattern, + $.string, + $.concatenated_string, + $.true, + $.false, + $.none, + seq(optional('-'), choice($.integer, $.float)), + $.complex_pattern, + $.dotted_name, + '_', + )), + + _as_pattern: $ => seq($.case_pattern, 'as', $.identifier), + + union_pattern: $ => prec.right(seq($._simple_pattern, repeat1(prec.left(seq('|', $._simple_pattern))))), + + _list_pattern: $ => seq( + '[', + optional(seq( + commaSep1($.case_pattern), + optional(','), + )), + ']', + ), + + _tuple_pattern: $ => seq( + '(', + optional(seq( + commaSep1($.case_pattern), + optional(','), + )), + ')', + ), + + dict_pattern: $ => seq( + '{', + optional(seq( + commaSep1(choice($._key_value_pattern, $.splat_pattern)), + optional(','), + )), + '}', + ), + + _key_value_pattern: $ => seq( + field('key', $._simple_pattern), + ':', + field('value', $.case_pattern), + ), + + keyword_pattern: $ => seq($.identifier, '=', $._simple_pattern), + + splat_pattern: $ => prec(1, seq(choice('*', '**'), choice($.identifier, '_'))), + + class_pattern: $ => seq( + $.dotted_name, + '(', + optional(seq( + commaSep1($.case_pattern), + optional(','), + )), + ')', + ), + + complex_pattern: $ => prec(1, seq( + optional('-'), + choice($.integer, $.float), + choice('+', '-'), + choice($.integer, $.float), + )), + + // Patterns + + _parameters: $ => seq( + commaSep1($.parameter), + optional(','), + ), + + _patterns: $ => seq( + commaSep1($.pattern), + optional(','), + ), + + parameter: $ => choice( + 
$.identifier, + $.typed_parameter, + $.default_parameter, + $.typed_default_parameter, + $.list_splat_pattern, + $.tuple_pattern, + $.keyword_separator, + $.positional_separator, + $.dictionary_splat_pattern, + ), + + pattern: $ => choice( + $.identifier, + $.keyword_identifier, + $.subscript, + $.attribute, + $.list_splat_pattern, + $.tuple_pattern, + $.list_pattern, + ), + + tuple_pattern: $ => seq( + '(', + optional($._patterns), + ')', + ), + + list_pattern: $ => seq( + '[', + optional($._patterns), + ']', + ), + + default_parameter: $ => seq( + field('name', choice($.identifier, $.tuple_pattern)), + '=', + field('value', $.expression), + ), + + typed_default_parameter: $ => prec(PREC.typed_parameter, seq( + field('name', $.identifier), + ':', + field('type', $.type), + '=', + field('value', $.expression), + )), + + list_splat_pattern: $ => seq( + '*', + choice($.identifier, $.keyword_identifier, $.subscript, $.attribute), + ), + + dictionary_splat_pattern: $ => seq( + '**', + choice($.identifier, $.keyword_identifier, $.subscript, $.attribute), + ), + + // Extended patterns (patterns allowed in match statement are far more flexible than simple patterns though still a subset of "expression") + + as_pattern: $ => prec.left(seq( + $.expression, + 'as', + field('alias', alias($.expression, $.as_pattern_target)), + )), + + // Expressions + + _expression_within_for_in_clause: $ => choice( + $.expression, + alias($.lambda_within_for_in_clause, $.lambda), + ), + + expression: $ => choice( + $.comparison_operator, + $.not_operator, + $.boolean_operator, + $.lambda, + $.primary_expression, + $.conditional_expression, + $.named_expression, + $.as_pattern, + ), + + primary_expression: $ => choice( + $.await, + $.binary_operator, + $.identifier, + $.keyword_identifier, + $.string, + $.concatenated_string, + $.integer, + $.float, + $.true, + $.false, + $.none, + $.unary_operator, + $.attribute, + $.subscript, + $.call, + $.list, + $.list_comprehension, + $.dictionary, + 
$.dictionary_comprehension, + $.set, + $.set_comprehension, + $.tuple, + $.parenthesized_expression, + $.generator_expression, + $.ellipsis, + alias($.list_splat_pattern, $.list_splat), + ), + + not_operator: $ => prec(PREC.not, seq( + 'not', + field('argument', $.expression), + )), + + boolean_operator: $ => choice( + prec.left(PREC.and, seq( + field('left', $.expression), + field('operator', 'and'), + field('right', $.expression), + )), + prec.left(PREC.or, seq( + field('left', $.expression), + field('operator', 'or'), + field('right', $.expression), + )), + ), + + binary_operator: $ => { + const table = [ + [prec.left, '+', PREC.plus], + [prec.left, '-', PREC.plus], + [prec.left, '*', PREC.times], + [prec.left, '@', PREC.times], + [prec.left, '/', PREC.times], + [prec.left, '%', PREC.times], + [prec.left, '//', PREC.times], + [prec.right, '**', PREC.power], + [prec.left, '|', PREC.bitwise_or], + [prec.left, '&', PREC.bitwise_and], + [prec.left, '^', PREC.xor], + [prec.left, '<<', PREC.shift], + [prec.left, '>>', PREC.shift], + ]; + + // @ts-ignore + return choice(...table.map(([fn, operator, precedence]) => fn(precedence, seq( + field('left', $.primary_expression), + // @ts-ignore + field('operator', operator), + field('right', $.primary_expression), + )))); + }, + + unary_operator: $ => prec(PREC.unary, seq( + field('operator', choice('+', '-', '~')), + field('argument', $.primary_expression), + )), + + comparison_operator: $ => prec.left(PREC.compare, seq( + $.primary_expression, + repeat1(seq( + field('operators', + choice( + '<', + '<=', + '==', + '!=', + '>=', + '>', + '<>', + 'in', + alias(seq('not', 'in'), 'not in'), + 'is', + alias(seq('is', 'not'), 'is not'), + )), + $.primary_expression, + )), + )), + + lambda: $ => prec(PREC.lambda, seq( + 'lambda', + field('parameters', optional($.lambda_parameters)), + ':', + field('body', $.expression), + )), + + lambda_within_for_in_clause: $ => seq( + 'lambda', + field('parameters', 
optional($.lambda_parameters)), + ':', + field('body', $._expression_within_for_in_clause), + ), + + assignment: $ => seq( + field('left', $._left_hand_side), + choice( + seq('=', field('right', $._right_hand_side)), + seq(':', field('type', $.type)), + seq(':', field('type', $.type), '=', field('right', $._right_hand_side)), + ), + ), + + augmented_assignment: $ => seq( + field('left', $._left_hand_side), + field('operator', choice( + '+=', '-=', '*=', '/=', '@=', '//=', '%=', '**=', + '>>=', '<<=', '&=', '^=', '|=', + )), + field('right', $._right_hand_side), + ), + + _left_hand_side: $ => choice( + $.pattern, + $.pattern_list, + ), + + pattern_list: $ => seq( + $.pattern, + choice( + ',', + seq( + repeat1(seq( + ',', + $.pattern, + )), + optional(','), + ), + ), + ), + + _right_hand_side: $ => choice( + $.expression, + $.expression_list, + $.assignment, + $.augmented_assignment, + $.pattern_list, + $.yield, + ), + + yield: $ => prec.right(seq( + 'yield', + choice( + seq( + 'from', + $.expression, + ), + optional($._expressions), + ), + )), + + attribute: $ => prec(PREC.call, seq( + field('object', $.primary_expression), + '.', + field('attribute', $.identifier), + )), + + subscript: $ => prec(PREC.call, seq( + field('value', $.primary_expression), + '[', + commaSep1(field('subscript', choice($.expression, $.slice))), + optional(','), + ']', + )), + + slice: $ => seq( + optional($.expression), + ':', + optional($.expression), + optional(seq(':', optional($.expression))), + ), + + ellipsis: _ => '...', + + call: $ => prec(PREC.call, seq( + field('function', $.primary_expression), + field('arguments', choice( + $.generator_expression, + $.argument_list, + )), + )), + + typed_parameter: $ => prec(PREC.typed_parameter, seq( + choice( + $.identifier, + $.list_splat_pattern, + $.dictionary_splat_pattern, + ), + ':', + field('type', $.type), + )), + + type: $ => choice( + $.expression, + $.splat_type, + $.generic_type, + $.union_type, + $.constrained_type, + 
$.member_type, + ), + splat_type: $ => prec(1, seq(choice('*', '**'), $.identifier)), + generic_type: $ => prec(1, seq($.identifier, $.type_parameter)), + union_type: $ => prec.left(seq($.type, '|', $.type)), + constrained_type: $ => prec.right(seq($.type, ':', $.type)), + member_type: $ => seq($.type, '.', $.identifier), + + keyword_argument: $ => seq( + field('name', choice($.identifier, $.keyword_identifier)), + '=', + field('value', $.expression), + ), + + // Literals + + list: $ => seq( + '[', + optional($._collection_elements), + ']', + ), + + set: $ => seq( + '{', + $._collection_elements, + '}', + ), + + tuple: $ => seq( + '(', + optional($._collection_elements), + ')', + ), + + dictionary: $ => seq( + '{', + optional(commaSep1(choice($.pair, $.dictionary_splat))), + optional(','), + '}', + ), + + pair: $ => seq( + field('key', $.expression), + ':', + field('value', $.expression), + ), + + list_comprehension: $ => seq( + '[', + field('body', $.expression), + $._comprehension_clauses, + ']', + ), + + dictionary_comprehension: $ => seq( + '{', + field('body', $.pair), + $._comprehension_clauses, + '}', + ), + + set_comprehension: $ => seq( + '{', + field('body', $.expression), + $._comprehension_clauses, + '}', + ), + + generator_expression: $ => seq( + '(', + field('body', $.expression), + $._comprehension_clauses, + ')', + ), + + _comprehension_clauses: $ => seq( + $.for_in_clause, + repeat(choice( + $.for_in_clause, + $.if_clause, + )), + ), + + parenthesized_expression: $ => prec(PREC.parenthesized_expression, seq( + '(', + choice($.expression, $.yield), + ')', + )), + + _collection_elements: $ => seq( + commaSep1(choice( + $.expression, $.yield, $.list_splat, $.parenthesized_list_splat, + )), + optional(','), + ), + + for_in_clause: $ => prec.left(seq( + optional('async'), + 'for', + field('left', $._left_hand_side), + 'in', + field('right', commaSep1($._expression_within_for_in_clause)), + optional(','), + )), + + if_clause: $ => seq( + 'if', + 
$.expression, + ), + + conditional_expression: $ => prec.right(PREC.conditional, seq( + $.expression, + 'if', + $.expression, + 'else', + $.expression, + )), + + concatenated_string: $ => seq( + $.string, + repeat1($.string), + ), + + string: $ => seq( + $.string_start, + repeat(choice($.interpolation, $.string_content)), + $.string_end, + ), + + string_content: $ => prec.right(repeat1( + choice( + $.escape_interpolation, + $.escape_sequence, + $._not_escape_sequence, + $._string_content, + ))), + + interpolation: $ => seq( + '{', + field('expression', $._f_expression), + optional('='), + optional(field('type_conversion', $.type_conversion)), + optional(field('format_specifier', $.format_specifier)), + '}', + ), + + _f_expression: $ => choice( + $.expression, + $.expression_list, + $.pattern_list, + $.yield, + ), + + escape_sequence: _ => token.immediate(prec(1, seq( + '\\', + choice( + /u[a-fA-F\d]{4}/, + /U[a-fA-F\d]{8}/, + /x[a-fA-F\d]{2}/, + /\d{3}/, + /\r?\n/, + /['"abfrntv\\]/, + /N\{[^}]+\}/, + ), + ))), + + _not_escape_sequence: _ => token.immediate('\\'), + + format_specifier: $ => seq( + ':', + repeat(choice( + token(prec(1, /[^{}\n]+/)), + alias($.interpolation, $.format_expression), + )), + ), + + type_conversion: _ => /![a-z]/, + + integer: _ => token(choice( + seq( + choice('0x', '0X'), + repeat1(/_?[A-Fa-f0-9]+/), + optional(/[Ll]/), + ), + seq( + choice('0o', '0O'), + repeat1(/_?[0-7]+/), + optional(/[Ll]/), + ), + seq( + choice('0b', '0B'), + repeat1(/_?[0-1]+/), + optional(/[Ll]/), + ), + seq( + repeat1(/[0-9]+_?/), + choice( + optional(/[Ll]/), // long numbers + optional(/[jJ]/), // complex numbers + ), + ), + )), + + float: _ => { + const digits = repeat1(/[0-9]+_?/); + const exponent = seq(/[eE][\+-]?/, digits); + + return token(seq( + choice( + seq(digits, '.', optional(digits), optional(exponent)), + seq(optional(digits), '.', digits, optional(exponent)), + seq(digits, exponent), + ), + optional(choice(/[Ll]/, /[jJ]/)), + )); + }, + + 
identifier: _ => /[_\p{XID_Start}][_\p{XID_Continue}]*/, + + keyword_identifier: $ => choice( + prec(-3, alias( + choice( + 'print', + 'exec', + 'async', + 'await', + 'match', + ), + $.identifier, + )), + alias('type', $.identifier), + ), + + true: _ => 'True', + false: _ => 'False', + none: _ => 'None', + + await: $ => prec(PREC.unary, seq( + 'await', + $.primary_expression, + )), + + comment: _ => token(seq('#', /.*/)), + + line_continuation: _ => token(seq('\\', choice(seq(optional('\r'), '\n'), '\0'))), + + positional_separator: _ => '/', + keyword_separator: _ => '*', + }, +}); + +module.exports.PREC = PREC; + +/** + * Creates a rule to match one or more of the rules separated by a comma + * + * @param {RuleOrLiteral} rule + * + * @return {SeqRule} + * + */ +function commaSep1(rule) { + return sep1(rule, ','); +} + +/** + * Creates a rule to match one or more occurrences of `rule` separated by `sep` + * + * @param {RuleOrLiteral} rule + * + * @param {RuleOrLiteral} separator + * + * @return {SeqRule} + * + */ +function sep1(rule, separator) { + return seq(rule, repeat(seq(separator, rule))); +} diff --git a/python/src/scanner.c b/python/src/scanner.c new file mode 100644 index 0000000..44058d9 --- /dev/null +++ b/python/src/scanner.c @@ -0,0 +1,528 @@ +#include "tree_sitter/parser.h" + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#define VEC_RESIZE(vec, _cap) \ + void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \ + assert(tmp != NULL); \ + (vec).data = tmp; \ + (vec).cap = (_cap); + +#define VEC_GROW(vec, _cap) \ + if ((vec).cap < (_cap)) { \ + VEC_RESIZE((vec), (_cap)); \ + } + +#define VEC_PUSH(vec, el) \ + if ((vec).cap == (vec).len) { \ + VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \ + } \ + (vec).data[(vec).len++] = (el); + +#define VEC_POP(vec) (vec).len--; + +#define VEC_NEW \ + { .len = 0, .cap = 0, .data = NULL } + +#define VEC_BACK(vec) ((vec).data[(vec).len - 1]) + +#define VEC_FREE(vec) \ + { \ + if ((vec).data != NULL) \ + free((vec).data); \ + } + +#define VEC_CLEAR(vec) (vec).len = 0; + +enum TokenType { + NEWLINE, + INDENT, + DEDENT, + STRING_START, + STRING_CONTENT, + ESCAPE_INTERPOLATION, + STRING_END, + COMMENT, + CLOSE_PAREN, + CLOSE_BRACKET, + CLOSE_BRACE, + EXCEPT, +}; + +typedef enum { + SingleQuote = 1 << 0, + DoubleQuote = 1 << 1, + BackQuote = 1 << 2, + Raw = 1 << 3, + Format = 1 << 4, + Triple = 1 << 5, + Bytes = 1 << 6, +} Flags; + +typedef struct { + char flags; +} Delimiter; + +static inline Delimiter new_delimiter() { return (Delimiter){0}; } + +static inline bool is_format(Delimiter *delimiter) { + return delimiter->flags & Format; +} + +static inline bool is_raw(Delimiter *delimiter) { + return delimiter->flags & Raw; +} + +static inline bool is_triple(Delimiter *delimiter) { + return delimiter->flags & Triple; +} + +static inline bool is_bytes(Delimiter *delimiter) { + return delimiter->flags & Bytes; +} + +static inline int32_t end_character(Delimiter *delimiter) { + if (delimiter->flags & SingleQuote) { + return '\''; + } + if (delimiter->flags & DoubleQuote) { + return '"'; + } + if (delimiter->flags & BackQuote) { + return '`'; + } + return 0; +} + +static inline void set_format(Delimiter *delimiter) { + delimiter->flags |= Format; +} + +static inline void set_raw(Delimiter *delimiter) { delimiter->flags |= Raw; } + +static 
inline void set_triple(Delimiter *delimiter) { + delimiter->flags |= Triple; +} + +static inline void set_bytes(Delimiter *delimiter) { + delimiter->flags |= Bytes; +} + +static inline void set_end_character(Delimiter *delimiter, int32_t character) { + switch (character) { + case '\'': + delimiter->flags |= SingleQuote; + break; + case '"': + delimiter->flags |= DoubleQuote; + break; + case '`': + delimiter->flags |= BackQuote; + break; + default: + assert(false); + } +} + +typedef struct { + uint32_t len; + uint32_t cap; + uint16_t *data; +} indent_vec; + +static indent_vec indent_vec_new() { + indent_vec vec = VEC_NEW; + vec.data = calloc(1, sizeof(uint16_t)); + vec.cap = 1; + return vec; +} + +typedef struct { + uint32_t len; + uint32_t cap; + Delimiter *data; +} delimiter_vec; + +static delimiter_vec delimiter_vec_new() { + delimiter_vec vec = VEC_NEW; + vec.data = calloc(1, sizeof(Delimiter)); + vec.cap = 1; + return vec; +} + +typedef struct { + indent_vec indents; + delimiter_vec delimiters; + bool inside_f_string; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + + bool error_recovery_mode = + valid_symbols[STRING_CONTENT] && valid_symbols[INDENT]; + bool within_brackets = valid_symbols[CLOSE_BRACE] || + valid_symbols[CLOSE_PAREN] || + valid_symbols[CLOSE_BRACKET]; + + bool advanced_once = false; + if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.len > 0 && + (lexer->lookahead == '{' || lexer->lookahead == '}') && + !error_recovery_mode) { + Delimiter delimiter = VEC_BACK(scanner->delimiters); + if (is_format(&delimiter)) { + lexer->mark_end(lexer); + bool is_left_brace = lexer->lookahead == '{'; + advance(lexer); + advanced_once = true; + if ((lexer->lookahead == '{' && 
is_left_brace) || + (lexer->lookahead == '}' && !is_left_brace)) { + advance(lexer); + lexer->mark_end(lexer); + lexer->result_symbol = ESCAPE_INTERPOLATION; + return true; + } + return false; + } + } + + if (valid_symbols[STRING_CONTENT] && scanner->delimiters.len > 0 && + !error_recovery_mode) { + Delimiter delimiter = VEC_BACK(scanner->delimiters); + int32_t end_char = end_character(&delimiter); + bool has_content = advanced_once; + while (lexer->lookahead) { + if ((advanced_once || lexer->lookahead == '{' || + lexer->lookahead == '}') && + is_format(&delimiter)) { + lexer->mark_end(lexer); + lexer->result_symbol = STRING_CONTENT; + return has_content; + } + if (lexer->lookahead == '\\') { + if (is_raw(&delimiter)) { + // Step over the backslash. + advance(lexer); + // Step over any escaped quotes. + if (lexer->lookahead == end_character(&delimiter) || + lexer->lookahead == '\\') { + advance(lexer); + } + // Step over newlines + if (lexer->lookahead == '\r') { + advance(lexer); + if (lexer->lookahead == '\n') { + advance(lexer); + } + } else if (lexer->lookahead == '\n') { + advance(lexer); + } + continue; + } + if (is_bytes(&delimiter)) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || + lexer->lookahead == 'U') { + // In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are + // not escape sequences + // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + advance(lexer); + } else { + lexer->result_symbol = STRING_CONTENT; + return has_content; + } + } else { + lexer->mark_end(lexer); + lexer->result_symbol = STRING_CONTENT; + return has_content; + } + } else if (lexer->lookahead == end_char) { + if (is_triple(&delimiter)) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == end_char) { + advance(lexer); + if (lexer->lookahead == end_char) { + if (has_content) { + lexer->result_symbol = STRING_CONTENT; + } else { + advance(lexer); + lexer->mark_end(lexer); + 
VEC_POP(scanner->delimiters); + lexer->result_symbol = STRING_END; + scanner->inside_f_string = false; + } + return true; + } + lexer->mark_end(lexer); + lexer->result_symbol = STRING_CONTENT; + return true; + } + lexer->mark_end(lexer); + lexer->result_symbol = STRING_CONTENT; + return true; + } + if (has_content) { + lexer->result_symbol = STRING_CONTENT; + } else { + advance(lexer); + VEC_POP(scanner->delimiters); + lexer->result_symbol = STRING_END; + scanner->inside_f_string = false; + } + lexer->mark_end(lexer); + return true; + + } else if (lexer->lookahead == '\n' && has_content && + !is_triple(&delimiter)) { + return false; + } + advance(lexer); + has_content = true; + } + } + + lexer->mark_end(lexer); + + bool found_end_of_line = false; + uint32_t indent_length = 0; + int32_t first_comment_indent_length = -1; + for (;;) { + if (lexer->lookahead == '\n') { + found_end_of_line = true; + indent_length = 0; + skip(lexer); + } else if (lexer->lookahead == ' ') { + indent_length++; + skip(lexer); + } else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') { + indent_length = 0; + skip(lexer); + } else if (lexer->lookahead == '\t') { + indent_length += 8; + skip(lexer); + } else if (lexer->lookahead == '#' && + (valid_symbols[INDENT] || valid_symbols[DEDENT] || + valid_symbols[NEWLINE] || valid_symbols[EXCEPT])) { + // If we haven't found an EOL yet, + // then this is a comment after an expression: + // foo = bar # comment + // Just return, since we don't want to generate an indent/dedent + // token. 
+ if (!found_end_of_line) { + return false; + } + if (first_comment_indent_length == -1) { + first_comment_indent_length = (int32_t)indent_length; + } + while (lexer->lookahead && lexer->lookahead != '\n') { + skip(lexer); + } + skip(lexer); + indent_length = 0; + } else if (lexer->lookahead == '\\') { + skip(lexer); + if (lexer->lookahead == '\r') { + skip(lexer); + } + if (lexer->lookahead == '\n' || lexer->eof(lexer)) { + skip(lexer); + } else { + return false; + } + } else if (lexer->eof(lexer)) { + indent_length = 0; + found_end_of_line = true; + break; + } else { + break; + } + } + + if (found_end_of_line) { + if (scanner->indents.len > 0) { + uint16_t current_indent_length = VEC_BACK(scanner->indents); + + if (valid_symbols[INDENT] && + indent_length > current_indent_length) { + VEC_PUSH(scanner->indents, indent_length); + lexer->result_symbol = INDENT; + return true; + } + + bool next_tok_is_string_start = lexer->lookahead == '\"' || + lexer->lookahead == '\'' || + lexer->lookahead == '`'; + + if ((valid_symbols[DEDENT] || + (!valid_symbols[NEWLINE] && + !(valid_symbols[STRING_START] && next_tok_is_string_start) && + !within_brackets)) && + indent_length < current_indent_length && + !scanner->inside_f_string && + + // Wait to create a dedent token until we've consumed any + // comments + // whose indentation matches the current block. 
+ first_comment_indent_length < (int32_t)current_indent_length) { + VEC_POP(scanner->indents); + lexer->result_symbol = DEDENT; + return true; + } + } + + if (valid_symbols[NEWLINE] && !error_recovery_mode) { + lexer->result_symbol = NEWLINE; + return true; + } + } + + if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) { + Delimiter delimiter = new_delimiter(); + + bool has_flags = false; + while (lexer->lookahead) { + if (lexer->lookahead == 'f' || lexer->lookahead == 'F') { + set_format(&delimiter); + } else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') { + set_raw(&delimiter); + } else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') { + set_bytes(&delimiter); + } else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') { + break; + } + has_flags = true; + advance(lexer); + } + + if (lexer->lookahead == '`') { + set_end_character(&delimiter, '`'); + advance(lexer); + lexer->mark_end(lexer); + } else if (lexer->lookahead == '\'') { + set_end_character(&delimiter, '\''); + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '\'') { + advance(lexer); + if (lexer->lookahead == '\'') { + advance(lexer); + lexer->mark_end(lexer); + set_triple(&delimiter); + } + } + } else if (lexer->lookahead == '"') { + set_end_character(&delimiter, '"'); + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '"') { + advance(lexer); + if (lexer->lookahead == '"') { + advance(lexer); + lexer->mark_end(lexer); + set_triple(&delimiter); + } + } + } + + if (end_character(&delimiter)) { + VEC_PUSH(scanner->delimiters, delimiter); + lexer->result_symbol = STRING_START; + scanner->inside_f_string = is_format(&delimiter); + return true; + } + if (has_flags) { + return false; + } + } + + return false; +} + +unsigned tree_sitter_python_external_scanner_serialize(void *payload, + char *buffer) { + Scanner *scanner = (Scanner *)payload; + + size_t size = 0; + + buffer[size++] = (char)scanner->inside_f_string; + + size_t 
delimiter_count = scanner->delimiters.len; + if (delimiter_count > UINT8_MAX) { + delimiter_count = UINT8_MAX; + } + buffer[size++] = (char)delimiter_count; + + if (delimiter_count > 0) { + memcpy(&buffer[size], scanner->delimiters.data, delimiter_count); + } + size += delimiter_count; + + int iter = 1; + for (; iter < scanner->indents.len && + size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; + ++iter) { + buffer[size++] = (char)scanner->indents.data[iter]; + } + + return size; +} + +void tree_sitter_python_external_scanner_deserialize(void *payload, + const char *buffer, + unsigned length) { + Scanner *scanner = (Scanner *)payload; + + VEC_CLEAR(scanner->delimiters); + VEC_CLEAR(scanner->indents); + VEC_PUSH(scanner->indents, 0); + + if (length > 0) { + size_t size = 0; + + scanner->inside_f_string = (bool)buffer[size++]; + + size_t delimiter_count = (uint8_t)buffer[size++]; + if (delimiter_count > 0) { + VEC_GROW(scanner->delimiters, delimiter_count); + scanner->delimiters.len = delimiter_count; + memcpy(scanner->delimiters.data, &buffer[size], delimiter_count); + size += delimiter_count; + } + + for (; size < length; size++) { + VEC_PUSH(scanner->indents, (unsigned char)buffer[size]); + } + } +} + +void *tree_sitter_python_external_scanner_create() { +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + _Static_assert(sizeof(Delimiter) == sizeof(char), ""); +#else + assert(sizeof(Delimiter) == sizeof(char)); +#endif + Scanner *scanner = calloc(1, sizeof(Scanner)); + scanner->indents = indent_vec_new(); + scanner->delimiters = delimiter_vec_new(); + tree_sitter_python_external_scanner_deserialize(scanner, NULL, 0); + return scanner; +} + +void tree_sitter_python_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + VEC_FREE(scanner->indents); + VEC_FREE(scanner->delimiters); + free(scanner); +} diff --git a/python/test/corpus/errors.txt b/python/test/corpus/errors.txt new file mode 100644 index 0000000..f357c27 --- 
/dev/null +++ b/python/test/corpus/errors.txt @@ -0,0 +1,30 @@ +==================================== +An error before a string literal +==================================== + +def a(b): + c. + + """ + d + """ + + e + +--- + +(module + (function_definition + (identifier) + (parameters + (identifier)) + (ERROR + (identifier)) + (block + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (identifier))))) diff --git a/python/test/corpus/expressions.txt b/python/test/corpus/expressions.txt new file mode 100644 index 0000000..1c40935 --- /dev/null +++ b/python/test/corpus/expressions.txt @@ -0,0 +1,1108 @@ +================================================================================ +Identifiers with Greek letters +================================================================================ + +ψ1 = β_γ + Ψ_5 + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + left: (identifier) + right: (binary_operator + left: (identifier) + right: (identifier))))) + +================================================================================ +Subscript expressions +================================================================================ + +a[1] +b[2, 3] +c[4, 5,] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (subscript + (identifier) + (integer))) + (expression_statement + (subscript + (identifier) + (integer) + (integer))) + (expression_statement + (subscript + (identifier) + (integer) + (integer)))) + +================================================================================ +Subscript slice expressions +================================================================================ + +a[:] +b[5:] +b[5:6, ...] 
+c[::] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (subscript + (identifier) + (slice))) + (expression_statement + (subscript + (identifier) + (slice + (integer)))) + (expression_statement + (subscript + (identifier) + (slice + (integer) + (integer)) + (ellipsis))) + (expression_statement + (subscript + (identifier) + (slice)))) + +================================================================================ +Attribute references +================================================================================ + +a.b.c + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (attribute + (attribute + (identifier) + (identifier)) + (identifier)))) + +================================================================================ +Await expressions +================================================================================ + +await i(j, 5) +return await i(j, 5) +async def region_exists(region: str) -> bool: + return region in await all_regions() + +assert await a(b) == c + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (await + (call + (identifier) + (argument_list + (identifier) + (integer))))) + (return_statement + (await + (call + (identifier) + (argument_list + (identifier) + (integer))))) + (function_definition + (identifier) + (parameters + (typed_parameter + (identifier) + (type + (identifier)))) + (type + (identifier)) + (block + (return_statement + (comparison_operator + (identifier) + (await + (call + (identifier) + (argument_list))))))) + (assert_statement + (comparison_operator + (await + (call + (identifier) + (argument_list + (identifier)))) + (identifier)))) + +================================================================================ +Call expressions 
+================================================================================ + +__a__() +b(1) +c(e, f=g) +i(j, 5,) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (call + (identifier) + (argument_list))) + (expression_statement + (call + (identifier) + (argument_list + (integer)))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (keyword_argument + (identifier) + (identifier))))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (integer))))) + +================================================================================ +Print used as an identifier +================================================================================ + +print() +print(a) +print(a, b=c) +print(d, e) +print(d, *e) +print(*f, **g,) +a(print) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (call + (identifier) + (argument_list))) + (expression_statement + (call + (identifier) + (argument_list + (identifier)))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (keyword_argument + (identifier) + (identifier))))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (identifier)))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (list_splat + (identifier))))) + (expression_statement + (call + (identifier) + (argument_list + (list_splat + (identifier)) + (dictionary_splat + (identifier))))) + (expression_statement + (call + (identifier) + (argument_list + (identifier))))) + +================================================================================ +Print used as a parameter +================================================================================ + +def a(print): + b +def a(printer=print): + c +def a(*print): + b +def a(**print): + b +def print(): + a + 
+-------------------------------------------------------------------------------- + +(module + (function_definition + (identifier) + (parameters + (identifier)) + (block + (expression_statement + (identifier)))) + (function_definition + (identifier) + (parameters + (default_parameter + (identifier) + (identifier))) + (block + (expression_statement + (identifier)))) + (function_definition + (identifier) + (parameters + (list_splat_pattern + (identifier))) + (block + (expression_statement + (identifier)))) + (function_definition + (identifier) + (parameters + (dictionary_splat_pattern + (identifier))) + (block + (expression_statement + (identifier)))) + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (identifier))))) + +================================================================================ +Exec used as an identifier +================================================================================ + +exec("print \"'%s' has %i characters\" % (public_function(), len(public_function()))", {"__builtins__" : None}, safe_dict) +exec("""exec _code_ in _globs_, _locs_""") + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content + (escape_sequence) + (escape_sequence)) + (string_end)) + (dictionary + (pair + (string + (string_start) + (string_content) + (string_end)) + (none))) + (identifier)))) + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))))) + +================================================================================ +Async / await used as identifiers +================================================================================ + +async = 4 +await = 5 +print async, await + +-------------------------------------------------------------------------------- + +(module + (expression_statement + 
(assignment + (identifier) + (integer))) + (expression_statement + (assignment + (identifier) + (integer))) + (print_statement + (identifier) + (identifier))) + +================================================================================ +Calls with splats +================================================================================ + +a(*()) +a(**{}) +a(*b) +c(d, *e, **g) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (call + (identifier) + (argument_list + (list_splat + (tuple))))) + (expression_statement + (call + (identifier) + (argument_list + (dictionary_splat + (dictionary))))) + (expression_statement + (call + (identifier) + (argument_list + (list_splat + (identifier))))) + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (list_splat + (identifier)) + (dictionary_splat + (identifier)))))) + +================================================================================ +Math operators +================================================================================ + +a + b * c ** d - e / 5 +-5 ++x +~x + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (binary_operator + (binary_operator + (identifier) + (binary_operator + (identifier) + (binary_operator + (identifier) + (identifier)))) + (binary_operator + (identifier) + (integer)))) + (expression_statement + (unary_operator + (integer))) + (expression_statement + (unary_operator + (identifier))) + (expression_statement + (unary_operator + (identifier)))) + +================================================================================ +Binary Addition / Subtraction With Floats +================================================================================ + +.1-.0 +.1+.0 +.1-0 +.1+0 + +1-.0 +1+.0 + +-------------------------------------------------------------------------------- + +(module + (expression_statement + 
(binary_operator + (float) + (float))) + (expression_statement + (binary_operator + (float) + (float))) + (expression_statement + (binary_operator + (float) + (integer))) + (expression_statement + (binary_operator + (float) + (integer))) + (expression_statement + (binary_operator + (integer) + (float))) + (expression_statement + (binary_operator + (integer) + (float)))) + +================================================================================ +Power Operator Precedence +================================================================================ + +2**2**3 +-2**2 + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (binary_operator + (integer) + (binary_operator + (integer) + (integer)))) + (expression_statement + (unary_operator + (binary_operator + (integer) + (integer))))) + +================================================================================ +Operator precedence +================================================================================ + +a() + b[c] * c.d.e + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (binary_operator + left: (call + function: (identifier) + arguments: (argument_list)) + right: (binary_operator + left: (subscript + value: (identifier) + subscript: (identifier)) + right: (attribute + object: (attribute + object: (identifier) + attribute: (identifier)) + attribute: (identifier)))))) + +================================================================================ +Bitwise operators +================================================================================ + +a << b | c >> d & e + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (binary_operator + (binary_operator + (identifier) + (identifier)) + (binary_operator + (binary_operator + (identifier) + (identifier)) + (identifier))))) + 
+================================================================================ +Boolean operators +================================================================================ + +a or b and c +not d +not a and b or c +a and not b and c + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (boolean_operator + (identifier) + (boolean_operator + (identifier) + (identifier)))) + (expression_statement + (not_operator + (identifier))) + (expression_statement + (boolean_operator + (boolean_operator + (not_operator + (identifier)) + (identifier)) + (identifier))) + (expression_statement + (boolean_operator + (boolean_operator + (identifier) + (not_operator + (identifier))) + (identifier)))) + +================================================================================ +Comparison operators +================================================================================ + +a < b <= c == d >= e > f +not a == b or c == d +a not in b +a is not b + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (comparison_operator + (identifier) + (identifier) + (identifier) + (identifier) + (identifier) + (identifier))) + (expression_statement + (boolean_operator + (not_operator + (comparison_operator + (identifier) + (identifier))) + (comparison_operator + (identifier) + (identifier)))) + (expression_statement + (comparison_operator + (identifier) + (identifier))) + (expression_statement + (comparison_operator + (identifier) + (identifier)))) + +================================================================================ +Assignments +================================================================================ + +a = 1 +a, b = 1, 2 +a, *c = 1, 2, 3 +a, = 1, 2 +a[b] = c = d +a, *b.c = d + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + 
(integer))) + (expression_statement + (assignment + (pattern_list + (identifier) + (identifier)) + (expression_list + (integer) + (integer)))) + (expression_statement + (assignment + (pattern_list + (identifier) + (list_splat_pattern + (identifier))) + (expression_list + (integer) + (integer) + (integer)))) + (expression_statement + (assignment + (pattern_list + (identifier)) + (expression_list + (integer) + (integer)))) + (expression_statement + (assignment + (subscript + (identifier) + (identifier)) + (assignment + (identifier) + (identifier)))) + (expression_statement + (assignment + (pattern_list + (identifier) + (list_splat_pattern + (attribute + (identifier) + (identifier)))) + (identifier)))) + +================================================================================ +Assignments with type annotations +================================================================================ + +tail_leaves: List[Leaf] = [] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (type + (generic_type + (identifier) + (type_parameter + (type + (identifier))))) + (list)))) + +================================================================================ +Augmented assignments +================================================================================ + +a += 1 +b >>= 2 +c //= 1 + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (augmented_assignment + (identifier) + (integer))) + (expression_statement + (augmented_assignment + (identifier) + (integer))) + (expression_statement + (augmented_assignment + (identifier) + (integer)))) + +================================================================================ +Named expressions +================================================================================ + +a := x +(y := f(x)) +foo(x=(y := f(x))) +y0 = (y1 := f(x)) +def foo(answer=(p := 
42)): + return answer; +def foo(answer: (p := 42) = 5): + return answer; +foo(x := 3, cat='vector') +(z := (y := (x := 0))) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (named_expression + (identifier) + (identifier))) + (expression_statement + (parenthesized_expression + (named_expression + (identifier) + (call + (identifier) + (argument_list + (identifier)))))) + (expression_statement + (call + (identifier) + (argument_list + (keyword_argument + (identifier) + (parenthesized_expression + (named_expression + (identifier) + (call + (identifier) + (argument_list + (identifier))))))))) + (expression_statement + (assignment + (identifier) + (parenthesized_expression + (named_expression + (identifier) + (call + (identifier) + (argument_list + (identifier))))))) + (function_definition + (identifier) + (parameters + (default_parameter + (identifier) + (parenthesized_expression + (named_expression + (identifier) + (integer))))) + (block + (return_statement + (identifier)))) + (function_definition + (identifier) + (parameters + (typed_default_parameter + (identifier) + (type + (parenthesized_expression + (named_expression + (identifier) + (integer)))) + (integer))) + (block + (return_statement + (identifier)))) + (expression_statement + (call + (identifier) + (argument_list + (named_expression + (identifier) + (integer)) + (keyword_argument + (identifier) + (string + (string_start) + (string_content) + (string_end)))))) + (expression_statement + (parenthesized_expression + (named_expression + (identifier) + (parenthesized_expression + (named_expression + (identifier) + (parenthesized_expression + (named_expression + (identifier) + (integer))))))))) + +================================================================================ +Yield expressions +================================================================================ + +def example(): + yield + yield 1 + x = yield 2 + yield from a + 
yield from (yield from (x for x in range(1, 10))) + +-------------------------------------------------------------------------------- + +(module + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (yield)) + (expression_statement + (yield + (integer))) + (expression_statement + (assignment + (identifier) + (yield + (integer)))) + (expression_statement + (yield + (identifier))) + (expression_statement + (yield + (parenthesized_expression + (yield + (generator_expression + (identifier) + (for_in_clause + (identifier) + (call + (identifier) + (argument_list + (integer) + (integer)))))))))))) + +================================================================================ +lambdas +================================================================================ + +lambda b, c: d("e" % f) +lambda: True +lambda a, b = c, *d, **e: a +lambda (a, b): (a, b) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (lambda + (lambda_parameters + (identifier) + (identifier)) + (call + (identifier) + (argument_list + (binary_operator + (string + (string_start) + (string_content) + (string_end)) + (identifier)))))) + (expression_statement + (lambda + (true))) + (expression_statement + (lambda + (lambda_parameters + (identifier) + (default_parameter + (identifier) + (identifier)) + (list_splat_pattern + (identifier)) + (dictionary_splat_pattern + (identifier))) + (identifier))) + (expression_statement + (lambda + (lambda_parameters + (tuple_pattern + (identifier) + (identifier))) + (tuple + (identifier) + (identifier))))) + +================================================================================ +Tuples with splats +================================================================================ + +(foo, *bar, *baz) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (tuple + (identifier) + (list_splat 
+ (identifier)) + (list_splat + (identifier))))) + +================================================================================ +Tuples with yield +================================================================================ + +(a, yield a, b, c) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (tuple + (identifier) + (yield + (expression_list + (identifier) + (identifier) + (identifier)))))) + +================================================================================ +Default Tuple Arguments +================================================================================ + +def comp_args((a, b)=(3, 4)): + return a, b + +-------------------------------------------------------------------------------- + +(module + (function_definition + (identifier) + (parameters + (default_parameter + (tuple_pattern + (identifier) + (identifier)) + (tuple + (integer) + (integer)))) + (block + (return_statement + (expression_list + (identifier) + (identifier)))))) + +================================================================================ +Conditional if expressions +================================================================================ + +a = b if c else d +something() if a else d +slice(1,1,1) if a else d + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (conditional_expression + (identifier) + (identifier) + (identifier)))) + (expression_statement + (conditional_expression + (call + (identifier) + (argument_list)) + (identifier) + (identifier))) + (expression_statement + (conditional_expression + (call + (identifier) + (argument_list + (integer) + (integer) + (integer))) + (identifier) + (identifier)))) + +================================================================================ +Async context managers and iterators 
+================================================================================ + +async with a as b: + async for c in d: + [e async for f in g] + +-------------------------------------------------------------------------------- + +(module + (with_statement + (with_clause + (with_item + value: (as_pattern + (identifier) + alias: (as_pattern_target + (identifier))))) + body: (block + (for_statement + left: (identifier) + right: (identifier) + body: (block + (expression_statement + (list_comprehension + body: (identifier) + (for_in_clause + left: (identifier) + right: (identifier))))))))) + +================================================================================ +Arbitrary indentation between brackets +================================================================================ + +def a(): + b( +1, +2 + ) + + c = [ +3 + ] + +-------------------------------------------------------------------------------- + +(module + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (call + (identifier) + (argument_list + (integer) + (integer)))) + (expression_statement + (assignment + (identifier) + (list + (integer))))))) + +================================================================================ +Splat Inside of Expression List +================================================================================ + +a,c = [1,2],3 +w, x, y, z = 0, *a, c + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (pattern_list + (identifier) + (identifier)) + (expression_list + (list + (integer) + (integer)) + (integer)))) + (expression_statement + (assignment + (pattern_list + (identifier) + (identifier) + (identifier) + (identifier)) + (expression_list + (integer) + (list_splat + (identifier)) + (identifier))))) diff --git a/python/test/corpus/literals.txt b/python/test/corpus/literals.txt new file mode 100644 index 0000000..0d844e3 --- /dev/null +++ 
b/python/test/corpus/literals.txt @@ -0,0 +1,1046 @@ +================================================================================ +Integers +================================================================================ + +-1 +0xDEAD +0XDEAD +1j +-1j +0o123 +0O123 +0b001 +0B001 +1_1 +0B1_1 +0O1_1 +0L + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (unary_operator + (integer))) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (unary_operator + (integer))) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer)) + (expression_statement + (integer))) + +================================================================================ +Floats +================================================================================ + +-.6_6 ++.1_1 +123.4123 +123.123J +1_1.3_1 +1_1. 
+1e+3_4j +.3e1_4 +1_0.l +.1l + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (unary_operator + (float))) + (expression_statement + (unary_operator + (float))) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float))) + +================================================================================ +Scientific Notation Floats +================================================================================ + +1e322 +1e-3 +1e+3 +1.8e10 +1.e10 +-1e10 + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (float)) + (expression_statement + (unary_operator + (float)))) + +================================================================================ +Strings +================================================================================ + +"I'm ok" +'"ok"' +UR'bye' +b'sup' +B"sup" +`1` +"\\" +"/" +"multiline \ +string" +b"\x12\u12\U12\x13\N{WINKING FACE}" +"\xab\123\'\"\a\b\f\r\n\t\v\\" +"\xgh\o123\p\q\c\d\e\u12\U1234" +f'\N{GREEK CAPITAL LETTER DELTA}' + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + 
(string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence) + (escape_sequence)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence)) + (string_end)))) + +================================================================================ +Raw strings +================================================================================ + +'ab\x00cd' +"\n" + +# no escape sequences in these +r'ab\x00cd' +ur"\n" + +# raw f-string +fr"\{0}" + +r"\\" +r'"a\ +de\ +fg"' + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content + (escape_sequence)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence)) + (string_end))) + (comment) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (comment) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (integer)) + (string_end))) + 
(expression_statement + (string + (string_start) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end)))) + +================================================================================ +Raw strings with escaped quotes +================================================================================ + +re.compile(r"(\n|\A)#include\s*['\"]" + r"(?P<name>[\w\d./\\]+[.]src)['\"]") + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (concatenated_string + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end))))))) + +================================================================================ +Format strings +================================================================================ + +# nested! +f"a {b(f'c {e} d')} e" +f"""a"{b}c""" +f"""a""{b}c""" +f"a {{}} e" +f"a {b}}}" +f"a {{{b}" +f"a {{b}}" +f"a {{{b}}}" +f"{c,}" +f"{yield d}" +f"{*a,}" + +def function(): + return f""" +{"string1" if True else + "string2"}""" + +def test(self): + self.assertEqual(f'''A complex trick: { +2 # two +}''', 'A complex trick: 2') + +-------------------------------------------------------------------------------- + +(module + (comment) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))))) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))) + (expression_statement + (string + 
(string_start) + (string_content + (escape_interpolation) + (escape_interpolation)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content + (escape_interpolation)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_interpolation)) + (interpolation + (identifier)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_interpolation) + (escape_interpolation)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_interpolation)) + (interpolation + (identifier)) + (string_content + (escape_interpolation)) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (expression_list + (identifier))) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (yield + (identifier))) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (expression_list + (list_splat + (identifier)))) + (string_end))) + (function_definition + (identifier) + (parameters) + (block + (return_statement + (string + (string_start) + (string_content) + (interpolation + (conditional_expression + (string + (string_start) + (string_content) + (string_end)) + (true) + (string + (string_start) + (string_content) + (string_end)))) + (string_end))))) + (function_definition + (identifier) + (parameters + (identifier)) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (string + (string_start) + (string_content) + (interpolation + (integer) + (comment)) + (string_end)) + (string + (string_start) + (string_content) + (string_end)))))))) + +================================================================================ +Format strings with format specifiers +================================================================================ + +f"a {b:2} {c:34.5}" 
+f"{b:{c.d}.{d.e}}" +f"{a:#06x}" +f"{a=}" +f"{a=:.2f}" +f"{value:{width + padding!r}.{precision}}" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (identifier) + (format_specifier + (format_expression + (attribute + (identifier) + (identifier))) + (format_expression + (attribute + (identifier) + (identifier))))) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (identifier) + (format_specifier)) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (identifier)) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (identifier) + (format_specifier)) + (string_end))) + (expression_statement + (string + (string_start) + (interpolation + (identifier) + (format_specifier + (format_expression + (binary_operator + (identifier) + (identifier)) + (type_conversion)) + (format_expression + (identifier)))) + (string_end)))) + +================================================================================ +Unicode escape sequences +================================================================================ + +"\x12 \123 \u1234" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content + (escape_sequence) + (escape_sequence) + (escape_sequence)) + (string_end)))) + +================================================================================ +Other primitives +================================================================================ + +True +False +None + 
+-------------------------------------------------------------------------------- + +(module + (expression_statement + (true)) + (expression_statement + (false)) + (expression_statement + (none))) + +================================================================================ +Concatenated strings +================================================================================ + +"one" "two" "three" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (concatenated_string + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end))))) + +================================================================================ +Multi-line strings +================================================================================ + +""" +A double quote hello, +without double or single quotes. +""" + +""" +A double quote "hello", +with double quotes. +""" + +""" +A double quote 'hello', +with single quotes. +""" + +''' +A single quote hello, +without double or single quotes. +''' + +''' +A single quote 'hello', +with single quotes. +''' + +''' +A single quote "hello", +with double quotes. 
+''' + +""" +A double quote hello\n\ +with an escaped newline\n\ +and another escaped newline\n\ +""" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence) + (escape_sequence)) + (string_end)))) + +================================================================================ +Lists +================================================================================ + +[a, b, [c, d]] +[*()] +[*[]] +[*a] +[*a.b] +[*a[b].c] +[*a()] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (list + (identifier) + (identifier) + (list + (identifier) + (identifier)))) + (expression_statement + (list + (list_splat + (tuple)))) + (expression_statement + (list + (list_splat + (list)))) + (expression_statement + (list + (list_splat + (identifier)))) + (expression_statement + (list + (attribute + (list_splat + (identifier)) + (identifier)))) + (expression_statement + (list + (attribute + (subscript + (list_splat + (identifier)) + (identifier)) + (identifier)))) + (expression_statement + (list + (call + (list_splat + (identifier)) + (argument_list))))) + +================================================================================ +List comprehensions 
+================================================================================ + +[a + b for (a, b) in items] +[a for b in c for a in b] +[(x,y) for x in [1,2,3] for y in [1,2,3] if True] +[a for a in lambda: True, lambda: False if a()] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (list_comprehension + (binary_operator + (identifier) + (identifier)) + (for_in_clause + (tuple_pattern + (identifier) + (identifier)) + (identifier)))) + (expression_statement + (list_comprehension + (identifier) + (for_in_clause + (identifier) + (identifier)) + (for_in_clause + (identifier) + (identifier)))) + (expression_statement + (list_comprehension + (tuple + (identifier) + (identifier)) + (for_in_clause + (identifier) + (list + (integer) + (integer) + (integer))) + (for_in_clause + (identifier) + (list + (integer) + (integer) + (integer))) + (if_clause + (true)))) + (expression_statement + (list_comprehension + (identifier) + (for_in_clause + (identifier) + (lambda + (true)) + (lambda + (false))) + (if_clause + (call + (identifier) + (argument_list)))))) + +================================================================================ +Dictionaries +================================================================================ + +{a: 1, b: 2} +{} +{**{}} +{**a} +{**a.b} +{**a[b].c} +{**a()} + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (dictionary + (pair + (identifier) + (integer)) + (pair + (identifier) + (integer)))) + (expression_statement + (dictionary)) + (expression_statement + (dictionary + (dictionary_splat + (dictionary)))) + (expression_statement + (dictionary + (dictionary_splat + (identifier)))) + (expression_statement + (dictionary + (dictionary_splat + (attribute + (identifier) + (identifier))))) + (expression_statement + (dictionary + (dictionary_splat + (attribute + (subscript + (identifier) + (identifier)) + 
(identifier))))) + (expression_statement + (dictionary + (dictionary_splat + (call + (identifier) + (argument_list)))))) + +================================================================================ +Dictionary comprehensions +================================================================================ + +{a: b for a, b in items} +{a: b for c in d for e in items} + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (dictionary_comprehension + (pair + (identifier) + (identifier)) + (for_in_clause + (pattern_list + (identifier) + (identifier)) + (identifier)))) + (expression_statement + (dictionary_comprehension + (pair + (identifier) + (identifier)) + (for_in_clause + (identifier) + (identifier)) + (for_in_clause + (identifier) + (identifier))))) + +================================================================================ +Sets +================================================================================ + +{a, b, c,} +{*{}} + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (set + (identifier) + (identifier) + (identifier))) + (expression_statement + (set + (list_splat + (dictionary))))) + +================================================================================ +Set comprehensions +================================================================================ + +{a[b][c] for a, b, c in items} +{r for s in qs for n in ms} + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (set_comprehension + (subscript + (subscript + (identifier) + (identifier)) + (identifier)) + (for_in_clause + (pattern_list + (identifier) + (identifier) + (identifier)) + (identifier)))) + (expression_statement + (set_comprehension + (identifier) + (for_in_clause + (identifier) + (identifier)) + (for_in_clause + (identifier) + (identifier))))) + 
+================================================================================ +Simple Tuples +================================================================================ + +() +(a, b) +(a, b, c,) +(print, exec) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (tuple)) + (expression_statement + (tuple + (identifier) + (identifier))) + (expression_statement + (tuple + (identifier) + (identifier) + (identifier))) + (expression_statement + (tuple + (identifier) + (identifier)))) + +================================================================================ +Generator expression +================================================================================ + +(a[b][c] for a, b, c in items) +dict((a, b) for a, b in d) +(a for b in c for d in e,) +(x for x in range(1, 10)) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (generator_expression + (subscript + (subscript + (identifier) + (identifier)) + (identifier)) + (for_in_clause + (pattern_list + (identifier) + (identifier) + (identifier)) + (identifier)))) + (expression_statement + (call + (identifier) + (generator_expression + (tuple + (identifier) + (identifier)) + (for_in_clause + (pattern_list + (identifier) + (identifier)) + (identifier))))) + (expression_statement + (generator_expression + (identifier) + (for_in_clause + (identifier) + (identifier)) + (for_in_clause + (identifier) + (identifier)))) + (expression_statement + (generator_expression + (identifier) + (for_in_clause + (identifier) + (call + (identifier) + (argument_list + (integer) + (integer))))))) diff --git a/python/test/corpus/pattern_matching.txt b/python/test/corpus/pattern_matching.txt new file mode 100644 index 0000000..2913048 --- /dev/null +++ b/python/test/corpus/pattern_matching.txt @@ -0,0 +1,1572 @@ +================================================================================ 
+Matching specific values +================================================================================ + +match command.split(): + case ["quit"]: + print("Goodbye!") + quit_game() + case ["look"]: + current_room.describe() + case ["get", obj]: + character.get(obj, current_room) + case ["go", direction]: + current_room = current_room.neighbor(direction) + # The rest of your commands go here + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))))) + (block + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end))))) + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list))))) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier) + (identifier)))))) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (assignment + (identifier) + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier))))))) + (comment)))) + +================================================================================ +Matching multiple values 
+================================================================================ + +match command.split(): + case ["drop", *objects]: + for obj in objects: + character.drop(obj, current_room) + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (splat_pattern + (identifier))))) + (block + (for_statement + (identifier) + (identifier) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier) + (identifier))))))))))) + +================================================================================ +Adding a wild card +================================================================================ + +match command.split(): +# ^ conditional + case ["quit"]: ... 
# Code omitted for brevity + case ["go", direction]: pass + case ["drop", *objects]: pass + case _: + print(f"Sorry, I couldn't understand {command!r}") + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (comment) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))))) + (block + (expression_statement + (ellipsis)) + (comment))) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier))))) + (block + (pass_statement))) + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (splat_pattern + (identifier))))) + (block + (pass_statement))) + (case_clause + (case_pattern) + (block + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (interpolation + (identifier) + (type_conversion)) + (string_end)))))))))) + +================================================================================ +Or patterns +================================================================================ + +match command.split(): + case ["north"] | ["go", "north"]: + current_room = current_room.neighbor("north") + case ["get", obj] | ["pick", "up", obj] | ["pick", obj, "up"]: + pass + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (union_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end)))) + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern 
+ (string + (string_start) + (string_content) + (string_end)))))) + (block + (expression_statement + (assignment + (identifier) + (call + (attribute + (identifier) + (identifier)) + (argument_list + (string + (string_start) + (string_content) + (string_end)))))))) + (case_clause + (case_pattern + (union_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier)))) + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier)))) + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (string + (string_start) + (string_content) + (string_end)))))) + (block + (pass_statement)))))) + +================================================================================ +As patterns +================================================================================ + +match command.split(): + case ["go", ("north" | "south" | "east" | "west") as direction]: + current_room = current_room.neighbor(direction) + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (list_pattern + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (case_pattern + (as_pattern + (case_pattern + (tuple_pattern + (case_pattern + (union_pattern + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)))))) + (identifier))))) + (block + (expression_statement + (assignment + 
(identifier) + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))))))) + +================================================================================ +Actually not match +================================================================================ + +match = 2 +match, a = 2, 3 +match: int = secret +x, match = 2, "hey, what's up?" +*match, last = [1, 2, 3] +def foo(**match): pass + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (integer))) + (expression_statement + (assignment + (pattern_list + (identifier) + (identifier)) + (expression_list + (integer) + (integer)))) + (expression_statement + (assignment + (identifier) + (type + (identifier)) + (identifier))) + (expression_statement + (assignment + (pattern_list + (identifier) + (identifier)) + (expression_list + (integer) + (string + (string_start) + (string_content) + (string_end))))) + (expression_statement + (assignment + (pattern_list + (list_splat_pattern + (identifier)) + (identifier)) + (list + (integer) + (integer) + (integer)))) + (function_definition + (identifier) + (parameters + (dictionary_splat_pattern + (identifier))) + (block + (pass_statement)))) + +================================================================================ +Match is match but not pattern matching +================================================================================ + +a = [match] +match = [match] + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (list + (identifier)))) + (expression_statement + (assignment + (identifier) + (list + (identifier))))) + +================================================================================ +Match kwargs +================================================================================ + +field = call(match=r".*\.txt$") + 
+-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (call + (identifier) + (argument_list + (keyword_argument + (identifier) + (string + (string_start) + (string_content) + (string_end)))))))) + +================================================================================ +Match kwargs 2 +================================================================================ + +field = match(match=match, match) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (call + (identifier) + (argument_list + (keyword_argument + (identifier) + (identifier)) + (identifier)))))) + +================================================================================ +Case used as identifier +================================================================================ + +a = [case] +case = [case] +just_in_case = call_me(case=True) + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (list + (identifier)))) + (expression_statement + (assignment + (identifier) + (list + (identifier)))) + (expression_statement + (assignment + (identifier) + (call + (identifier) + (argument_list + (keyword_argument + (identifier) + (true))))))) + +================================================================================ +If guards +================================================================================ + +match 0: + case 0 if False: + x = False + case 0 if True: + x = True + +-------------------------------------------------------------------------------- + +(module + (match_statement + (integer) + (block + (case_clause + (case_pattern + (integer)) + (if_clause + (false)) + (block + (expression_statement + (assignment + (identifier) + (false))))) + (case_clause + (case_pattern + (integer)) + 
(if_clause + (true)) + (block + (expression_statement + (assignment + (identifier) + (true)))))))) + +================================================================================ +Literals +================================================================================ + +match xxx: + case 3 | -3: + pass + case "something": + pass + case "something" "else": + pass + case 1.0 | -1.0: + pass + case True | False: + pass + case None: + pass + +-------------------------------------------------------------------------------- + +(module + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (union_pattern + (integer) + (integer))) + (block + (pass_statement))) + (case_clause + (case_pattern + (string + (string_start) + (string_content) + (string_end))) + (block + (pass_statement))) + (case_clause + (case_pattern + (concatenated_string + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)))) + (block + (pass_statement))) + (case_clause + (case_pattern + (union_pattern + (float) + (float))) + (block + (pass_statement))) + (case_clause + (case_pattern + (union_pattern + (true) + (false))) + (block + (pass_statement))) + (case_clause + (case_pattern + (none)) + (block + (pass_statement)))))) + +================================================================================ +Comma separated cases +================================================================================ + +match (0, 1, 2): + case 0,1: + x = 0 + case 0, *x: + x = 0 + +-------------------------------------------------------------------------------- + +(module + (match_statement + (tuple + (integer) + (integer) + (integer)) + (block + (case_clause + (case_pattern + (integer)) + (case_pattern + (integer)) + (block + (expression_statement + (assignment + (identifier) + (integer))))) + (case_clause + (case_pattern + (integer)) + (case_pattern + (splat_pattern + (identifier))) + (block + (expression_statement + 
(assignment + (identifier) + (integer)))))))) + +================================================================================ +Case terminating in comma +================================================================================ + +match x,: + case *x,: + y = 0 + +-------------------------------------------------------------------------------- + +(module + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (splat_pattern + (identifier))) + (block + (expression_statement + (assignment + (identifier) + (integer)))))))) + +================================================================================ +Multiple match patterns +================================================================================ + +match ..., ...: + case a, b: + return locals() + +-------------------------------------------------------------------------------- + +(module + (match_statement + (ellipsis) + (ellipsis) + (block + (case_clause + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (dotted_name + (identifier))) + (block + (return_statement + (call + (identifier) + (argument_list)))))))) + +================================================================================ +Match match, case case +================================================================================ + +match = case = 0 +match match: + case case: + x = 0 +-------------------------------------------------------------------------------- + +(module + (expression_statement + (assignment + (identifier) + (assignment + (identifier) + (integer)))) + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (dotted_name + (identifier))) + (block + (expression_statement + (assignment + (identifier) + (integer)))))))) + +================================================================================ +Walrus match (Issue #150) +================================================================================ + +if match := 
re.fullmatch(r"(-)?(\d+:)?\d?\d:\d\d(\.\d*)?", time, flags=re.ASCII): + return 42 + +-------------------------------------------------------------------------------- + +(module + (if_statement + (named_expression + (identifier) + (call + (attribute + (identifier) + (identifier)) + (argument_list + (string + (string_start) + (string_content) + (string_end)) + (identifier) + (keyword_argument + (identifier) + (attribute + (identifier) + (identifier)))))) + (block + (return_statement + (integer))))) + +================================================================================ +Matching objects +================================================================================ + +match event.get(): + case Click(position=(x, y)): + handle_click_at(x, y) + case KeyPress(key_name="Q") | Quit(): + game.quit() + case KeyPress(key_name="up arrow"): + game.go_north() + ... + case KeyPress(): + pass # Ignore other keystrokes + case other_event: + raise ValueError(f"Unrecognized event: {other_event}") + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (keyword_pattern + (identifier) + (tuple_pattern + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (dotted_name + (identifier)))))))) + (block + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (identifier)))))) + (case_clause + (case_pattern + (union_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (keyword_pattern + (identifier) + (string + (string_start) + (string_content) + (string_end))))) + (class_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + 
(identifier)) + (case_pattern + (keyword_pattern + (identifier) + (string + (string_start) + (string_content) + (string_end)))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list))) + (expression_statement + (ellipsis)))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)))) + (block + (pass_statement) + (comment))) + (case_clause + (case_pattern + (dotted_name + (identifier))) + (block + (raise_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_end)))))))))) + +================================================================================ +Positional arguments +================================================================================ + +match event.get(): + case Click((x, y)): + handle_click_at(x, y) + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (tuple_pattern + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (dotted_name + (identifier))))))) + (block + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (identifier))))))))) + +================================================================================ +Constants and enums +================================================================================ + +match event.get(): + case Click((x, y), button=Button.LEFT): # This is a left click + handle_click_at(x, y) + case Click(): + pass # ignore other clicks + +-------------------------------------------------------------------------------- + +(module + (match_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list)) + (block + (case_clause + (case_pattern + (class_pattern + 
(dotted_name + (identifier)) + (case_pattern + (tuple_pattern + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (dotted_name + (identifier))))) + (case_pattern + (keyword_pattern + (identifier) + (dotted_name + (identifier) + (identifier)))))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list + (identifier) + (identifier)))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)))) + (block + (pass_statement) + (comment)))))) + +================================================================================ +Dict mappings +================================================================================ + +for action in actions: + match action: + case {"text": message, "color": c}: + ui.set_text_color(c) + ui.display(message) + case {"sleep": duration}: + ui.wait(duration) + case {"sound": url, "format": "ogg"}: + ui.play(url) + case {a.b: c}: + action() + case {"sound": _, "format": _}: + warning("Unsupported audio format") +-------------------------------------------------------------------------------- + +(module + (for_statement + (identifier) + (identifier) + (block + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (dotted_name + (identifier))) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))) + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + 
(case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (dotted_name + (identifier))) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (string + (string_start) + (string_content) + (string_end))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + (case_clause + (case_pattern + (dict_pattern + (dotted_name + (identifier) + (identifier)) + (case_pattern + (dotted_name + (identifier))))) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern))) + (block + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))))))))))) + +================================================================================ +Builtin classes +================================================================================ + +for action in actions: + match action: + case {"text": str(message), "color": str(c)}: + ui.set_text_color(c) + ui.display(message) + case {"sleep": float(duration)}: + ui.wait(duration) + case {"sound": str(url), "format": "ogg"}: + ui.play(url) + case {"sound": _, "format": _}: + warning("Unsupported audio format") + +-------------------------------------------------------------------------------- + +(module + (for_statement + (identifier) + (identifier) + (block + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (dotted_name + (identifier))))) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + 
(class_pattern + (dotted_name + (identifier)) + (case_pattern + (dotted_name + (identifier))))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))) + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (dotted_name + (identifier))))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (dotted_name + (identifier))))) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern + (string + (string_start) + (string_content) + (string_end))))) + (block + (expression_statement + (call + (attribute + (identifier) + (identifier)) + (argument_list + (identifier)))))) + (case_clause + (case_pattern + (dict_pattern + (string + (string_start) + (string_content) + (string_end)) + (case_pattern) + (string + (string_start) + (string_content) + (string_end)) + (case_pattern))) + (block + (expression_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))))))))))) + +================================================================================ +Complex case patterns on classes +================================================================================ + +match x: + case Something(): # no args + foo1() + case Something.Else(): # more complex class name + foo2() + case Point2D(0, 1, 2): # three args + foo3() + case Point3D(x=0, y=0, z=0): # kw args + foo4() + case Point3D(34, x=0, y=0, z=0): # positional + kw args + foo5() + case 
Point2D(0, 1, 2,): # three args + trail comma + foo6() + case Point3D(x=0, y=0, z=0,): # kw args + trail comma + foo7() + case Point3D(34, x=0, y=0, z=0,): # positional + kw args + trail comma + foo8() + +-------------------------------------------------------------------------------- + +(module + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier) + (identifier)))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (integer)) + (case_pattern + (integer)) + (case_pattern + (integer)))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (integer)) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (integer)) + (case_pattern + (integer)) + (case_pattern + (integer)))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern 
+ (class_pattern + (dotted_name + (identifier)) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list))))) + (case_clause + (case_pattern + (class_pattern + (dotted_name + (identifier)) + (case_pattern + (integer)) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))) + (case_pattern + (keyword_pattern + (identifier) + (integer))))) + (comment) + (block + (expression_statement + (call + (identifier) + (argument_list)))))))) + +================================================================================ +Complex case patterns on complex numbers +================================================================================ + +match x: + case -3 + 5j: + pass + case -3 + 5j as b2: + pass + case 3j as b1, -3 + 5j as b2: + pass + case -3. 
+ 5j: + pass + case 3 - 5.j: + pass + +-------------------------------------------------------------------------------- + +(module + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (complex_pattern + (integer) + (integer))) + (block + (pass_statement))) + (case_clause + (case_pattern + (as_pattern + (case_pattern + (complex_pattern + (integer) + (integer))) + (identifier))) + (block + (pass_statement))) + (case_clause + (case_pattern + (as_pattern + (case_pattern + (integer)) + (identifier))) + (case_pattern + (as_pattern + (case_pattern + (complex_pattern + (integer) + (integer))) + (identifier))) + (block + (pass_statement))) + (case_clause + (case_pattern + (complex_pattern + (float) + (integer))) + (block + (pass_statement))) + (case_clause + (case_pattern + (complex_pattern + (integer) + (float))) + (block + (pass_statement)))))) + +================================================================================ +Maybe sequence pattern right hand side precedence validation +================================================================================ + +match x: + case a1, *a2, a3: + pass + +-------------------------------------------------------------------------------- + +(module + (match_statement + (identifier) + (block + (case_clause + (case_pattern + (dotted_name + (identifier))) + (case_pattern + (splat_pattern + (identifier))) + (case_pattern + (dotted_name + (identifier))) + (block + (pass_statement)))))) diff --git a/python/test/corpus/statements.txt b/python/test/corpus/statements.txt new file mode 100644 index 0000000..f2162e5 --- /dev/null +++ b/python/test/corpus/statements.txt @@ -0,0 +1,1628 @@ +================================================================================ +Import statements +================================================================================ + +import a, b +import b.c as d +import a.b.c + +-------------------------------------------------------------------------------- + +(module + 
(import_statement + (dotted_name + (identifier)) + (dotted_name + (identifier))) + (import_statement + (aliased_import + (dotted_name + (identifier) + (identifier)) + (identifier))) + (import_statement + (dotted_name + (identifier) + (identifier) + (identifier)))) + +================================================================================ +Import-from statements +================================================================================ + +from a import b +from a import * +from a import (b, c) +from a.b import c +from . import b +from .. import b +from .a import b +from ..a import b + +-------------------------------------------------------------------------------- + +(module + (import_from_statement + (dotted_name + (identifier)) + (dotted_name + (identifier))) + (import_from_statement + (dotted_name + (identifier)) + (wildcard_import)) + (import_from_statement + (dotted_name + (identifier)) + (dotted_name + (identifier)) + (dotted_name + (identifier))) + (import_from_statement + (dotted_name + (identifier) + (identifier)) + (dotted_name + (identifier))) + (import_from_statement + (relative_import + (import_prefix)) + (dotted_name + (identifier))) + (import_from_statement + (relative_import + (import_prefix)) + (dotted_name + (identifier))) + (import_from_statement + (relative_import + (import_prefix) + (dotted_name + (identifier))) + (dotted_name + (identifier))) + (import_from_statement + (relative_import + (import_prefix) + (dotted_name + (identifier))) + (dotted_name + (identifier)))) + +================================================================================ +Future import statements +================================================================================ + +from __future__ import print_statement +from __future__ import python4 +from __future__ import (absolute_import, division, print_function, + unicode_literals) +-------------------------------------------------------------------------------- + +(module + 
(future_import_statement + (dotted_name + (identifier))) + (future_import_statement + (dotted_name + (identifier))) + (future_import_statement + (dotted_name + (identifier)) + (dotted_name + (identifier)) + (dotted_name + (identifier)) + (dotted_name + (identifier)))) + +================================================================================ +Print statements +================================================================================ + +print a +print b, c +print 0 or 1, 1 or 0, +print 0 or 1 +print not True + +-------------------------------------------------------------------------------- + +(module + (print_statement + (identifier)) + (print_statement + (identifier) + (identifier)) + (print_statement + (boolean_operator + (integer) + (integer)) + (boolean_operator + (integer) + (integer))) + (print_statement + (boolean_operator + (integer) + (integer))) + (print_statement + (not_operator + (true)))) + +================================================================================ +Print statements with redirection +================================================================================ + +print >> a +print >> a, "b", "c" + +-------------------------------------------------------------------------------- + +(module + (print_statement + (chevron + (identifier))) + (print_statement + (chevron + (identifier)) + (string + (string_start) + (string_content) + (string_end)) + (string + (string_start) + (string_content) + (string_end)))) + +================================================================================ +Assert statements +================================================================================ + +assert a +assert b, c + +-------------------------------------------------------------------------------- + +(module + (assert_statement + (identifier)) + (assert_statement + (identifier) + (identifier))) + +================================================================================ +Expression statements 
+================================================================================ + +a +b + c +1, 2, 3 +1, 2, 3, + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (identifier)) + (expression_statement + (binary_operator + (identifier) + (identifier))) + (expression_statement + (integer) + (integer) + (integer)) + (expression_statement + (integer) + (integer) + (integer))) + +================================================================================ +Delete statements +================================================================================ + +del a[1], b[2] + +-------------------------------------------------------------------------------- + +(module + (delete_statement + (expression_list + (subscript + (identifier) + (integer)) + (subscript + (identifier) + (integer))))) + +================================================================================ +Control-flow statements +================================================================================ + +while true: + pass + break + continue + +-------------------------------------------------------------------------------- + +(module + (while_statement + condition: (identifier) + body: (block + (pass_statement) + (break_statement) + (continue_statement)))) + +================================================================================ +Return statements +================================================================================ + +return +return a + b, c +return not b + +-------------------------------------------------------------------------------- + +(module + (return_statement) + (return_statement + (expression_list + (binary_operator + (identifier) + (identifier)) + (identifier))) + (return_statement + (not_operator + (identifier)))) + +================================================================================ +If statements 
+================================================================================ + +if a: + b + c + +-------------------------------------------------------------------------------- + +(module + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier)) + (expression_statement + (identifier))))) + +================================================================================ +If else statements +================================================================================ + +if a: + b +elif c: + d +else: + f + +if a: + b +else: + f + +if a: b + +if a: b; c + +-------------------------------------------------------------------------------- + +(module + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier))) + alternative: (elif_clause + condition: (identifier) + consequence: (block + (expression_statement + (identifier)))) + alternative: (else_clause + body: (block + (expression_statement + (identifier))))) + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier))) + alternative: (else_clause + body: (block + (expression_statement + (identifier))))) + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier)))) + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier)) + (expression_statement + (identifier))))) + +================================================================================ +Nested if statements +================================================================================ + +if a: + if b: + c + else: + if e: + f +g + +-------------------------------------------------------------------------------- + +(module + (if_statement + condition: (identifier) + consequence: (block + (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier))) + alternative: (else_clause + body: (block 
+ (if_statement + condition: (identifier) + consequence: (block + (expression_statement + (identifier))))))))) + (expression_statement + (identifier))) + +================================================================================ +While statements +================================================================================ + +while a: + b + +while c: + d +else: + e + f + +-------------------------------------------------------------------------------- + +(module + (while_statement + condition: (identifier) + body: (block + (expression_statement + (identifier)))) + (while_statement + condition: (identifier) + body: (block + (expression_statement + (identifier))) + alternative: (else_clause + body: (block + (expression_statement + (identifier)) + (expression_statement + (identifier)))))) + +================================================================================ +For statements +================================================================================ + +for line, i in lines: + print line + for character, j in line: + print character +else: + print x + +for x, in [(1,), (2,), (3,)]: + x + +-------------------------------------------------------------------------------- + +(module + (for_statement + left: (pattern_list + (identifier) + (identifier)) + right: (identifier) + body: (block + (print_statement + argument: (identifier)) + (for_statement + left: (pattern_list + (identifier) + (identifier)) + right: (identifier) + body: (block + (print_statement + argument: (identifier))))) + alternative: (else_clause + body: (block + (print_statement + argument: (identifier))))) + (for_statement + left: (pattern_list + (identifier)) + right: (list + (tuple + (integer)) + (tuple + (integer)) + (tuple + (integer))) + body: (block + (expression_statement + (identifier))))) + +================================================================================ +Try statements +================================================================================ + 
+try: + a +except b: + c +except d as e: + f +except g, h: + i +except: + j + +try: + a +except b: + c + d +else: + e +finally: + f + +try: + a +except* b: + c +except* d as e: + f +else: + g +finally: + h + +-------------------------------------------------------------------------------- + +(module + (try_statement + body: (block + (expression_statement + (identifier))) + (except_clause + (identifier) + (block + (expression_statement + (identifier)))) + (except_clause + (as_pattern + (identifier) + alias: (as_pattern_target + (identifier))) + (block + (expression_statement + (identifier)))) + (except_clause + (identifier) + (identifier) + (block + (expression_statement + (identifier)))) + (except_clause + (block + (expression_statement + (identifier))))) + (try_statement + body: (block + (expression_statement + (identifier))) + (except_clause + (identifier) + (block + (expression_statement + (identifier)) + (expression_statement + (identifier)))) + (else_clause + body: (block + (expression_statement + (identifier)))) + (finally_clause + (block + (expression_statement + (identifier))))) + (try_statement + body: (block + (expression_statement + (identifier))) + (except_group_clause + (identifier) + (block + (expression_statement + (identifier)))) + (except_group_clause + (as_pattern + (identifier) + alias: (as_pattern_target + (identifier))) + (block + (expression_statement + (identifier)))) + (else_clause + body: (block + (expression_statement + (identifier)))) + (finally_clause + (block + (expression_statement + (identifier)))))) + +================================================================================ +With statements +================================================================================ + +with a as b: + c + +with (open('d') as d, + open('e') as e): + f + +with e as f, g as h,: + i + +-------------------------------------------------------------------------------- + +(module + (with_statement + (with_clause + (with_item + (as_pattern + 
(identifier) + (as_pattern_target + (identifier))))) + (block + (expression_statement + (identifier)))) + (with_statement + (with_clause + (with_item + (as_pattern + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))) + (as_pattern_target + (identifier)))) + (with_item + (as_pattern + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))) + (as_pattern_target + (identifier))))) + (block + (expression_statement + (identifier)))) + (with_statement + (with_clause + (with_item + (as_pattern + (identifier) + (as_pattern_target + (identifier)))) + (with_item + (as_pattern + (identifier) + (as_pattern_target + (identifier))))) + (block + (expression_statement + (identifier))))) + +================================================================================ +Async Function definitions +================================================================================ + +async def a(): + b + +async def c(d): + e + +async def g(g, h,): + i + +async def c(a: str): + a + +async def c(a: b.c): + a + +async def d(a: Sequence[T]) -> T: + a + +async def i(a, b=c, *c, **d): + a + +async def d(a: str) -> None: + return None + +async def d(a:str="default", b=c) -> None: + return None + +-------------------------------------------------------------------------------- + +(module + (function_definition + name: (identifier) + parameters: (parameters) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (identifier) + (identifier)) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (identifier) + type: (type + (identifier)))) + body: (block + (expression_statement + (identifier)))) 
+ (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (identifier) + type: (type + (attribute + object: (identifier) + attribute: (identifier))))) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (identifier) + type: (type + (generic_type + (identifier) + (type_parameter + (type + (identifier))))))) + return_type: (type + (identifier)) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (identifier) + (default_parameter + name: (identifier) + value: (identifier)) + (list_splat_pattern + (identifier)) + (dictionary_splat_pattern + (identifier))) + body: (block + (expression_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (identifier) + type: (type + (identifier)))) + return_type: (type + (none)) + body: (block + (return_statement + (none)))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_default_parameter + name: (identifier) + type: (type + (identifier)) + value: (string + (string_start) + (string_content) + (string_end))) + (default_parameter + name: (identifier) + value: (identifier))) + return_type: (type + (none)) + body: (block + (return_statement + (none))))) + +================================================================================ +Function definitions +================================================================================ + +def e((a,b)): + return (a,b) + +def e(*list: str): + pass + +def e(**list: str): + pass + +def f(): + nonlocal a + +def g(h, i, /, j, *, k=100, **kwarg): + return h,i,j,k,kwarg + +def h(*a): + i((*a)) + j(((*a))) + +def foo(): + pass \ +\ +\ + +-------------------------------------------------------------------------------- + +(module + (function_definition + name: (identifier) + parameters: (parameters + 
(tuple_pattern + (identifier) + (identifier))) + body: (block + (return_statement + (tuple + (identifier) + (identifier))))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (list_splat_pattern + (identifier)) + type: (type + (identifier)))) + body: (block + (pass_statement))) + (function_definition + name: (identifier) + parameters: (parameters + (typed_parameter + (dictionary_splat_pattern + (identifier)) + type: (type + (identifier)))) + body: (block + (pass_statement))) + (function_definition + name: (identifier) + parameters: (parameters) + body: (block + (nonlocal_statement + (identifier)))) + (function_definition + name: (identifier) + parameters: (parameters + (identifier) + (identifier) + (positional_separator) + (identifier) + (keyword_separator) + (default_parameter + name: (identifier) + value: (integer)) + (dictionary_splat_pattern + (identifier))) + body: (block + (return_statement + (expression_list + (identifier) + (identifier) + (identifier) + (identifier) + (identifier))))) + (function_definition + name: (identifier) + parameters: (parameters + (list_splat_pattern + (identifier))) + body: (block + (expression_statement + (call + function: (identifier) + arguments: (argument_list + (parenthesized_expression + (list_splat + (identifier)))))) + (expression_statement + (call + function: (identifier) + arguments: (argument_list + (parenthesized_expression + (parenthesized_expression + (list_splat + (identifier))))))))) + (function_definition + name: (identifier) + parameters: (parameters) + body: (block + (pass_statement))) + (line_continuation) + (line_continuation) + (line_continuation)) + +================================================================================ +Empty blocks +================================================================================ + +# These are not actually valid python; blocks +# must contain at least one statement. 
But we +# allow them because error recovery for empty +# blocks doesn't work very well otherwise. +def a(b, c): + +if d: + print e + while f(): + +-------------------------------------------------------------------------------- + +(module + (comment) + (comment) + (comment) + (comment) + (function_definition + name: (identifier) + parameters: (parameters + (identifier) + (identifier)) + body: (block)) + (if_statement + condition: (identifier) + consequence: (block + (print_statement + argument: (identifier)) + (while_statement + condition: (call + function: (identifier) + arguments: (argument_list)) + body: (block))))) + +================================================================================ +Class definitions +================================================================================ + +class A: + def b(self): + return c +class B(): + pass +class B(method1): + def method1(self): + return +class C(method1, Sequence[T]): + pass +class D(Sequence[T, U]): + pass + +-------------------------------------------------------------------------------- + +(module + (class_definition + (identifier) + (block + (function_definition + (identifier) + (parameters + (identifier)) + (block + (return_statement + (identifier)))))) + (class_definition + (identifier) + (argument_list) + (block + (pass_statement))) + (class_definition + (identifier) + (argument_list + (identifier)) + (block + (function_definition + (identifier) + (parameters + (identifier)) + (block + (return_statement))))) + (class_definition + (identifier) + (argument_list + (identifier) + (subscript + (identifier) + (identifier))) + (block + (pass_statement))) + (class_definition + (identifier) + (argument_list + (subscript + (identifier) + (identifier) + (identifier))) + (block + (pass_statement)))) + +================================================================================ +Class definitions with superclasses +================================================================================ + 
+class A(B, C): + def d(): + e + +-------------------------------------------------------------------------------- + +(module + (class_definition + (identifier) + (argument_list + (identifier) + (identifier)) + (block + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (identifier))))))) + +================================================================================ +Decorated definitions +================================================================================ + +@a.b +class C: + @d(1) + @e[2].f.g + def f(): + g + + @f() + async def f(): + g + +@buttons[0].clicked.connect +def spam(): + ... + +-------------------------------------------------------------------------------- + +(module + (decorated_definition + (decorator + (attribute + (identifier) + (identifier))) + (class_definition + (identifier) + (block + (decorated_definition + (decorator + (call + (identifier) + (argument_list + (integer)))) + (decorator + (attribute + (attribute + (subscript + (identifier) + (integer)) + (identifier)) + (identifier))) + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (identifier))))) + (decorated_definition + (decorator + (call + (identifier) + (argument_list))) + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (identifier)))))))) + (decorated_definition + (decorator + (attribute + (attribute + (subscript + (identifier) + (integer)) + (identifier)) + (identifier))) + (function_definition + (identifier) + (parameters) + (block + (expression_statement + (ellipsis)))))) + +================================================================================ +Raise statements +================================================================================ + +raise +raise RuntimeError('NO') +raise RunTimeError('NO') from e + +-------------------------------------------------------------------------------- + +(module + (raise_statement) + (raise_statement + 
(call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end))))) + (raise_statement + (call + (identifier) + (argument_list + (string + (string_start) + (string_content) + (string_end)))) + (identifier))) + +================================================================================ +Comments +================================================================================ + +print a +# hi +print b # bye +print c + +-------------------------------------------------------------------------------- + +(module + (print_statement + (identifier)) + (comment) + (print_statement + (identifier)) + (comment) + (print_statement + (identifier))) + +================================================================================ +Comments at different indentation levels +================================================================================ + +if a: + # one +# two + # three + b + # four + c + +-------------------------------------------------------------------------------- + +(module + (if_statement + (identifier) + (comment) + (comment) + (comment) + (block + (expression_statement + (identifier)) + (comment) + (expression_statement + (identifier))))) + +================================================================================ +Comments after dedents +================================================================================ + +if a: + b + +# one +c + +-------------------------------------------------------------------------------- + +(module + (if_statement + (identifier) + (block + (expression_statement + (identifier)))) + (comment) + (expression_statement + (identifier))) + +================================================================================ +Comments at the ends of indented blocks +================================================================================ + +if a: + b + # one + # two + +if c: + d + # three + # four + +def a(): + if b: + b # comment + b # comment + +# five + 
+-------------------------------------------------------------------------------- + +(module + (if_statement + (identifier) + (block + (expression_statement + (identifier)) + (comment) +