Add HTML
This commit is contained in:
parent
bb561d2e33
commit
3088bcfa22
|
@ -7,3 +7,4 @@ Languages:
|
|||
* nix: https://github.com/nix-community/tree-sitter-nix (MIT)
|
||||
* python: https://github.com/tree-sitter/tree-sitter-python (MIT)
|
||||
* rust: https://github.com/tree-sitter/tree-sitter-rust (MIT)
|
||||
* html: https://github.com/tree-sitter/tree-sitter-html (MIT)
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
packages.nix = compile-tree-sitter { src = ./nix; name = "nix"; };
|
||||
packages.python = compile-tree-sitter { src = ./python; name = "python"; };
|
||||
packages.rust = compile-tree-sitter { src = ./rust; name = "rust"; };
|
||||
packages.html = compile-tree-sitter { src = ./html; name = "html"; };
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
21
html/LICENSE
Normal file
21
html/LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Max Brunsfeld
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
143
html/grammar.js
Normal file
143
html/grammar.js
Normal file
|
@ -0,0 +1,143 @@
|
|||
/**
|
||||
* @file HTML grammar for tree-sitter
|
||||
* @author Max Brunsfeld
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/* eslint-disable arrow-parens */
|
||||
/* eslint-disable camelcase */
|
||||
/* eslint-disable-next-line spaced-comment */
|
||||
/// <reference types="tree-sitter-cli/dsl" />
|
||||
// @ts-check
|
||||
|
||||
module.exports = grammar({
|
||||
name: 'html',
|
||||
|
||||
extras: $ => [
|
||||
$.comment,
|
||||
/\s+/,
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
$._start_tag_name,
|
||||
$._script_start_tag_name,
|
||||
$._style_start_tag_name,
|
||||
$._end_tag_name,
|
||||
$.erroneous_end_tag_name,
|
||||
'/>',
|
||||
$._implicit_end_tag,
|
||||
$.raw_text,
|
||||
$.comment,
|
||||
],
|
||||
|
||||
rules: {
|
||||
fragment: $ => repeat($._node),
|
||||
|
||||
doctype: $ => seq(
|
||||
'<!',
|
||||
alias($._doctype, 'doctype'),
|
||||
/[^>]+/,
|
||||
'>',
|
||||
),
|
||||
|
||||
_doctype: _ => /[Dd][Oo][Cc][Tt][Yy][Pp][Ee]/,
|
||||
|
||||
_node: $ => choice(
|
||||
$.doctype,
|
||||
$.entity,
|
||||
$.text,
|
||||
$.element,
|
||||
$.script_element,
|
||||
$.style_element,
|
||||
$.erroneous_end_tag,
|
||||
),
|
||||
|
||||
element: $ => choice(
|
||||
seq(
|
||||
$.start_tag,
|
||||
repeat($._node),
|
||||
choice($.end_tag, $._implicit_end_tag),
|
||||
),
|
||||
$.self_closing_tag,
|
||||
),
|
||||
|
||||
script_element: $ => seq(
|
||||
alias($.script_start_tag, $.start_tag),
|
||||
optional($.raw_text),
|
||||
$.end_tag,
|
||||
),
|
||||
|
||||
style_element: $ => seq(
|
||||
alias($.style_start_tag, $.start_tag),
|
||||
optional($.raw_text),
|
||||
$.end_tag,
|
||||
),
|
||||
|
||||
start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>',
|
||||
),
|
||||
|
||||
script_start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._script_start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>',
|
||||
),
|
||||
|
||||
style_start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._style_start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>',
|
||||
),
|
||||
|
||||
self_closing_tag: $ => seq(
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'/>',
|
||||
),
|
||||
|
||||
end_tag: $ => seq(
|
||||
'</',
|
||||
alias($._end_tag_name, $.tag_name),
|
||||
'>',
|
||||
),
|
||||
|
||||
erroneous_end_tag: $ => seq(
|
||||
'</',
|
||||
$.erroneous_end_tag_name,
|
||||
'>',
|
||||
),
|
||||
|
||||
attribute: $ => seq(
|
||||
$.attribute_name,
|
||||
optional(seq(
|
||||
'=',
|
||||
choice(
|
||||
$.attribute_value,
|
||||
$.quoted_attribute_value,
|
||||
),
|
||||
)),
|
||||
),
|
||||
|
||||
attribute_name: _ => /[^<>"'/=\s]+/,
|
||||
|
||||
attribute_value: _ => /[^<>"'=\s]+/,
|
||||
|
||||
// An entity can be named, numeric (decimal), or numeric (hexadecimal). The
|
||||
// longest entity name is 29 characters long, and the HTML spec says that
|
||||
// no more will ever be added.
|
||||
entity: _ => /&(#([xX][0-9a-fA-F]{1,6}|[0-9]{1,5})|[A-Za-z]{1,30});/,
|
||||
|
||||
quoted_attribute_value: $ => choice(
|
||||
seq('\'', optional(alias(/[^']+/, $.attribute_value)), '\''),
|
||||
seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"'),
|
||||
),
|
||||
|
||||
text: _ => /[^<>&\s]([^<>&]*[^<>&\s])?/,
|
||||
},
|
||||
});
|
452
html/src/scanner.c
Normal file
452
html/src/scanner.c
Normal file
|
@ -0,0 +1,452 @@
|
|||
#include "tag.h"
|
||||
|
||||
#include <wctype.h>
|
||||
|
||||
enum TokenType {
|
||||
START_TAG_NAME,
|
||||
SCRIPT_START_TAG_NAME,
|
||||
STYLE_START_TAG_NAME,
|
||||
END_TAG_NAME,
|
||||
ERRONEOUS_END_TAG_NAME,
|
||||
SELF_CLOSING_TAG_DELIMITER,
|
||||
IMPLICIT_END_TAG,
|
||||
RAW_TEXT,
|
||||
COMMENT
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
Tag *data;
|
||||
} tags_vec;
|
||||
|
||||
typedef struct {
|
||||
tags_vec tags;
|
||||
} Scanner;
|
||||
|
||||
// Larger of two values. Both arguments may be evaluated twice, so do not pass
// expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))

// ---------------------------------------------------------------------------
// Growable-array helpers for any struct with `len`, `cap` and `data` members.
//
// Every statement-like macro is wrapped in `do { ... } while (0)` so that it
// behaves as a single statement (safe inside an unbraced if/else) and must be
// terminated with a semicolon by the caller.
// ---------------------------------------------------------------------------

// Grow the backing storage to `_cap` elements. Does nothing when `_cap` is
// zero or not larger than the current capacity. Allocation failure trips the
// assert.
#define VEC_RESIZE(vec, _cap) \
    do { \
        if ((_cap) > (vec).cap && (_cap) > 0) { \
            void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \
            assert(tmp != NULL); \
            (vec).data = tmp; \
            (vec).cap = (_cap); \
        } \
    } while (0)

// Ensure the capacity is at least `_cap`.
#define VEC_GROW(vec, _cap) \
    do { \
        if ((vec).cap < (_cap)) { \
            VEC_RESIZE((vec), (_cap)); \
        } \
    } while (0)

// Append `el`, doubling the capacity (minimum 16) when the vector is full.
#define VEC_PUSH(vec, el) \
    do { \
        if ((vec).cap == (vec).len) { \
            VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \
        } \
        (vec).data[(vec).len++] = (el); \
    } while (0)

// Drop the last element, releasing its name buffer first when it is a CUSTOM
// tag. The vector must not be empty.
#define VEC_POP(vec) \
    do { \
        if (VEC_BACK(vec).type == CUSTOM) { \
            tag_free(&VEC_BACK(vec)); \
        } \
        (vec).len--; \
    } while (0)

// Last element of a non-empty vector (usable as an lvalue).
#define VEC_BACK(vec) ((vec).data[(vec).len - 1])

// Release the backing storage. free(NULL) is a no-op, so no guard is needed.
// `len` and `cap` are left untouched.
#define VEC_FREE(vec) \
    do { \
        free((vec).data); \
        (vec).data = NULL; \
    } while (0)

// Run tag_free on every element and reset the length to zero; the backing
// storage and capacity are kept.
#define VEC_CLEAR(vec) \
    do { \
        for (uint32_t idx_ = 0; idx_ < (vec).len; idx_++) { \
            tag_free(&(vec).data[idx_]); \
        } \
        (vec).len = 0; \
    } while (0)

// ---------------------------------------------------------------------------
// String helpers. The buffer always holds `cap + 1` elements so that a
// terminating zero fits after `cap` characters.
// ---------------------------------------------------------------------------

// Reallocate to `_cap` characters (plus terminator) and zero everything from
// the current length onwards.
#define STRING_RESIZE(vec, _cap) \
    do { \
        void *tmp = realloc((vec).data, ((_cap) + 1) * sizeof((vec).data[0])); \
        assert(tmp != NULL); \
        (vec).data = tmp; \
        memset((vec).data + (vec).len, 0, \
               (((_cap) + 1) - (vec).len) * sizeof((vec).data[0])); \
        (vec).cap = (_cap); \
    } while (0)

// Ensure the capacity is at least `_cap` characters.
#define STRING_GROW(vec, _cap) \
    do { \
        if ((vec).cap < (_cap)) { \
            STRING_RESIZE((vec), (_cap)); \
        } \
    } while (0)

// Append one character, doubling the capacity (minimum 16) when full.
#define STRING_PUSH(vec, el) \
    do { \
        if ((vec).cap == (vec).len) { \
            STRING_RESIZE((vec), MAX(16, (vec).len * 2)); \
        } \
        (vec).data[(vec).len++] = (el); \
    } while (0)

// Start an empty, zero-filled string with room for 16 characters.
#define STRING_INIT(vec) \
    do { \
        (vec).data = calloc(1, sizeof(char) * 17); \
        (vec).len = 0; \
        (vec).cap = 16; \
    } while (0)

// Release the buffer. free(NULL) is a no-op, so no guard is needed.
#define STRING_FREE(vec) \
    do { \
        free((vec).data); \
        (vec).data = NULL; \
    } while (0)

// Empty the string and zero its first `cap` bytes, keeping the allocation.
#define STRING_CLEAR(vec) \
    do { \
        (vec).len = 0; \
        memset((vec).data, 0, (vec).cap * sizeof(char)); \
    } while (0)
|
||||
|
||||
// Write the open-tag stack into `buffer` and return the number of bytes used.
//
// Layout:
//   bytes 0-1  number of tag records actually written (filled in last)
//   bytes 2-3  stack depth, capped at UINT16_MAX
//   then one record per tag: the type byte, and for CUSTOM tags a length byte
//   followed by that many name bytes (names are cut at UINT8_MAX bytes).
//
// Writing stops as soon as the next record would reach
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
static unsigned serialize(Scanner *scanner, char *buffer) {
    uint16_t tag_count = UINT16_MAX;
    if (scanner->tags.len <= UINT16_MAX) {
        tag_count = scanner->tags.len;
    }

    // Leave room for the written-record count; it is only known at the end.
    unsigned size = sizeof(tag_count);
    memcpy(&buffer[size], &tag_count, sizeof(tag_count));
    size += sizeof(tag_count);

    uint16_t written = 0;
    while (written < tag_count) {
        Tag current = scanner->tags.data[written];

        if (current.type != CUSTOM) {
            if (size + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
                break;
            }
            buffer[size++] = (char)current.type;
        } else {
            unsigned name_length = current.custom_tag_name.len;
            if (name_length > UINT8_MAX) {
                name_length = UINT8_MAX;
            }
            if (size + 2 + name_length >=
                TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
                break;
            }
            buffer[size++] = (char)current.type;
            buffer[size++] = (char)name_length;
            strncpy(&buffer[size], current.custom_tag_name.data, name_length);
            size += name_length;
        }

        written++;
    }

    memcpy(&buffer[0], &written, sizeof(written));
    return size;
}
|
||||
|
||||
// Rebuild the open-tag stack from a buffer produced by serialize().
//
// The buffer starts with two uint16 values: the number of tag records that
// were written, then the stack depth at serialization time. When fewer
// records than the depth were written (the buffer ran out of room), the
// missing entries are filled with blank tags from new_tag().
//
// A `length` of zero simply leaves the stack empty.
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
    VEC_CLEAR(scanner->tags);
    if (length == 0) {
        return;
    }

    unsigned size = 0;
    uint16_t serialized_tag_count = 0;
    uint16_t tag_count = 0;

    memcpy(&serialized_tag_count, &buffer[size], sizeof(serialized_tag_count));
    size += sizeof(serialized_tag_count);

    memcpy(&tag_count, &buffer[size], sizeof(tag_count));
    size += sizeof(tag_count);

    if (tag_count == 0) {
        return;
    }

    VEC_RESIZE(scanner->tags, tag_count);

    unsigned iter = 0;
    for (; iter < serialized_tag_count; iter++) {
        // Start from a fully initialised tag. Copying the not-yet-written
        // array slot would leave non-CUSTOM tags with an indeterminate name
        // pointer that the destroy hook would later pass to free().
        Tag tag = new_tag();
        tag.type = (TagType)buffer[size++];
        if (tag.type == CUSTOM) {
            uint16_t name_length = (uint8_t)buffer[size++];
            tag.custom_tag_name.len = name_length;
            tag.custom_tag_name.cap = name_length;
            // One extra zeroed byte keeps the name NUL-terminated.
            tag.custom_tag_name.data =
                (char *)calloc(1, sizeof(char) * (name_length + 1));
            assert(tag.custom_tag_name.data != NULL);
            memcpy(tag.custom_tag_name.data, &buffer[size], name_length);
            size += name_length;
        }
        VEC_PUSH(scanner->tags, tag);
    }

    // Pad with blank tags if we didn't read enough; this happens when the
    // buffer had no more room but the stack held more tags.
    for (; iter < tag_count; iter++) {
        Tag tag = new_tag();
        VEC_PUSH(scanner->tags, tag);
    }
}
|
||||
|
||||
// Consume a run of alphanumeric, '-' or ':' characters and return it,
// upper-cased, as a newly allocated String. The result has length zero when
// the lookahead is not a tag-name character. The caller releases the String.
static String scan_tag_name(TSLexer *lexer) {
    String name;
    STRING_INIT(name);
    for (;;) {
        bool is_name_char = iswalnum(lexer->lookahead) ||
                            lexer->lookahead == '-' ||
                            lexer->lookahead == ':';
        if (!is_name_char) {
            break;
        }
        STRING_PUSH(name, towupper(lexer->lookahead));
        lexer->advance(lexer, false);
    }
    return name;
}
|
||||
|
||||
// Scan the remainder of a comment; the caller has already consumed "<!".
// Requires "--" next, then reads until a '>' that is directly preceded by at
// least two dashes. Returns false when the opening dashes are missing or the
// input ends before the comment is closed.
static bool scan_comment(TSLexer *lexer) {
    // Two opening dashes are mandatory.
    for (int opening = 0; opening < 2; opening++) {
        if (lexer->lookahead != '-') {
            return false;
        }
        lexer->advance(lexer, false);
    }

    // Number of consecutive dashes seen immediately before the lookahead.
    unsigned run = 0;
    for (; lexer->lookahead; lexer->advance(lexer, false)) {
        if (lexer->lookahead == '-') {
            ++run;
            continue;
        }
        if (lexer->lookahead == '>' && run >= 2) {
            lexer->result_symbol = COMMENT;
            lexer->advance(lexer, false);
            lexer->mark_end(lexer);
            return true;
        }
        // Any other character (including a '>' without enough dashes)
        // breaks the run.
        run = 0;
    }
    return false;
}
|
||||
|
||||
// Scan the raw text body of the element on top of the tag stack, stopping
// just before its closing delimiter ("</SCRIPT" when the top tag is SCRIPT,
// "</STYLE" otherwise; matched case-insensitively) or at end of input.
//
// Returns false only when the tag stack is empty. The token end is marked
// before the delimiter, so the delimiter itself is not part of RAW_TEXT.
static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) {
    if (scanner->tags.len == 0) {
        return false;
    }

    lexer->mark_end(lexer);

    const char *end_delimiter =
        VEC_BACK(scanner->tags).type == SCRIPT ? "</SCRIPT" : "</STYLE";
    // Loop-invariant, so computed once.
    const size_t delimiter_length = strlen(end_delimiter);

    size_t delimiter_index = 0;
    while (lexer->lookahead) {
        if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) {
            delimiter_index++;
            if (delimiter_index == delimiter_length) {
                break;
            }
            lexer->advance(lexer, false);
        } else if (delimiter_index > 0) {
            // A partial match just failed. Everything consumed so far is raw
            // text, but the current character has not been tested against the
            // start of the delimiter yet: in "<</script" the second '<' opens
            // the real delimiter. Reset and look at it again without
            // advancing. '<' occurs only at index 0 of either delimiter, so a
            // plain restart is sufficient.
            delimiter_index = 0;
            lexer->mark_end(lexer);
        } else {
            lexer->advance(lexer, false);
            lexer->mark_end(lexer);
        }
    }

    lexer->result_symbol = RAW_TEXT;
    return true;
}
|
||||
|
||||
// Decide whether the element on top of the tag stack has to be closed
// implicitly before the upcoming tag (the caller has consumed '<', or is at
// end of input). On success the top tag is popped and IMPLICIT_END_TAG is
// emitted.
//
// An implicit end tag is produced when:
//   - the next tag is not a closing tag and the parent is a void element;
//   - the next tag is a closing tag that does not match the top of the stack
//     but whose type occurs somewhere in the stack;
//   - the next tag is an opening tag the parent cannot contain.
static bool scan_implicit_end_tag(Scanner *scanner, TSLexer *lexer) {
    Tag *parent = NULL;
    if (scanner->tags.len != 0) {
        parent = &VEC_BACK(scanner->tags);
    }

    const bool is_closing_tag = lexer->lookahead == '/';
    if (is_closing_tag) {
        lexer->advance(lexer, false);
    } else if (parent && is_void(parent)) {
        VEC_POP(scanner->tags);
        lexer->result_symbol = IMPLICIT_END_TAG;
        return true;
    }

    String tag_name = scan_tag_name(lexer);
    if (tag_name.len == 0) {
        STRING_FREE(tag_name);
        return false;
    }

    Tag next_tag = for_name(tag_name.data);
    bool close_parent = false;

    if (is_closing_tag) {
        // If the tag correctly closes the topmost element there is nothing
        // implicit about it.
        bool closes_top = scanner->tags.len > 0 &&
                          tagcmp(&VEC_BACK(scanner->tags), &next_tag);
        if (!closes_top) {
            // Otherwise dig deeper and queue an implicit end tag (to be nice
            // in the case of malformed HTML).
            unsigned depth = scanner->tags.len;
            while (depth > 0 && !close_parent) {
                close_parent =
                    scanner->tags.data[depth - 1].type == next_tag.type;
                depth--;
            }
        }
    } else if (parent && !can_contain(parent, &next_tag)) {
        close_parent = true;
    }

    if (close_parent) {
        VEC_POP(scanner->tags);
        lexer->result_symbol = IMPLICIT_END_TAG;
    }

    STRING_FREE(tag_name);
    tag_free(&next_tag);
    return close_parent;
}
|
||||
|
||||
// Scan the name of an opening tag, push the tag onto the stack and emit the
// matching token: SCRIPT_START_TAG_NAME, STYLE_START_TAG_NAME, or
// START_TAG_NAME for everything else. Returns false when no name is present.
static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = scan_tag_name(lexer);
    const bool has_name = name.len != 0;

    if (has_name) {
        Tag opened = for_name(name.data);
        // The stack takes over the tag (and its name buffer, if any).
        VEC_PUSH(scanner->tags, opened);

        if (opened.type == SCRIPT) {
            lexer->result_symbol = SCRIPT_START_TAG_NAME;
        } else if (opened.type == STYLE) {
            lexer->result_symbol = STYLE_START_TAG_NAME;
        } else {
            lexer->result_symbol = START_TAG_NAME;
        }
    }

    STRING_FREE(name);
    return has_name;
}
|
||||
|
||||
// Scan the name of a closing tag. When it matches the tag on top of the
// stack, that tag is popped and END_TAG_NAME is emitted; otherwise the stack
// is left alone and ERRONEOUS_END_TAG_NAME is emitted. Returns false when no
// name is present.
static bool scan_end_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = scan_tag_name(lexer);
    const bool has_name = name.len != 0;

    if (has_name) {
        Tag closing = for_name(name.data);
        bool matches_top = scanner->tags.len > 0 &&
                           tagcmp(&VEC_BACK(scanner->tags), &closing);
        if (matches_top) {
            VEC_POP(scanner->tags);
        }
        lexer->result_symbol =
            matches_top ? END_TAG_NAME : ERRONEOUS_END_TAG_NAME;
        tag_free(&closing);
    }

    STRING_FREE(name);
    return has_name;
}
|
||||
|
||||
// Scan the "/>" that ends a self-closing tag; the lookahead is the '/'.
// On success the tag opened by this start tag is popped (if the stack is not
// empty) and SELF_CLOSING_TAG_DELIMITER is emitted.
static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) {
    lexer->advance(lexer, false);
    if (lexer->lookahead != '>') {
        return false;
    }
    lexer->advance(lexer, false);

    if (scanner->tags.len > 0) {
        VEC_POP(scanner->tags);
    }
    // Always report which token was recognised. Previously the symbol was
    // only assigned when the stack was non-empty, so with an empty stack the
    // function returned true while leaving result_symbol unset.
    lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
    return true;
}
|
||||
|
||||
// Dispatch to the specific token scanners based on which external tokens the
// parser accepts (`valid_symbols`) and on the next character.
//
// Raw text is scanned when RAW_TEXT is valid and neither tag-name token is.
// Otherwise leading whitespace is skipped and:
//   '<'   -> "<!" starts a comment; else an implicit end tag if valid
//   '\0'  -> an implicit end tag if valid
//   '/'   -> a self-closing delimiter if valid
//   other -> a start or end tag name, if one is valid and RAW_TEXT is not
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
    const bool wants_tag_name =
        valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME];

    if (valid_symbols[RAW_TEXT] && !wants_tag_name) {
        return scan_raw_text(scanner, lexer);
    }

    while (iswspace(lexer->lookahead)) {
        lexer->advance(lexer, true);
    }

    if (lexer->lookahead == '<') {
        lexer->mark_end(lexer);
        lexer->advance(lexer, false);

        if (lexer->lookahead == '!') {
            lexer->advance(lexer, false);
            return scan_comment(lexer);
        }
        if (valid_symbols[IMPLICIT_END_TAG]) {
            return scan_implicit_end_tag(scanner, lexer);
        }
        return false;
    }

    if (lexer->lookahead == '\0') {
        if (valid_symbols[IMPLICIT_END_TAG]) {
            return scan_implicit_end_tag(scanner, lexer);
        }
        return false;
    }

    if (lexer->lookahead == '/') {
        if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) {
            return scan_self_closing_tag_delimiter(scanner, lexer);
        }
        return false;
    }

    if (wants_tag_name && !valid_symbols[RAW_TEXT]) {
        if (valid_symbols[START_TAG_NAME]) {
            return scan_start_tag_name(scanner, lexer);
        }
        return scan_end_tag_name(scanner, lexer);
    }

    return false;
}
|
||||
|
||||
void *tree_sitter_html_external_scanner_create() {
|
||||
Scanner *scanner = (Scanner *)calloc(1, sizeof(Scanner));
|
||||
return scanner;
|
||||
}
|
||||
|
||||
// Exported scan hook: casts the opaque payload back to the Scanner and
// forwards to scan().
bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer,
                                            const bool *valid_symbols) {
    return scan((Scanner *)payload, lexer, valid_symbols);
}
|
||||
|
||||
unsigned tree_sitter_html_external_scanner_serialize(void *payload,
|
||||
char *buffer) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
return serialize(scanner, buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_html_external_scanner_deserialize(void *payload,
|
||||
const char *buffer,
|
||||
unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
deserialize(scanner, buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_html_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
for (unsigned i = 0; i < scanner->tags.len; i++) {
|
||||
STRING_FREE(scanner->tags.data[i].custom_tag_name);
|
||||
}
|
||||
VEC_FREE(scanner->tags);
|
||||
free(scanner);
|
||||
}
|
384
html/src/tag.h
Normal file
384
html/src/tag.h
Normal file
|
@ -0,0 +1,384 @@
|
|||
#include "tree_sitter/parser.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef enum {
|
||||
AREA,
|
||||
BASE,
|
||||
BASEFONT,
|
||||
BGSOUND,
|
||||
BR,
|
||||
COL,
|
||||
COMMAND,
|
||||
EMBED,
|
||||
FRAME,
|
||||
HR,
|
||||
IMAGE,
|
||||
IMG,
|
||||
INPUT,
|
||||
ISINDEX,
|
||||
KEYGEN,
|
||||
LINK,
|
||||
MENUITEM,
|
||||
META,
|
||||
NEXTID,
|
||||
PARAM,
|
||||
SOURCE,
|
||||
TRACK,
|
||||
WBR,
|
||||
END_OF_VOID_TAGS,
|
||||
|
||||
A,
|
||||
ABBR,
|
||||
ADDRESS,
|
||||
ARTICLE,
|
||||
ASIDE,
|
||||
AUDIO,
|
||||
B,
|
||||
BDI,
|
||||
BDO,
|
||||
BLOCKQUOTE,
|
||||
BODY,
|
||||
BUTTON,
|
||||
CANVAS,
|
||||
CAPTION,
|
||||
CITE,
|
||||
CODE,
|
||||
COLGROUP,
|
||||
DATA,
|
||||
DATALIST,
|
||||
DD,
|
||||
DEL,
|
||||
DETAILS,
|
||||
DFN,
|
||||
DIALOG,
|
||||
DIV,
|
||||
DL,
|
||||
DT,
|
||||
EM,
|
||||
FIELDSET,
|
||||
FIGCAPTION,
|
||||
FIGURE,
|
||||
FOOTER,
|
||||
FORM,
|
||||
H1,
|
||||
H2,
|
||||
H3,
|
||||
H4,
|
||||
H5,
|
||||
H6,
|
||||
HEAD,
|
||||
HEADER,
|
||||
HGROUP,
|
||||
HTML,
|
||||
I,
|
||||
IFRAME,
|
||||
INS,
|
||||
KBD,
|
||||
LABEL,
|
||||
LEGEND,
|
||||
LI,
|
||||
MAIN,
|
||||
MAP,
|
||||
MARK,
|
||||
MATH,
|
||||
MENU,
|
||||
METER,
|
||||
NAV,
|
||||
NOSCRIPT,
|
||||
OBJECT,
|
||||
OL,
|
||||
OPTGROUP,
|
||||
OPTION,
|
||||
OUTPUT,
|
||||
P,
|
||||
PICTURE,
|
||||
PRE,
|
||||
PROGRESS,
|
||||
Q,
|
||||
RB,
|
||||
RP,
|
||||
RT,
|
||||
RTC,
|
||||
RUBY,
|
||||
S,
|
||||
SAMP,
|
||||
SCRIPT,
|
||||
SECTION,
|
||||
SELECT,
|
||||
SLOT,
|
||||
SMALL,
|
||||
SPAN,
|
||||
STRONG,
|
||||
STYLE,
|
||||
SUB,
|
||||
SUMMARY,
|
||||
SUP,
|
||||
SVG,
|
||||
TABLE,
|
||||
TBODY,
|
||||
TD,
|
||||
TEMPLATE,
|
||||
TEXTAREA,
|
||||
TFOOT,
|
||||
TH,
|
||||
THEAD,
|
||||
TIME,
|
||||
TITLE,
|
||||
TR,
|
||||
U,
|
||||
UL,
|
||||
VAR,
|
||||
VIDEO,
|
||||
|
||||
CUSTOM,
|
||||
|
||||
END_,
|
||||
} TagType;
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
char *data;
|
||||
} String;
|
||||
|
||||
typedef struct {
|
||||
char tag_name[16];
|
||||
TagType tag_value;
|
||||
} TagMap;
|
||||
|
||||
typedef struct {
|
||||
TagType type;
|
||||
String custom_tag_name;
|
||||
} Tag;
|
||||
|
||||
const TagMap TAG_TYPES_BY_TAG_NAME[126] = {
|
||||
{"AREA", AREA },
|
||||
{"BASE", BASE },
|
||||
{"BASEFONT", BASEFONT },
|
||||
{"BGSOUND", BGSOUND },
|
||||
{"BR", BR },
|
||||
{"COL", COL },
|
||||
{"COMMAND", COMMAND },
|
||||
{"EMBED", EMBED },
|
||||
{"FRAME", FRAME },
|
||||
{"HR", HR },
|
||||
{"IMAGE", IMAGE },
|
||||
{"IMG", IMG },
|
||||
{"INPUT", INPUT },
|
||||
{"ISINDEX", ISINDEX },
|
||||
{"KEYGEN", KEYGEN },
|
||||
{"LINK", LINK },
|
||||
{"MENUITEM", MENUITEM },
|
||||
{"META", META },
|
||||
{"NEXTID", NEXTID },
|
||||
{"PARAM", PARAM },
|
||||
{"SOURCE", SOURCE },
|
||||
{"TRACK", TRACK },
|
||||
{"WBR", WBR },
|
||||
{"A", A },
|
||||
{"ABBR", ABBR },
|
||||
{"ADDRESS", ADDRESS },
|
||||
{"ARTICLE", ARTICLE },
|
||||
{"ASIDE", ASIDE },
|
||||
{"AUDIO", AUDIO },
|
||||
{"B", B },
|
||||
{"BDI", BDI },
|
||||
{"BDO", BDO },
|
||||
{"BLOCKQUOTE", BLOCKQUOTE},
|
||||
{"BODY", BODY },
|
||||
{"BUTTON", BUTTON },
|
||||
{"CANVAS", CANVAS },
|
||||
{"CAPTION", CAPTION },
|
||||
{"CITE", CITE },
|
||||
{"CODE", CODE },
|
||||
{"COLGROUP", COLGROUP },
|
||||
{"DATA", DATA },
|
||||
{"DATALIST", DATALIST },
|
||||
{"DD", DD },
|
||||
{"DEL", DEL },
|
||||
{"DETAILS", DETAILS },
|
||||
{"DFN", DFN },
|
||||
{"DIALOG", DIALOG },
|
||||
{"DIV", DIV },
|
||||
{"DL", DL },
|
||||
{"DT", DT },
|
||||
{"EM", EM },
|
||||
{"FIELDSET", FIELDSET },
|
||||
{"FIGCAPTION", FIGCAPTION},
|
||||
{"FIGURE", FIGURE },
|
||||
{"FOOTER", FOOTER },
|
||||
{"FORM", FORM },
|
||||
{"H1", H1 },
|
||||
{"H2", H2 },
|
||||
{"H3", H3 },
|
||||
{"H4", H4 },
|
||||
{"H5", H5 },
|
||||
{"H6", H6 },
|
||||
{"HEAD", HEAD },
|
||||
{"HEADER", HEADER },
|
||||
{"HGROUP", HGROUP },
|
||||
{"HTML", HTML },
|
||||
{"I", I },
|
||||
{"IFRAME", IFRAME },
|
||||
{"INS", INS },
|
||||
{"KBD", KBD },
|
||||
{"LABEL", LABEL },
|
||||
{"LEGEND", LEGEND },
|
||||
{"LI", LI },
|
||||
{"MAIN", MAIN },
|
||||
{"MAP", MAP },
|
||||
{"MARK", MARK },
|
||||
{"MATH", MATH },
|
||||
{"MENU", MENU },
|
||||
{"METER", METER },
|
||||
{"NAV", NAV },
|
||||
{"NOSCRIPT", NOSCRIPT },
|
||||
{"OBJECT", OBJECT },
|
||||
{"OL", OL },
|
||||
{"OPTGROUP", OPTGROUP },
|
||||
{"OPTION", OPTION },
|
||||
{"OUTPUT", OUTPUT },
|
||||
{"P", P },
|
||||
{"PICTURE", PICTURE },
|
||||
{"PRE", PRE },
|
||||
{"PROGRESS", PROGRESS },
|
||||
{"Q", Q },
|
||||
{"RB", RB },
|
||||
{"RP", RP },
|
||||
{"RT", RT },
|
||||
{"RTC", RTC },
|
||||
{"RUBY", RUBY },
|
||||
{"S", S },
|
||||
{"SAMP", SAMP },
|
||||
{"SCRIPT", SCRIPT },
|
||||
{"SECTION", SECTION },
|
||||
{"SELECT", SELECT },
|
||||
{"SLOT", SLOT },
|
||||
{"SMALL", SMALL },
|
||||
{"SPAN", SPAN },
|
||||
{"STRONG", STRONG },
|
||||
{"STYLE", STYLE },
|
||||
{"SUB", SUB },
|
||||
{"SUMMARY", SUMMARY },
|
||||
{"SUP", SUP },
|
||||
{"SVG", SVG },
|
||||
{"TABLE", TABLE },
|
||||
{"TBODY", TBODY },
|
||||
{"TD", TD },
|
||||
{"TEMPLATE", TEMPLATE },
|
||||
{"TEXTAREA", TEXTAREA },
|
||||
{"TFOOT", TFOOT },
|
||||
{"TH", TH },
|
||||
{"THEAD", THEAD },
|
||||
{"TIME", TIME },
|
||||
{"TITLE", TITLE },
|
||||
{"TR", TR },
|
||||
{"U", U },
|
||||
{"UL", UL },
|
||||
{"VAR", VAR },
|
||||
{"VIDEO", VIDEO },
|
||||
{"CUSTOM", CUSTOM },
|
||||
};
|
||||
|
||||
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
|
||||
ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, DETAILS, DIV, DL,
|
||||
FIELDSET, FIGCAPTION, FIGURE, FOOTER, FORM, H1, H2,
|
||||
H3, H4, H5, H6, HEADER, HR, MAIN,
|
||||
NAV, OL, P, PRE, SECTION,
|
||||
};
|
||||
|
||||
static TagType get_tag_from_string(const char *tag_name) {
|
||||
for (int i = 0; i < 126; i++) {
|
||||
if (strcmp(TAG_TYPES_BY_TAG_NAME[i].tag_name, tag_name) == 0) {
|
||||
return TAG_TYPES_BY_TAG_NAME[i].tag_value;
|
||||
}
|
||||
}
|
||||
return CUSTOM;
|
||||
}
|
||||
|
||||
static inline Tag new_tag() {
|
||||
Tag tag;
|
||||
tag.type = END_;
|
||||
tag.custom_tag_name.data = NULL;
|
||||
tag.custom_tag_name.len = 0;
|
||||
tag.custom_tag_name.cap = 0;
|
||||
return tag;
|
||||
}
|
||||
|
||||
// Build a tag of the given type. For CUSTOM tags a NUL-terminated heap copy
// of `name` is stored in custom_tag_name; the owner releases it with
// tag_free(). For every other type `name` is ignored and no memory is
// allocated.
static Tag make_tag(TagType type, const char *name) {
    Tag tag = new_tag();
    tag.type = type;
    if (type == CUSTOM) {
        const size_t name_length = strlen(name);
        // calloc leaves the extra byte zeroed, which terminates the copy.
        char *copy = (char *)calloc(name_length + 1, sizeof(char));
        assert(copy != NULL);
        memcpy(copy, name, name_length);

        tag.custom_tag_name.data = copy;
        tag.custom_tag_name.len = (uint32_t)name_length;
        // Keep cap in step with len instead of leaving it at zero.
        tag.custom_tag_name.cap = (uint32_t)name_length;
    }
    return tag;
}
|
||||
|
||||
// Release the name buffer of a CUSTOM tag. For every tag type the name
// pointer is reset to NULL afterwards; the Tag struct itself is not freed.
static inline void tag_free(Tag *tag) {
    char **name_slot = &tag->custom_tag_name.data;
    if (tag->type == CUSTOM) {
        free(*name_slot);
    }
    *name_slot = NULL;
}
|
||||
|
||||
static inline bool is_void(const Tag *tag) {
|
||||
return tag->type < END_OF_VOID_TAGS;
|
||||
}
|
||||
|
||||
static inline Tag for_name(const char *name) {
|
||||
return make_tag(get_tag_from_string(name), name);
|
||||
}
|
||||
|
||||
static inline bool tagcmp(const Tag *_tag1, const Tag *_tag2) {
|
||||
return _tag1->type == _tag2->type &&
|
||||
(_tag1->type == CUSTOM ? strcmp(_tag1->custom_tag_name.data,
|
||||
_tag2->custom_tag_name.data) == 0
|
||||
: true);
|
||||
}
|
||||
|
||||
// Report whether an element of type `self` may contain an element of type
// `other`. A false result is what makes the scanner close `self` implicitly
// before `other` starts. Only the tag types are compared; parent types
// without a rule accept any child.
static bool can_contain(Tag *self, const Tag *other) {
    TagType child = other->type;

    switch (self->type) {
        case LI:
            return child != LI;

        case DT:
        case DD:
            return child != DT && child != DD;

        case P: {
            // Derive the bound from the table so it cannot drift out of sync
            // with TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS.
            const size_t excluded_count =
                sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
                sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[0]);
            for (size_t i = 0; i < excluded_count; i++) {
                if (child == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[i]) {
                    return false;
                }
            }
            return true;
        }

        case COLGROUP:
            return child == COL;

        case RB:
        case RT:
        case RP:
            return child != RB && child != RT && child != RP;

        case OPTGROUP:
            return child != OPTGROUP;

        case TR:
            return child != TR;

        case TD:
        case TH:
            return child != TD && child != TH && child != TR;

        default:
            return true;
    }
}
|
365
html/test/corpus/main.txt
Normal file
365
html/test/corpus/main.txt
Normal file
|
@ -0,0 +1,365 @@
|
|||
===================================
|
||||
Tags
|
||||
===================================
|
||||
<span>Hello</span>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Tags with attributes
|
||||
===================================
|
||||
<input value=yes class="a" data-💩></input>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute
|
||||
(attribute_name)
|
||||
(attribute_value))
|
||||
(attribute
|
||||
(attribute_name)
|
||||
(quoted_attribute_value (attribute_value)))
|
||||
(attribute
|
||||
(attribute_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Nested tags
|
||||
===================================
|
||||
<div>
|
||||
<span>a</span>
|
||||
b
|
||||
<b>c</b>
|
||||
Multi-line
|
||||
text
|
||||
</div>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Void tags
|
||||
==================================
|
||||
<form><img src="something.png"><br><input type=submit value=Ok /></form>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag (tag_name)))
|
||||
(element
|
||||
(self_closing_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (attribute_value))
|
||||
(attribute (attribute_name) (attribute_value))))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Void tags at EOF
|
||||
==================================
|
||||
<img src="something.png">
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
|
||||
==================================
|
||||
Custom tags
|
||||
==================================
|
||||
<something:different>
|
||||
<atom-text-editor mini>
|
||||
Hello
|
||||
</atom-text-editor>
|
||||
</something:different>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name) (attribute (attribute_name)))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Comments
|
||||
==================================
|
||||
<!-- hello -->
|
||||
<!-- world ->-> -- > ->->->-- -> still comment -->
|
||||
<div>
|
||||
<!-- <span>something</span> -->
|
||||
</div>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(comment)
|
||||
(comment)
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(comment)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Raw text elements
|
||||
==================================
|
||||
<script>
|
||||
</s
|
||||
</sc
|
||||
</scr
|
||||
</scri
|
||||
</scrip
|
||||
</script>
|
||||
|
||||
<style>
|
||||
</ </s </st </sty </styl
|
||||
</style>
|
||||
|
||||
<script>
|
||||
</SCRIPT>
|
||||
|
||||
---
|
||||
|
||||
(fragment
|
||||
(script_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(style_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(script_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
All-caps doctype
|
||||
==================================
|
||||
<!DOCTYPE html PUBLIC
|
||||
"-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
---
|
||||
|
||||
(fragment
|
||||
(doctype))
|
||||
|
||||
==================================
|
||||
Lowercase doctype
|
||||
==================================
|
||||
<!doctype html>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(doctype))
|
||||
|
||||
==================================
|
||||
LI elements without close tags
|
||||
==================================
|
||||
<ul>
|
||||
<li>One
|
||||
<li>Two
|
||||
</ul>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
DT and DL elements without close tags
|
||||
======================================
|
||||
<dl>
|
||||
<dt>Coffee
|
||||
<dt>Café
|
||||
<dd>Black hot drink
|
||||
<dt>Milk
|
||||
<dd>White cold drink
|
||||
</dl>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
P elements without close tags
|
||||
======================================
|
||||
<p>One
|
||||
<div>Two</div>
|
||||
<p>Three
|
||||
<p>Four
|
||||
<h1>Five</h1>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
Ruby annotation elements without close tags
|
||||
======================================
|
||||
<ruby>東<rb>京<rt>とう<rt>きょう</ruby>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=======================================
|
||||
COLGROUP elements without end tags
|
||||
=======================================
|
||||
<table>
|
||||
<colgroup>
|
||||
<col style="background-color: #0f0">
|
||||
<col span="2">
|
||||
<tr>
|
||||
<th>Lime</th>
|
||||
<th>Lemon</th>
|
||||
<th>Orange</th>
|
||||
</tr>
|
||||
</table>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(end_tag (tag_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=========================================
|
||||
TR, TD, and TH elements without end tags
|
||||
=========================================
|
||||
<table>
|
||||
<tr>
|
||||
<th>One
|
||||
<th>Two
|
||||
<tr>
|
||||
<td>Three
|
||||
<td>Four
|
||||
</table>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==============================
|
||||
Named entities in tag contents
|
||||
==============================
|
||||
|
||||
<p>Lorem ipsum &nbsp; dolor sit &copy; amet.</p>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
================================
|
||||
Numeric entities in tag contents
|
||||
================================
|
||||
|
||||
<p>Lorem ipsum &#160; dolor sit &#8212; amet.</p>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=================================
|
||||
Multiple entities in tag contents
|
||||
=================================
|
||||
|
||||
<p>Lorem ipsum &#xA0; dolor &#xa0; sit &nbsp; amet.</p>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(entity)
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
39
html/test/highlight/attributes.html
Normal file
39
html/test/highlight/attributes.html
Normal file
|
@ -0,0 +1,39 @@
|
|||
<div style="display: flex" draggable>
|
||||
<!-- <- tag -->
|
||||
<!--^^^^ attribute -->
|
||||
<!-- ^^^^^^^^^^^^^ string -->
|
||||
<!-- ^^^^^^^^^ attribute -->
|
||||
<div onclick=tap>Hello, World</div>
|
||||
<!-- <- punctuation.bracket -->
|
||||
<div onclick=tap>Hello, World</div>
|
||||
<!-- ^^^^^^^^ attribute -->
|
||||
<div onclick=tap>Hello, World</div>
|
||||
<!-- ^^^ string -->
|
||||
<div onclick=tap>Hello, World</div>
|
||||
<!-- ^^ punctuation.bracket -->
|
||||
<div onclick="tap">Hello, World</div>
|
||||
<!-- ^^^ tag -->
|
||||
<div onclick="tap">Hello, World</div>
|
||||
<!-- ^ punctuation.bracket -->
|
||||
<something:different
|
||||
<!-- <- punctuation.bracket -->
|
||||
<!-- ^^^^^^^^^^^^^^^ tag -->
|
||||
@click="count++"
|
||||
<!--^^^^^^^ attribute -->
|
||||
<!-- ^^^^^^^ string -->
|
||||
:value="count"
|
||||
<!--^^^^^^^ attribute -->
|
||||
<!-- ^^^^^ string -->
|
||||
@value:modelValue="newValue => count = newValue"
|
||||
<!--^^^^^^^^^^^^^^^^^^ attribute -->
|
||||
<!-- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ string -->
|
||||
>
|
||||
<!-- <- punctuation.bracket -->
|
||||
</something:different>
|
||||
<!-- <- punctuation.bracket -->
|
||||
<!-- ^^^^^^^^^^^^^^^^ tag -->
|
||||
<!-- ^ punctuation.bracket -->
|
||||
</div>
|
||||
<!-- <- punctuation.bracket -->
|
||||
<!--^ tag -->
|
||||
<!-- ^ punctuation.bracket -->
|
6
html/test/highlight/doctype.html
Normal file
6
html/test/highlight/doctype.html
Normal file
|
@ -0,0 +1,6 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<!-- ^^^^^^^^^^^^^^^^^^^^ constant -->
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!-- ^^^^ constant -->
|
||||
<!-- ^ punctuation.bracket -->
|
7
html/test/highlight/erroneous.html
Normal file
7
html/test/highlight/erroneous.html
Normal file
|
@ -0,0 +1,7 @@
|
|||
<span>
|
||||
<!-- <- punctuation.bracket -->
|
||||
<!--^ tag -->
|
||||
|
||||
</div>
|
||||
<!--^ tag.error -->
|
||||
<!-- ^ punctuation.bracket -->
|
15
html/test/highlight/self-closing.html
Normal file
15
html/test/highlight/self-closing.html
Normal file
|
@ -0,0 +1,15 @@
|
|||
<input />
|
||||
<!-- <- punctuation.bracket -->
|
||||
<!-- ^ tag -->
|
||||
|
||||
<input type="submit" readonly />
|
||||
<!-- ^^^^ attribute -->
|
||||
|
||||
<input type="submit" readonly />
|
||||
<!-- ^^^^^^ string -->
|
||||
|
||||
<input type="submit" readonly />
|
||||
<!-- ^^^^^^^^ attribute -->
|
||||
|
||||
<input type="submit" readonly />
|
||||
<!-- ^^ punctuation.bracket -->
|
Loading…
Reference in New Issue
Block a user