Add support for regex flag to toggle dot-matches-newline

This commit is contained in:
Olivier Perret 2018-06-04 18:00:59 +02:00
parent 2a4d3eb13b
commit 8edef8b3f1
2 changed files with 32 additions and 8 deletions

View File

@ -156,7 +156,9 @@ Some modifiers can control the matching behavior of the atoms following
them: them:
* `(?i)` enables case-insensitive matching * `(?i)` enables case-insensitive matching
* `(?I)` disables case-insensitive matching * `(?I)` disables case-insensitive matching (default)
* `(?s)` enables dot-matches-newline (default)
* `(?S)` disables dot-matches-newline
== Quoting == Quoting

View File

@ -201,12 +201,21 @@ private:
auto it = m_pos.base(); auto it = m_pos.base();
if (m_regex.end() - it >= 4 and *it++ == '(' and *it++ == '?') if (m_regex.end() - it >= 4 and *it++ == '(' and *it++ == '?')
{ {
auto m = *it++; while (true)
if ((m != 'i' and m != 'I') or *it++ != ')') {
return false; auto m = *it++;
m_ignore_case = (m == 'i'); if (m == 'i' or m == 'I')
m_pos = Iterator{it, m_regex}; m_ignore_case = (m == 'i');
return true; else if (m == 's' or m == 'S')
m_dot_maches_newline = (m == 's');
else if (m == ')')
{
m_pos = Iterator{it, m_regex};
return true;
}
else
return false;
}
} }
return false; return false;
} }
@ -274,7 +283,19 @@ private:
switch (const Codepoint cp = *m_pos) switch (const Codepoint cp = *m_pos)
{ {
case '.': ++m_pos; return new_node(ParsedRegex::AnyChar); case '.':
++m_pos;
if (m_dot_maches_newline)
return new_node(ParsedRegex::AnyChar);
else
{
CharacterClass c;
c.negative = true;
c.ranges.push_back({ '\n', '\n' });
auto class_id = m_parsed_regex.character_classes.size();
m_parsed_regex.character_classes.push_back(std::move(c));
return new_node(ParsedRegex::Class, class_id);
}
case '(': case '(':
{ {
auto captures = [this, it = (++m_pos).base()]() mutable { auto captures = [this, it = (++m_pos).base()]() mutable {
@ -594,6 +615,7 @@ private:
StringView m_regex; StringView m_regex;
Iterator m_pos; Iterator m_pos;
bool m_ignore_case = false; bool m_ignore_case = false;
bool m_dot_maches_newline = true;
static constexpr struct CharacterClassEscape { static constexpr struct CharacterClassEscape {
Codepoint cp; Codepoint cp;