Skip to content

Commit

Permalink
Merge pull request #25 from BackEndTea/feat/named-back-reference
Browse files Browse the repository at this point in the history
Support named back references
  • Loading branch information
BackEndTea authored Jan 28, 2021
2 parents 7c27ed1 + c0f267f commit 4f63341
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 6 deletions.
6 changes: 3 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
}
},
"require-dev": {
"phpstan/phpstan": "^0.12.64",
"phpstan/phpstan": "^0.12.70",
"phpunit/phpunit": "^9.5",
"vimeo/psalm": "^4.3",
"infection/infection": "^0.20.2",
"vimeo/psalm": "^4.4",
"infection/infection": "^0.21",
"doctrine/coding-standard": "^8.2"
}
}
2 changes: 1 addition & 1 deletion phpstan_baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ parameters:
ignoreErrors:
-
message: "#^Parameter \\#1 \\$char of static method BackEndTea\\\\Regexer\\\\Token\\\\Exception\\\\MissingEnd\\:\\:fromDelimiter\\(\\) expects string, string\\|null given\\.$#"
count: 1
count: 2
path: src/Lexer/Lexer.php
2 changes: 1 addition & 1 deletion psalm_baseline.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<MixedArgument occurrences="1">
<code>$this-&gt;delimiter</code>
</MixedArgument>
<PossiblyNullArgument occurrences="2">
<PossiblyNullArgument occurrences="3">
<code>$this-&gt;delimiter</code>
<code>$input-&gt;next()</code>
</PossiblyNullArgument>
Expand Down
47 changes: 47 additions & 0 deletions src/Lexer/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use BackEndTea\Regexer\Token\Dot;
use BackEndTea\Regexer\Token\Escaped;
use BackEndTea\Regexer\Token\Exception\InvalidDelimiter;
use BackEndTea\Regexer\Token\Exception\InvalidReference;
use BackEndTea\Regexer\Token\Exception\MissingEnd;
use BackEndTea\Regexer\Token\Exception\MissingStart;
use BackEndTea\Regexer\Token\Exception\UnclosedBracketList;
Expand Down Expand Up @@ -111,6 +112,20 @@ public function regexToTokenStream(Stream $input): iterable
$token = Token\Anchor\End::create();
break;
case '(':
if ($input->getBetween($input->currentIndex(), $input->currentIndex() + 3) === '(?P=') {
$referenceTo = '';
while ($input->next() !== null) {
$current = $input->current();
$referenceTo .= $current;
if ($current === ')') {
break;
}
}

$token = SubPattern\Reference::forPNotation($referenceTo);
break;
}

++$this->subPatternCount;
$token = SubPattern\Start::create();
$currentIndex = $input->currentIndex();
Expand Down Expand Up @@ -375,6 +390,38 @@ private function createEscapeSequence(Stream $input): Token
return Escaped\EscapedCharacter::fromCharacter('-');
}

if ($current === 'k') {
$next = $input->next();
switch ($next) {
case '{':
$closing = '}';
break;
case '\'':
$closing = '\'';
break;
case '<':
$closing = '>';
break;
case null:
throw MissingEnd::fromDelimiter($this->delimiter);

default:
throw InvalidReference::fromKNotation($next);
}

$start = 'k' . $next;
$referenceTo = '';
while ($input->next() !== null) {
$current = $input->current();
$referenceTo .= $current;
if ($current === $closing) {
break;
}
}

return SubPattern\Reference::create($start . $referenceTo);
}

if ($current === 'g' && $input->at($input->currentIndex() + 1) === '{') {
$current = 'g{';
$input->next();
Expand Down
4 changes: 4 additions & 0 deletions src/Node/SubPattern/Reference.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ final class Reference extends Node
'\\' => '',
'\g' => '',
'\g{' => '}',
'\k{' => '}',
'\k<' => '>',
'\k\'' => '\'',
'(?P=' => ')',
];

private string $referenceTo;
Expand Down
6 changes: 5 additions & 1 deletion src/Parser/TokenParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,11 @@ private function parseFromToken(Node\NodeWithChildren $parent, array $tokens, in
if ($token instanceof Token\SubPattern\Reference) {
$tokenString = $token->asString();

if ($tokenString[1] !== 'g') {
if ($tokenString[1] === 'k') {
$node = new Node\SubPattern\Reference('\\k' . $tokenString[2], substr($token->asString(), 3, -1));
} elseif ($tokenString[0] === '(') {
$node = new Node\SubPattern\Reference('(?P=', substr($token->asString(), 4, -1));
} elseif ($tokenString[1] !== 'g') {
$node = new Node\SubPattern\Reference('\\', substr($token->asString(), 1));
} elseif ($tokenString[2] === '{') {
$node = new Node\SubPattern\Reference('\\g{', substr($token->asString(), 3, -1));
Expand Down
18 changes: 18 additions & 0 deletions src/Token/Exception/InvalidReference.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

declare(strict_types=1);

namespace BackEndTea\Regexer\Token\Exception;

use function sprintf;

/**
 * Syntax error for a back reference that is written in an unsupported form.
 */
final class InvalidReference extends SyntaxException
{
    /**
     * Creates the exception for a `\k` escape that is followed by an unsupported character.
     *
     * @param string $character The character that was found directly after `\k`.
     */
    public static function fromKNotation(string $character): self
    {
        $message = sprintf('Only \',{ or < are allowed after \k, got "%s"', $character);

        return new self($message);
    }
}
10 changes: 10 additions & 0 deletions src/Token/SubPattern/Reference.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,20 @@

use BackEndTea\Regexer\Token;

use function sprintf;

/**
 * Token for a back reference to a sub pattern.
 */
final class Reference extends Token
{
    /**
     * Builds a reference token for the backslash notations.
     *
     * @param string $characters Everything that follows the leading backslash, e.g. `g{foo}` or `k<foo>`.
     */
    public static function create(string $characters): self
    {
        return new self(sprintf('\\%s', $characters));
    }

    /**
     * Builds a reference token for the `(?P=name)` notation.
     *
     * @param string $name Everything that follows the opening parenthesis, e.g. `?P=foo)`.
     */
    public static function forPNotation(string $name): self
    {
        return new self(sprintf('(%s', $name));
    }
}
130 changes: 130 additions & 0 deletions tests/E2E/ParserLexer/NamedSubPatternTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,135 @@ public function provideTestCases(): Generator
],
new Node\RootNode('/', [new Node\SubPattern([new Node\LiteralCharacters('ab')], true, new Node\SubPattern\Name('?P<', 'foo'))], ''),
];

yield 'Named back reference with \g{ notation' => [
'/(?P<foo>ab)=\g{foo}/',
[
Token\Delimiter::create('/'),
Token\SubPattern\Start::create(),
Token\SubPattern\Named::fromName('?P<foo>'),
Token\LiteralCharacters::create('ab'),
Token\SubPattern\End::create(),
Token\LiteralCharacters::create('='),
Token\SubPattern\Reference::create('g{foo}'),
Token\Delimiter::create('/'),
],
new Node\RootNode('/', [
new Node\SubPattern(
[
new Node\LiteralCharacters('ab'),
],
true,
new Node\SubPattern\Name('?P<', 'foo')
),
new Node\LiteralCharacters('='),
new Node\SubPattern\Reference('\g{', 'foo'),
], ''),
['ab=ab'],
];

yield 'Named back reference with \k{ notation' => [
'/(?P<foo>ab)=\k{foo}/',
[
Token\Delimiter::create('/'),
Token\SubPattern\Start::create(),
Token\SubPattern\Named::fromName('?P<foo>'),
Token\LiteralCharacters::create('ab'),
Token\SubPattern\End::create(),
Token\LiteralCharacters::create('='),
Token\SubPattern\Reference::create('k{foo}'),
Token\Delimiter::create('/'),
],
new Node\RootNode('/', [
new Node\SubPattern(
[
new Node\LiteralCharacters('ab'),
],
true,
new Node\SubPattern\Name('?P<', 'foo')
),
new Node\LiteralCharacters('='),
new Node\SubPattern\Reference('\k{', 'foo'),
], ''),
['ab=ab'],
];

yield 'Named back reference with \k\' notation' => [
'/(?P<foo>ab)=\k\'foo\'/',
[
Token\Delimiter::create('/'),
Token\SubPattern\Start::create(),
Token\SubPattern\Named::fromName('?P<foo>'),
Token\LiteralCharacters::create('ab'),
Token\SubPattern\End::create(),
Token\LiteralCharacters::create('='),
Token\SubPattern\Reference::create('k\'foo\''),
Token\Delimiter::create('/'),
],
new Node\RootNode('/', [
new Node\SubPattern(
[
new Node\LiteralCharacters('ab'),
],
true,
new Node\SubPattern\Name('?P<', 'foo')
),
new Node\LiteralCharacters('='),
new Node\SubPattern\Reference('\k\'', 'foo'),
], ''),
['ab=ab'],
];

yield 'Named back reference with \k< notation' => [
'/(?P<foo>ab)=\k<foo>/',
[
Token\Delimiter::create('/'),
Token\SubPattern\Start::create(),
Token\SubPattern\Named::fromName('?P<foo>'),
Token\LiteralCharacters::create('ab'),
Token\SubPattern\End::create(),
Token\LiteralCharacters::create('='),
Token\SubPattern\Reference::create('k<foo>'),
Token\Delimiter::create('/'),
],
new Node\RootNode('/', [
new Node\SubPattern(
[
new Node\LiteralCharacters('ab'),
],
true,
new Node\SubPattern\Name('?P<', 'foo')
),
new Node\LiteralCharacters('='),
new Node\SubPattern\Reference('\k<', 'foo'),
], ''),
['ab=ab'],
];

yield 'Named back reference with (?P=) notation' => [
'/(?P<foo>ab)=(?P=foo)/',
[
Token\Delimiter::create('/'),
Token\SubPattern\Start::create(),
Token\SubPattern\Named::fromName('?P<foo>'),
Token\LiteralCharacters::create('ab'),
Token\SubPattern\End::create(),
Token\LiteralCharacters::create('='),
Token\SubPattern\Reference::forPNotation('?P=foo)'),
Token\Delimiter::create('/'),
],
new Node\RootNode('/', [
new Node\SubPattern(
[
new Node\LiteralCharacters('ab'),
],
true,
new Node\SubPattern\Name('?P<', 'foo')
),
new Node\LiteralCharacters('='),
new Node\SubPattern\Reference('(?P=', 'foo'),
], ''),
['ab=ab'],
];
}
}
17 changes: 17 additions & 0 deletions tests/Lexer/LexerFailureTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use BackEndTea\Regexer\StringStream;
use BackEndTea\Regexer\Token\Exception\InvalidDelimiter;
use BackEndTea\Regexer\Token\Exception\InvalidReference;
use BackEndTea\Regexer\Token\Exception\InvalidSubPattern;
use BackEndTea\Regexer\Token\Exception\MissingEnd;
use BackEndTea\Regexer\Token\Exception\MissingStart;
Expand Down Expand Up @@ -111,4 +112,20 @@ public function testCantEndOnBackslash(): void
$this->expectException(MissingEnd::class);
Util::iterableToArray($lexer->regexToTokenStream(new StringStream('/foo\\')));
}

/**
 * `\k` followed by anything other than `{`, `<` or `'` must be rejected by the lexer.
 */
public function testInvalidKReference(): void
{
    $stream = new StringStream('/\kfoo/');
    $lexer  = new Lexer();

    $this->expectException(InvalidReference::class);

    // The token stream is lazy, so it has to be consumed for the lexer to run.
    Util::iterableToArray($lexer->regexToTokenStream($stream));
}

/**
 * A pattern that stops directly after `\k` has no closing delimiter and must fail with MissingEnd.
 */
public function testCantEndOnUnescapedK(): void
{
    $stream = new StringStream('/\k');
    $lexer  = new Lexer();

    $this->expectException(MissingEnd::class);

    // The token stream is lazy, so it has to be consumed for the lexer to run.
    Util::iterableToArray($lexer->regexToTokenStream($stream));
}
}
7 changes: 7 additions & 0 deletions tests/StringStreamTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,11 @@ public function testCanGetBetween(): void

$this->assertSame('{123}', $stream->getBetween(3, 7));
}

/**
 * Asking for the index one past the last character yields null.
 */
public function testAtWillReturnNullIfOutOfBounds(): void
{
    // '0123' has valid indexes 0 through 3, so index 4 is the first one out of bounds.
    $outOfBounds = (new StringStream('0123'))->at(4);

    $this->assertNull($outOfBounds);
}
}

0 comments on commit 4f63341

Please sign in to comment.