Skip to content

Commit

Permalink
Improve delete properties performance by replace DOMDocument with xml…
Browse files Browse the repository at this point in the history
…_parse (#432)

* Add some test case for XmlPropsRemover
  • Loading branch information
alexander-schranz authored Jan 6, 2024
1 parent f7b286f commit 1b5d994
Show file tree
Hide file tree
Showing 6 changed files with 344 additions and 42 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ Changelog
1.x
===

1.11.0 (unreleased)
-------------------

* Improve delete properties performance by replace DOMDocument with xml_parse

1.10.1
------

Expand Down
80 changes: 43 additions & 37 deletions src/Jackalope/Transport/DoctrineDBAL/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use Jackalope\Transport\BaseTransport;
use Jackalope\Transport\DoctrineDBAL\Query\QOMWalker;
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser;
use Jackalope\Transport\DoctrineDBAL\XmlPropsRemover\XmlPropsRemover;
use Jackalope\Transport\MoveNodeOperation;
use Jackalope\Transport\NodeTypeManagementInterface;
use Jackalope\Transport\QueryInterface as QueryTransport;
Expand Down Expand Up @@ -1408,6 +1409,7 @@ public function getNode($path)

$nestedNodes = $this->getNodesData($rows);
$node = array_shift($nestedNodes);

foreach ($nestedNodes as $nestedPath => $nested) {
$relativePath = PathHelper::relativizePath($nestedPath, $path);
$this->nestNode($node, $nested, explode('/', $relativePath));
Expand Down Expand Up @@ -1798,10 +1800,20 @@ private function cleanIdentifierCache($root)
*/
public function deleteProperties(array $operations)
{
$this->assertLoggedIn();

$nodesByPath = [];
foreach ($operations as $op) {
$this->deleteProperty($op->srcPath);
$nodePath = PathHelper::getParentPath($op->srcPath);
$propertyName = PathHelper::getNodeName($op->srcPath);
if (!array_key_exists($nodePath, $nodesByPath)) {
$nodesByPath[$nodePath] = [];
}
$nodesByPath[$nodePath][$propertyName] = $propertyName;
}

// Doing the actual removal
$this->assertLoggedIn();
foreach ($nodesByPath as $nodePath => $propertiesToDelete) {
$this->removePropertiesFromNode($nodePath, $propertiesToDelete);
}

return true;
Expand All @@ -1828,60 +1840,54 @@ public function deletePropertyImmediately($path)
protected function deleteProperty($path)
{
$this->assertLoggedIn();

$nodePath = PathHelper::getParentPath($path);
$propertyName = PathHelper::getNodeName($path);
$this->removePropertiesFromNode($nodePath, [$propertyName]);
}

/**
* Removes a list of properties from a given node.
*
* @param string $nodePath
* @param array<string> $propertiesToDelete Path belonging to that node that should be deleted
*/
private function removePropertiesFromNode($nodePath, array $propertiesToDelete): void
{
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException("No item found at " . $path);
throw new ItemNotFoundException('No item found at ' . $nodePath);
}

$query = 'SELECT props FROM phpcr_nodes WHERE id = ?';
$xml = $this->getConnection()->fetchOne($query, [$nodeId]);

$dom = new DOMDocument('1.0', 'UTF-8');
$dom->loadXML($xml);
$xpath = new DomXpath($dom);
$xmlPropsRemover = new XmlPropsRemover($xml, $propertiesToDelete);
[$xml, $references] = $xmlPropsRemover->removeProps();

$found = false;
$propertyName = PathHelper::getNodeName($path);
foreach ($xpath->query(sprintf('//*[@sv:name="%s"]', $propertyName)) as $propertyNode) {
$found = true;
// would be nice to have the property object to ask for type
// but its in state deleted, would mean lots of refactoring
if ($propertyNode->hasAttribute('sv:type')) {
$type = strtolower($propertyNode->getAttribute('sv:type'));
if (in_array($type, ['reference', 'weakreference'])) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
foreach ($references as $type => $propertyNames) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
foreach ($propertyNames as $propertyName) {
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
}

$propertyNode->parentNode->removeChild($propertyNode);
}

if (!$found) {
throw new ItemNotFoundException("Node $nodePath has no property $propertyName");
}

$xml = $dom->saveXML();

$query = 'UPDATE phpcr_nodes SET props = ? WHERE id = ?';
$params = [$xml, $nodeId];

try {
$this->getConnection()->executeUpdate($query, $params);
} catch (DBALException $e) {
throw new RepositoryException("Unexpected exception while updating properties of $path", $e->getCode(), $e);
throw new RepositoryException("Unexpected exception while updating properties of $nodePath", $e->getCode(), $e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,18 @@ public function parse(): \stdClass
{
$this->data = new \stdClass();

$parser = xml_parser_create();
$parser = \xml_parser_create();

xml_set_element_handler(
\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

xml_set_character_data_handler($parser, [$this, 'dataHandler']);
\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

xml_parse($parser, $this->xml, true);
xml_parser_free($parser);
\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<?php

namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover;

/**
* @internal
*/
class XmlPropsRemover
{
/**
* @var string
*/
private $xml;

/**
* @var string[]
*/
private $propertyNames;

/**
* @var bool
*/
private $skipCurrentTag = false;

/**
* @var string
*/
private $newXml = '';

/**
* @var string
*/
private $newStartTag = '';

private $weakReferences = [];

private $references = [];

public function __construct(string $xml, array $propertyNames)
{
$this->xml = $xml;
$this->propertyNames = $propertyNames;
}

/**
* @example [$xml, $references] = $xmlPropsRemover->removeProps($xml, $propertiesToDelete);
*
* @return array{
* 0: string,
* 1: array{
* reference: string[],
* weakreference: string[],
* },
* } An array with the new xml (0) and the references (1) which requires to be removed.
*/
public function removeProps(): array
{
$this->newXml = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
$this->references = [];
$this->weakReferences = [];
$this->newStartTag = '';
$this->skipCurrentTag = false;

$parser = \xml_parser_create();

\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

return [
$this->newXml . PHP_EOL,
[
'reference' => $this->references,
'weakreference' => $this->weakReferences,
],
];
}

/**
* @param \XmlParser $parser
* @param string $name
* @param mixed[] $attrs
*/
private function startHandler($parser, $name, $attrs): void
{
if ($this->skipCurrentTag) {
return;
}

if ($name === 'SV:PROPERTY') {
$svName = $attrs['SV:NAME'];

if (\in_array($svName, $this->propertyNames)) {
$this->skipCurrentTag = true;
$svType = $attrs['SV:TYPE'];

if ($svType === 'reference') {
$this->references[] = $svName;
} elseif ($svType === 'weakreference') {
$this->weakReferences[] = $svName;
}

return;
}
}

$tag = '<' . \strtolower($name);
foreach ($attrs as $key => $value) {
$tag .= ' ' . \strtolower($key) // there is no case key which requires escaping for performance reasons we avoid it so
. '="'
. \htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
. '"';
}
$tag .= '>';

$this->newXml .= $this->newStartTag;
$this->newStartTag = $tag; // handling self closing tags in endHandler
}

private function endHandler($parser, $name): void
{
if ($name === 'SV:PROPERTY' && $this->skipCurrentTag) {
$this->skipCurrentTag = false;

return;
}

if ($this->skipCurrentTag) {
return;
}

if ($this->newStartTag) {
// if the tag is not rendered to newXml it can be a self-closing tag
$this->newXml .= \substr($this->newStartTag, 0.0, -1) . '/>';
$this->newStartTag = '';

return;
}

$this->newXml .= '</' . \strtolower($name) . '>';
}

private function dataHandler($parser, $data): void
{
if ($this->skipCurrentTag) {
return;
}

if ($data !== '') {
$this->newXml .= $this->newStartTag; // non-empty data means no self closing tag so render tag now
$this->newStartTag = '';
$this->newXml .= \htmlspecialchars($data, ENT_XML1, 'UTF-8');
}
}
}
29 changes: 29 additions & 0 deletions tests/Jackalope/Transport/DoctrineDBAL/ClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use PHPCR\PropertyType;
use PHPCR\Query\QOM\QueryObjectModelConstantsInterface;
use PHPCR\Query\QueryInterface;
use PHPCR\SimpleCredentials;
use PHPCR\Util\NodeHelper;
use PHPCR\Util\PathHelper;
use PHPCR\Util\QOM\QueryBuilder;
Expand Down Expand Up @@ -240,6 +241,34 @@ public function testDeleteMoreThanOneThousandNodes()
$this->addToAssertionCount(1);
}

public function testDeleteProperties()
{
$root = $this->session->getNode('/');
$node = $root->addNode('delete-properties');
for ($i = 0; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, 'value-'.$i);
}

$this->session->save();
$this->assertSame(1002, \count($node->getProperties()));

for ($i = 501; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, null);
}

$this->session->save();
$this->session->refresh(false);
$node = $this->session->getNode('/delete-properties');

for ($i = 0; $i <= 1000; $i++) {
$this->assertSame(
$i < 501,
$node->hasProperty('property-'.$i),
'Unexpected result for property "property-'.$i.'"'
);
}
}

public function testPropertyLengthAttribute()
{
$rootNode = $this->session->getRootNode();
Expand Down
Loading

0 comments on commit 1b5d994

Please sign in to comment.