Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve delete properties performance by replace DOMDocument with xml_parse #432

81 changes: 43 additions & 38 deletions src/Jackalope/Transport/DoctrineDBAL/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use Jackalope\Transport\BaseTransport;
use Jackalope\Transport\DoctrineDBAL\Query\QOMWalker;
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser;
use Jackalope\Transport\DoctrineDBAL\XmlPropsRemover\XmlPropsRemover;
use Jackalope\Transport\MoveNodeOperation;
use Jackalope\Transport\NodeTypeManagementInterface;
use Jackalope\Transport\QueryInterface as QueryTransport;
Expand Down Expand Up @@ -1408,6 +1409,7 @@ public function getNode($path)

$nestedNodes = $this->getNodesData($rows);
$node = array_shift($nestedNodes);

foreach ($nestedNodes as $nestedPath => $nested) {
$relativePath = PathHelper::relativizePath($nestedPath, $path);
$this->nestNode($node, $nested, explode('/', $relativePath));
Expand Down Expand Up @@ -1798,10 +1800,19 @@ private function cleanIdentifierCache($root)
*/
public function deleteProperties(array $operations)
{
$this->assertLoggedIn();

$nodesByPath = [];
foreach ($operations as $op) {
$this->deleteProperty($op->srcPath);
$nodePath = PathHelper::getParentPath($op->srcPath);
$propertyName = PathHelper::getNodeName($op->srcPath);
if (!array_key_exists($nodePath, $nodesByPath)) {
$nodesByPath[$nodePath] = [];
}
$nodesByPath[$nodePath][$propertyName] = $propertyName;
}

// Doing the actual removal
foreach ($nodesByPath as $nodePath => $propertiesToDelete) {
$this->removePropertiesFromNode($nodePath, $propertiesToDelete);
}

return true;
Expand All @@ -1827,61 +1838,55 @@ public function deletePropertyImmediately($path)
*/
protected function deleteProperty($path)
{
$this->assertLoggedIn();

$nodePath = PathHelper::getParentPath($path);
$propertyName = PathHelper::getNodeName($path);
$this->removePropertiesFromNode($nodePath, [$propertyName]);
}

/**
* Removes a list of properties from a given node.
*
* @param string $nodeId
* @param array<string> $paths Path belonging to that node that should be deleted
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
*/
private function removePropertiesFromNode($nodePath, array $propertiesToDelete): void
{
$this->assertLoggedIn();
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException("No item found at " . $path);
throw new ItemNotFoundException('No item found at ' . $nodePath);
}

$query = 'SELECT props FROM phpcr_nodes WHERE id = ?';
$xml = $this->getConnection()->fetchOne($query, [$nodeId]);

$dom = new DOMDocument('1.0', 'UTF-8');
$dom->loadXML($xml);
$xpath = new DomXpath($dom);
$xmlPropsRemover = new XmlPropsRemover($xml, $propertiesToDelete);
[$xml, $references] = $xmlPropsRemover->removeProps();

$found = false;
$propertyName = PathHelper::getNodeName($path);
foreach ($xpath->query(sprintf('//*[@sv:name="%s"]', $propertyName)) as $propertyNode) {
$found = true;
// would be nice to have the property object to ask for type
// but its in state deleted, would mean lots of refactoring
if ($propertyNode->hasAttribute('sv:type')) {
$type = strtolower($propertyNode->getAttribute('sv:type'));
if (in_array($type, ['reference', 'weakreference'])) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
foreach ($references as $type => $propertyNames) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
foreach ($propertyNames as $propertyName) {
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
}

$propertyNode->parentNode->removeChild($propertyNode);
}

if (!$found) {
throw new ItemNotFoundException("Node $nodePath has no property $propertyName");
}

$xml = $dom->saveXML();

$query = 'UPDATE phpcr_nodes SET props = ? WHERE id = ?';
$params = [$xml, $nodeId];

try {
$this->getConnection()->executeUpdate($query, $params);
} catch (DBALException $e) {
throw new RepositoryException("Unexpected exception while updating properties of $path", $e->getCode(), $e);
throw new RepositoryException("Unexpected exception while updating properties of $nodePath", $e->getCode(), $e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,18 @@ public function parse(): \stdClass
{
$this->data = new \stdClass();

$parser = xml_parser_create();
$parser = \xml_parser_create();

xml_set_element_handler(
\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

xml_set_character_data_handler($parser, [$this, 'dataHandler']);
\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

xml_parse($parser, $this->xml, true);
xml_parser_free($parser);
\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php

namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover;

/**
* @internal
*/
class XmlPropsRemover
{
/**
* @var string
*/
private $xml;

/**
* @var string[]
*/
private $propertyNames;

/**
* @var bool
*/
private $skipCurrentTag = false;

/**
* @var string
*/
private $newXml = '';

/**
* @var string
*/
private $newStartTag = '';

private $weakReferences = [];

private $references = [];

public function __construct(string $xml, array $propertyNames)
{
$this->xml = $xml;
$this->propertyNames = $propertyNames;
}

/**
* @return array{
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
* 0: string,
* 1: array{
* reference: string[],
* weakreference: string[],
* },
* }
*/
public function removeProps(): array
{
$this->newXml = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
$this->references = [];
$this->weakReferences = [];
$this->newStartTag = '';
$this->skipCurrentTag = false;

$parser = \xml_parser_create();

\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

return [
$this->newXml . PHP_EOL,
[
'reference' => $this->references,
'weakreference' => $this->weakReferences,
],
];
}

/**
* @param \XmlParser $parser
* @param string $name
* @param mixed[] $attrs
*/
private function startHandler($parser, $name, $attrs): void
{
if ($this->skipCurrentTag) {
return;
}

if ($name === 'SV:PROPERTY') {
$svName = $attrs['SV:NAME'];

if (\in_array($svName, $this->propertyNames)) {
$this->skipCurrentTag = true;

if (\in_array($svName, $this->propertyNames)) {
$this->skipCurrentTag = true;
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
$svType = $attrs['SV:TYPE'];

if ($svType === 'reference') {
$this->references[] = $svName;
} elseif ($svType === 'weakreference') {
$this->weakReferences[] = $svName;
}

return;
}

return;
}
}

$tag = '<' . \strtolower($name);
foreach ($attrs as $key => $value) {
$tag .= ' ' . \strtolower($key) . '="' . $value . '"'; // TODO escaping
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
}
$tag .= '>';

$this->newXml .= $this->newStartTag;
$this->newStartTag = $tag; // handling self closing tags in endHandler
}

private function endHandler($parser, $name): void
{
if ($name === 'SV:PROPERTY' && $this->skipCurrentTag) {
$this->skipCurrentTag = false;

return;
}

if ($this->skipCurrentTag) {
return;
}

if ($this->newStartTag) {
// if the tag is not rendered to newXml it can be a self closing tag
$this->newXml .= \substr($this->newStartTag, 0.0, -1) . '/>';
$this->newStartTag = '';

return;
}

$this->newXml .= '</' . \strtolower($name) . '>';
}

private function dataHandler($parser, $data): void
{
if ($this->skipCurrentTag) {
return;
}

if ($data !== '') {
$this->newXml .= $this->newStartTag; // none empty data means no self closing tag so render tag now
alexander-schranz marked this conversation as resolved.
Show resolved Hide resolved
$this->newStartTag = '';
$this->newXml .= \htmlspecialchars($data, ENT_XML1);
}
}
}
29 changes: 29 additions & 0 deletions tests/Jackalope/Transport/DoctrineDBAL/ClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use PHPCR\PropertyType;
use PHPCR\Query\QOM\QueryObjectModelConstantsInterface;
use PHPCR\Query\QueryInterface;
use PHPCR\SimpleCredentials;
use PHPCR\Util\NodeHelper;
use PHPCR\Util\PathHelper;
use PHPCR\Util\QOM\QueryBuilder;
Expand Down Expand Up @@ -240,6 +241,34 @@ public function testDeleteMoreThanOneThousandNodes()
$this->addToAssertionCount(1);
}

public function testDeleteProperties()
{
$root = $this->session->getNode('/');
$node = $root->addNode('delete-properties');
for ($i = 0; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, 'value-'.$i);
}

$this->session->save();
$this->assertSame(1002, \count($node->getProperties()));

for ($i = 501; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, null);
}

$this->session->save();
$this->session->refresh(false);
$node = $this->session->getNode('/delete-properties');

for ($i = 0; $i <= 1000; $i++) {
$this->assertSame(
$i < 501,
$node->hasProperty('property-'.$i),
'Unexpected result for property "property-'.$i.'"'
);
}
}

public function testPropertyLengthAttribute()
{
$rootNode = $this->session->getRootNode();
Expand Down
Loading
Loading