مرحبا here كلمة انجليزي.
'; +SharedHtml::addHtml($section, $htmlContent, false, false); + +// Save file +echo write($phpWord, basename(__FILE__, '.php'), $writers); +if (!CLI) { + include_once 'Sample_Footer.php'; +} +Settings::setDefaultRtl(false); diff --git a/src/PhpWord/PhpWord.php b/src/PhpWord/PhpWord.php index cf6f16ae02..453204d4cd 100644 --- a/src/PhpWord/PhpWord.php +++ b/src/PhpWord/PhpWord.php @@ -20,6 +20,7 @@ use BadMethodCallException; use PhpOffice\PhpWord\Element\Section; use PhpOffice\PhpWord\Exception\Exception; +use PhpOffice\PhpWord\Style\Font; /** * PHPWord main class. @@ -283,9 +284,9 @@ public function setDefaultFontSize($fontSize): void * * @return \PhpOffice\PhpWord\Style\Paragraph */ - public function setDefaultParagraphStyle($styles) + public function setDefaultParagraphStyle($styles, ?Font $fontStyles = null) { - return Style::setDefaultParagraphStyle($styles); + return Style::setDefaultParagraphStyle($styles, $fontStyles); } /** diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index 98a74772cd..28f5bc39ad 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php @@ -747,35 +747,46 @@ protected function readTableStyle(XMLReader $xmlReader, DOMElement $domNode) $borders = array_merge($margins, ['insideH', 'insideV']); if ($xmlReader->elementExists('w:tblPr', $domNode)) { + $tblStyleName = ''; if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) { - $style = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle'); - } else { - $styleNode = $xmlReader->getElement('w:tblPr', $domNode); - $styleDefs = []; - foreach ($margins as $side) { - $ucfSide = ucfirst($side); - $styleDefs["cellMargin$ucfSide"] = [self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w']; - } - foreach ($borders as $side) { - $ucfSide = ucfirst($side); - $styleDefs["border{$ucfSide}Size"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz']; - $styleDefs["border{$ucfSide}Color"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:color']; - $styleDefs["border{$ucfSide}Style"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:val']; - } - $styleDefs['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type']; - $styleDefs['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual']; - $styleDefs['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w']; - $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs); - - $tablePositionNode = $xmlReader->getElement('w:tblpPr', $styleNode); - if ($tablePositionNode !== null) { - $style['position'] = $this->readTablePosition($xmlReader, $tablePositionNode); - } + $tblStyleName = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle'); + } + $styleNode = $xmlReader->getElement('w:tblPr', $domNode); + $styleDefs = []; - $indentNode = $xmlReader->getElement('w:tblInd', $styleNode); - if ($indentNode !== null) { - $style['indent'] = $this->readTableIndent($xmlReader, $indentNode); - } + foreach ($margins as $side) { + $ucfSide = ucfirst($side); + $styleDefs["cellMargin$ucfSide"] = [self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w']; + } + foreach ($borders as $side) { + $ucfSide = ucfirst($side); + $styleDefs["border{$ucfSide}Size"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz']; + $styleDefs["border{$ucfSide}Color"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:color']; + $styleDefs["border{$ucfSide}Style"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:val']; + } + $styleDefs['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type']; + $styleDefs['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual']; + $styleDefs['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w']; + $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs); + + $tablePositionNode = $xmlReader->getElement('w:tblpPr', $styleNode); + if ($tablePositionNode !== null) { + $style['position'] = $this->readTablePosition($xmlReader, $tablePositionNode); + } + + $indentNode = $xmlReader->getElement('w:tblInd', $styleNode); + if ($indentNode !== null) { + $style['indent'] = $this->readTableIndent($xmlReader, $indentNode); + } + if ($xmlReader->elementExists('w:basedOn', $domNode)) { + $style['basedOn'] = $xmlReader->getAttribute('w:val', $domNode, 'w:basedOn'); + } + if ($tblStyleName !== '') { + $style['tblStyle'] = $tblStyleName; + } + // this may be unneeded + if ($xmlReader->elementExists('w:name', $domNode)) { + $style['styleName'] = $xmlReader->getAttribute('w:val', $domNode, 'w:name'); } } diff --git a/src/PhpWord/Reader/Word2007/Styles.php b/src/PhpWord/Reader/Word2007/Styles.php index 760adf9493..f67bc77463 100644 --- a/src/PhpWord/Reader/Word2007/Styles.php +++ b/src/PhpWord/Reader/Word2007/Styles.php @@ -65,8 +65,9 @@ public function read(PhpWord $phpWord): void foreach ($nodes as $node) { $type = $xmlReader->getAttribute('w:type', $node); $name = $xmlReader->getAttribute('w:val', $node, 'w:name'); + $styleId = $xmlReader->getAttribute('w:styleId', $node); if (null === $name) { - $name = $xmlReader->getAttribute('w:styleId', $node); + $name = $styleId; } $headingMatches = []; preg_match('/Heading\s*(\d)/i', $name, $headingMatches); @@ -98,7 +99,8 @@ public function read(PhpWord $phpWord): void case 'table': $tStyle = $this->readTableStyle($xmlReader, $node); if (!empty($tStyle)) { - $phpWord->addTableStyle($name, $tStyle); + $newTable = $phpWord->addTableStyle($styleId, $tStyle); + $newTable->setStyleName($name); } break; diff --git a/src/PhpWord/Settings.php b/src/PhpWord/Settings.php index 984486ccfe..b43bf05228 100644 --- a/src/PhpWord/Settings.php +++ b/src/PhpWord/Settings.php @@ -15,6 +15,8 @@ namespace PhpOffice\PhpWord; +use PhpOffice\PhpWord\SimpleType\TextDirection; + /** * PHPWord settings class. * @@ -397,6 +399,9 @@ public static function setDefaultFontSize($value): bool public static function setDefaultRtl(?bool $defaultRtl): void { self::$defaultRtl = $defaultRtl; + if ($defaultRtl === true && Style::getStyle('Normal') === null) { + Style::setDefaultParagraphStyle(['bidi' => true, 'textDirection' => TextDirection::RLTB], ['rtl' => true]); + } } public static function isDefaultRtl(): ?bool diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 334f5c269e..f70c2180d6 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -25,9 +25,14 @@ use PhpOffice\PhpWord\Element\AbstractContainer; use PhpOffice\PhpWord\Element\Row; use PhpOffice\PhpWord\Element\Table; +use PhpOffice\PhpWord\Element\TextRun; +use PhpOffice\PhpWord\Metadata\DocInfo; +use PhpOffice\PhpWord\PhpWord; use PhpOffice\PhpWord\Settings; +use PhpOffice\PhpWord\SimpleType\Border; use PhpOffice\PhpWord\SimpleType\Jc; use PhpOffice\PhpWord\SimpleType\NumberFormat; +use PhpOffice\PhpWord\SimpleType\TextDirection; use PhpOffice\PhpWord\Style\Paragraph; /** @@ -37,14 +42,24 @@ */ class Html { + private const SPECIAL_BORDER_WIDTHS = ['thin' => '0.5pt', 'thick' => '3.5pt', 'medium' => '2.0pt']; + private const RGB_REGEXP = '/^\s*rgb\s*[(]\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*[)]\s*$/'; + private const DECLARES_CHARSET = '/ charset=/i'; + protected static $listIndex = 0; protected static $xpath; protected static $options; + /** @var ?DocInfo */ + protected static $docInfo; + + /** @var bool */ + private static $addbody = false; + /** * @var Css */ @@ -69,33 +84,86 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit * which could be applied when such an element occurs in the parseNode function. */ static::$options = $options; + static::$docInfo = null; + if (method_exists($element, 'getPhpWord')) { + /** @var ?PhpWord */ + $phpWord = $element->getPhpWord(); + if ($phpWord !== null) { + static::$docInfo = $phpWord->getDocInfo(); + } + } - // Preprocess: remove all line ends, decode HTML entity, - // fix ampersand and angle brackets and add body tag for HTML fragments - $html = str_replace(["\n", "\r"], '', $html); - $html = str_replace(['<', '>', '&', '"'], ['_lt_', '_gt_', '_amp_', '_quot_'], $html); - $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8'); - $html = str_replace('&', '&', $html); - $html = str_replace(['_lt_', '_gt_', '_amp_', '_quot_'], ['<', '>', '&', '"'], $html); - - if (false === $fullHTML) { - $html = '' . $html . ''; + if (substr($html, 0, 2) === "\xfe\xff" || substr($html, 0, 2) === "\xff\xfe") { + $html = mb_convert_encoding($html, 'UTF-8', 'UTF-16'); + } + if (substr($html, 0, 3) === "\xEF\xBB\xBF") { + $html = substr($html, 3); + } + if (self::$addbody && false === $fullHTML) { + $html = '' . $html . ''; // @codeCoverageIgnore } // Load DOM if (\PHP_VERSION_ID < 80000) { - $orignalLibEntityLoader = libxml_disable_entity_loader(true); + $orignalLibEntityLoader = libxml_disable_entity_loader(true); // @codeCoverageIgnore } $dom = new DOMDocument(); + $html = self::replaceNonAsciiIfNeeded($html); $dom->preserveWhiteSpace = $preserveWhiteSpace; - $dom->loadXML($html); + + try { + $result = $dom->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR); + $exceptionMessage = 'DOM loadHTML failed'; + } catch (Exception $e) { + $result = false; + $exceptionMessage = $e->getMessage(); + } + if ($result === false) { + throw new Exception($exceptionMessage); // @codeCoverageIgnore + } + self::removeAnnoyingWhitespaceTextNodes($dom); static::$xpath = new DOMXPath($dom); - $node = $dom->getElementsByTagName('body'); + $node = $dom->getElementsByTagName('html'); + if (count($node) === 0 || $node->item(0) === null) { + $node = $dom->getElementsByTagName('body'); + } static::parseNode($node->item(0), $element); if (\PHP_VERSION_ID < 80000) { - libxml_disable_entity_loader($orignalLibEntityLoader); + libxml_disable_entity_loader($orignalLibEntityLoader); // @codeCoverageIgnore + } + } + + // https://www.php.net/manual/en/domdocument.loadhtml.php + private static function removeAnnoyingWhitespaceTextNodes(DOMNode $node): void + { + if ($node->hasChildNodes()) { + for ($i = $node->childNodes->length - 1; $i >= 0; --$i) { + self::removeAnnoyingWhitespaceTextNodes($node->childNodes->item($i)); + } + } + if ($node->nodeType === XML_TEXT_NODE && !$node->hasChildNodes() && !$node->hasAttributes() && empty(trim($node->textContent))) { + $node->parentNode->removeChild($node); + } + } + + private static function replaceNonAscii(array $matches): string + { + return '' . mb_ord($matches[0], 'UTF-8') . ';'; + } + + private static function replaceNonAsciiIfNeeded(string $convert): ?string + { + if (preg_match(self::DECLARES_CHARSET, $convert) !== 1) { + $lowend = "\u{80}"; + $highend = "\u{10ffff}"; + $regexp = "/[$lowend-$highend]/u"; + /** @var callable $callback */ + $callback = [self::class, 'replaceNonAscii']; + $convert = preg_replace_callback($regexp, $callback, $convert); } + + return $convert; } /** @@ -106,14 +174,21 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit * * @return array */ - protected static function parseInlineStyle($node, $styles = []) + protected static function parseInlineStyle($node, &$styles) { if (XML_ELEMENT_NODE == $node->nodeType) { $attributes = $node->attributes; // get all the attributes(eg: id, class) - $attributeDir = $attributes->getNamedItem('dir'); - $attributeDirValue = $attributeDir ? $attributeDir->nodeValue : ''; - $bidi = $attributeDirValue === 'rtl'; + $bidi = false; + $attrDir = $attributes->getNamedItem('dir'); + $direction = isset($attrDir) ? $attrDir->nodeValue : ''; + if ($direction === 'rtl') { + $bidi = $styles['bidi'] = $styles['rtl'] = true; + $styles['textDirection'] = TextDirection::RLTB; + } elseif ($direction === 'ltr') { + $bidi = $styles['bidi'] = $styles['rtl'] = false; + $styles['textDirection'] = TextDirection::LRTB; + } foreach ($attributes as $attribute) { $val = $attribute->value; switch (strtolower($attribute->name)) { @@ -146,7 +221,7 @@ protected static function parseInlineStyle($node, $styles = []) break; case 'bgcolor': // tables, rows, cells e.g.header a | +header b | +header c | +
---|---|---|
1 | 2 | |
This is bold text | 6 |
header a | +header b | +header c | +
---|---|---|
1 | 2 | |
This is bold text | 6 |
Header | Content |
---|
This is bold text.
'; + $section = $phpWord->addSection(); + Html::addHtml($section, $html); + self::assertTrue(true); + $element = $section->getElements()[0]; + self::assertInstanceOf(TextRun::class, $element); + $textElements = $element->getElements(); + self::assertCount(3, $textElements); + + $text = $textElements[0]; + self::assertInstanceOf(Text::class, $text); + self::assertInstanceOf(Font::class, $text->getFontStyle()); + self::assertNotTrue($text->getFontStyle()->isBold()); + + $text = $textElements[1]; + self::assertInstanceOf(Text::class, $text); + self::assertSame('boldtext', $text->getFontStyle()); + $style = Style::getStyle('boldtext'); + self::assertInstanceOf(Font::class, $style); + self::assertTrue($style->isBold()); + + $text = $textElements[2]; + self::assertInstanceOf(Text::class, $text); + self::assertInstanceOf(Font::class, $text->getFontStyle()); + self::assertNotTrue($text->getFontStyle()->isBold()); + } + /** * Test underline. */ @@ -635,7 +688,7 @@ public function testParseTableStyleAttributeInlineStyle(): void $xpath = '/w:document/w:body/w:tbl/w:tr[1]/w:tc[1]/w:tcPr/w:shd'; self::assertTrue($doc->elementExists($xpath)); - self::assertEquals('red', $doc->getElement($xpath)->getAttribute('w:fill')); + self::assertEquals('ff0000', $doc->getElement($xpath)->getAttribute('w:fill')); } /** @@ -743,7 +796,7 @@ public function testParseListWithFormat(): void { $phpWord = new PhpWord(); $section = $phpWord->addSection(); - $html = preg_replace('/\s+/', ' ', 'Bug Report:
+BugTracker X is ${facing1} an issue.
+BugTracker X is ${facing2} an issue.
+BugTracker X is ${facing1} an issue.
+ '; + $section = new Section(0); + Html::addHtml($section, $html, false, false); + $templateProcessor->setComplexBlock('test', $section); + $facing1 = new TextRun(); + $facing1->addText('facing', ['bold' => true]); + $facing2 = new TextRun(); + $facing2->addText('facing', ['italic' => true]); + + $templateProcessor->setComplexBlock('test', $section); + $templateProcessor->setComplexValue('facing1', $facing1, true); + $templateProcessor->setComplexValue('facing2', $facing2); + + $docName = $templateProcessor->save(); + $docFound = file_exists($docName); + self::assertTrue($docFound); + $contents = file_get_contents("zip://$docName#word/document2.xml"); + unlink($docName); + self::assertNotFalse($contents); + $contents = preg_replace('/>\s+', '><', $contents) ?? ''; + self::assertStringContainsString('1
+
B1
+1
+D1
+2
+B2
+C2
+2
+ + diff --git a/tests/PhpWordTests/_files/html/charset.ISO-8859-1.html4.html b/tests/PhpWordTests/_files/html/charset.ISO-8859-1.html4.html new file mode 100644 index 0000000000..8a14894517 --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.ISO-8859-1.html4.html @@ -0,0 +1,17 @@ + + + + +1
+B1
+1
+D1
+2
+B2
+C2
+2
+ + diff --git a/tests/PhpWordTests/_files/html/charset.ISO-8859-2.html b/tests/PhpWordTests/_files/html/charset.ISO-8859-2.html new file mode 100644 index 0000000000..c2b494ff99 --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.ISO-8859-2.html @@ -0,0 +1,17 @@ + + + + +1
+B1
+1
+D1
+2
+B2
+C2
+2
+ + diff --git a/tests/PhpWordTests/_files/html/charset.UTF-16.bebom.html b/tests/PhpWordTests/_files/html/charset.UTF-16.bebom.html new file mode 100644 index 0000000000..6b29e7d2b0 Binary files /dev/null and b/tests/PhpWordTests/_files/html/charset.UTF-16.bebom.html differ diff --git a/tests/PhpWordTests/_files/html/charset.UTF-16.lebom.html b/tests/PhpWordTests/_files/html/charset.UTF-16.lebom.html new file mode 100644 index 0000000000..4ba47a8139 Binary files /dev/null and b/tests/PhpWordTests/_files/html/charset.UTF-16.lebom.html differ diff --git a/tests/PhpWordTests/_files/html/charset.UTF-8.bom.html b/tests/PhpWordTests/_files/html/charset.UTF-8.bom.html new file mode 100644 index 0000000000..5a49399018 --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.UTF-8.bom.html @@ -0,0 +1,16 @@ + + + +À1
+B1
+ç1
+D1
+Ã2
+B2
+C2
+Ð2
+ + diff --git a/tests/PhpWordTests/_files/html/charset.UTF-8.html b/tests/PhpWordTests/_files/html/charset.UTF-8.html new file mode 100644 index 0000000000..9ae5a8e343 --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.UTF-8.html @@ -0,0 +1,17 @@ + + + + +À1
+B1
+ç1
+D1
+Ã2
+B2
+C2
+Ð2
+ + diff --git a/tests/PhpWordTests/_files/html/charset.gb18030.html b/tests/PhpWordTests/_files/html/charset.gb18030.html new file mode 100644 index 0000000000..271a55fc54 --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.gb18030.html @@ -0,0 +1,9 @@ + + + +ӻ
+ + diff --git a/tests/PhpWordTests/_files/html/charset.nocharset.html b/tests/PhpWordTests/_files/html/charset.nocharset.html new file mode 100644 index 0000000000..d6829b2edc --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.nocharset.html @@ -0,0 +1,8 @@ +À1
+B1
+ç1
+D1
+Ã2
+B2
+C2
+Ð2
diff --git a/tests/PhpWordTests/_files/html/charset.unknown.html b/tests/PhpWordTests/_files/html/charset.unknown.html new file mode 100644 index 0000000000..189638a80f --- /dev/null +++ b/tests/PhpWordTests/_files/html/charset.unknown.html @@ -0,0 +1,17 @@ + + + + +À1
+B1
+ç1
+D1
+Ã2
+B2
+C2
+Ð2
+ +