Skip to content

Commit 39f223d

Browse files
committed
[BUGFIX] Respect language based style names on reading Word files
Microsoft Office saves Office documents with language-based style mappings for default styles. For example, if a German-language version of Word is used, it writes the following to `word/styles.xml` in the container archive (*.docx): ``` <w:style w:type="paragraph" w:styleId="berschrift1"> <w:name w:val="heading 1"/> .... </w:style> ``` whereas an English-language version would write: ``` <w:style w:type="paragraph" w:styleId="Heading1"> <w:name w:val="heading 1"/> ... </w:style> ``` The value of `<w:name />` defines the internal native code identifier, whereas the `w:styleId` attribute on the outer `<w:style />` tag describes the virtual or alias name. Later parsing of the document structure, for example the paragraphs, references a style by its alias (`w:styleId`). The reader code uses hardcoded, case-insensitive RegEx matching against the English variant (`Heading\s*(\d)`), which does not match the language-based alias at all. Therefore, this change contains the following: * An alias map is implemented and used to register title aliases, together with a corresponding lookup method. * The lookup method is used to resolve the alias wherever the hardcoded English-language RegEx has to be applied.
1 parent 8b891bb commit 39f223d

File tree

3 files changed

+79
-7
lines changed

3 files changed

+79
-7
lines changed

src/PhpWord/Reader/Word2007/AbstractPart.php

+11-4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
use PhpOffice\PhpWord\Element\TrackChange;
2929
use PhpOffice\PhpWord\PhpWord;
3030
use PhpOffice\PhpWord\Shared\XMLReader;
31+
use PhpOffice\PhpWord\Style;
3132

3233
/**
3334
* Abstract part reader.
@@ -290,14 +291,20 @@ protected function readParagraph(XMLReader $xmlReader, DOMElement $domNode, $par
290291
private function getHeadingDepth(?array $paragraphStyle = null)
291292
{
292293
if (is_array($paragraphStyle) && isset($paragraphStyle['styleName'])) {
293-
if ('Title' === $paragraphStyle['styleName']) {
294+
// Title styles get special handling when styles.xml is loaded and registered, therefore we need to
295+
// use the alias for it here to properly check for the correct styling.
296+
/** @see Style::addTitleStyle() */
297+
/** @see Styles::read() */
298+
$checkStyleName = Style::findAliasForStyleName($paragraphStyle['styleName']);
299+
// Title does not have a depth, early return.
300+
if ('Title' === $checkStyleName) {
294301
return 0;
295302
}
296-
297303
$headingMatches = [];
298-
preg_match('/Heading(\d)/', $paragraphStyle['styleName'], $headingMatches);
304+
// We need to support here multiple variants: 'Heading 1' , 'Heading_1', 'Heading1'
305+
preg_match('/Heading([_\s]*)(\d)/', $checkStyleName, $headingMatches);
299306
if (!empty($headingMatches)) {
300-
return $headingMatches[1];
307+
return $headingMatches[2];
301308
}
302309
}
303310

src/PhpWord/Reader/Word2007/Styles.php

+14-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
use PhpOffice\PhpWord\PhpWord;
2121
use PhpOffice\PhpWord\Shared\XMLReader;
22+
use PhpOffice\PhpWord\Style;
2223
use PhpOffice\PhpWord\Style\Language;
2324

2425
/**
@@ -65,8 +66,17 @@ public function read(PhpWord $phpWord): void
6566
foreach ($nodes as $node) {
6667
$type = $xmlReader->getAttribute('w:type', $node);
6768
$name = $xmlReader->getAttribute('w:val', $node, 'w:name');
68-
if (null === $name) {
69-
$name = $xmlReader->getAttribute('w:styleId', $node);
69+
$alias = $xmlReader->getAttribute('w:styleId', $node);
70+
if (null === $name && null === $alias) {
71+
// no name or alias, skip it as matching would not be possible otherwise.
72+
continue;
73+
}
74+
if (null === $name && null !== $alias) {
75+
// fully custom style, use alias as name.
76+
$name = $alias;
77+
}
78+
if (null !== $name && null === $alias) {
79+
$alias = $name;
7080
}
7181
$headingMatches = [];
7282
preg_match('/Heading\s*(\d)/i', $name, $headingMatches);
@@ -76,7 +86,8 @@ public function read(PhpWord $phpWord): void
7686
$paragraphStyle = $this->readParagraphStyle($xmlReader, $node);
7787
$fontStyle = $this->readFontStyle($xmlReader, $node);
7888
if (!empty($headingMatches)) {
79-
$phpWord->addTitleStyle($headingMatches[1], $fontStyle, $paragraphStyle);
89+
$titleStyleName = $phpWord->addTitleStyle($headingMatches[1], $fontStyle, $paragraphStyle)->getStyleName();
90+
Style::addStyleNameAlias($alias, $titleStyleName);
8091
} else {
8192
if (empty($fontStyle)) {
8293
if (is_array($paragraphStyle)) {

src/PhpWord/Style.php

+54
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ class Style
3535
*/
3636
private static $styles = [];
3737

38+
/**
39+
* Maps a style alias (e.g. the `w:styleId` read from styles.xml) to the style name it was registered for.
40+
*
41+
* @var array<string, string>
42+
*/
43+
private static $nameToIdentifierMapping = [];
44+
3845
/**
3946
* Add paragraph style.
4047
*
@@ -124,6 +131,52 @@ public static function addTableStyle($styleName, $styleTable, $styleFirstRow = n
124131
return self::setStyleValues($styleName, new Table($styleTable, $styleFirstRow), null);
125132
}
126133

134+
/**
 * Register $alias as an alternative key for $styleName.
 *
 * The entry is stored under $alias; registering the same $alias again overwrites the previous entry.
 *
 * @param string $alias Key to register, e.g. a language-specific style id
 * @param string $styleName Style name the alias should resolve to
 *
 * @see self::findAliasForStyleName()
 */
public static function addStyleNameAlias($alias, $styleName): void
{
    // Keyed by alias so that lookups by alias are a plain array access.
    self::$nameToIdentifierMapping[$alias] = $styleName;
}
146+
147+
/**
 * Find the style name for a specified $alias.
 *
 * If $alias already is a registered style name, it is returned unchanged. Otherwise the style name
 * registered for $alias is returned, or an empty string if nothing could be found.
 *
 * @param string $alias
 *
 * @return string
 */
public static function findStyleNameForAlias($alias)
{
    // Compare the caller's argument against the mapped style names. $alias must not be reused as a
    // loop variable here: doing so overwrites the argument and the lookup no longer depends on it.
    if (in_array($alias, self::$nameToIdentifierMapping, true)) {
        return $alias;
    }

    // Entries are keyed by alias, see self::addStyleNameAlias().
    return self::$nameToIdentifierMapping[$alias] ?? '';
}
165+
166+
/**
 * Look up $styleName in the alias mapping and return the mapped value.
 *
 * If there is no entry for $styleName, or the entry is empty, $styleName itself is returned.
 *
 * @param string $styleName Key to look up in the mapping filled by self::addStyleNameAlias()
 *
 * @return string
 */
public static function findAliasForStyleName($styleName)
{
    // empty() also covers a missing key, so no separate isset() check is needed.
    if (empty(self::$nameToIdentifierMapping[$styleName])) {
        return $styleName;
    }

    return self::$nameToIdentifierMapping[$styleName];
}
179+
127180
/**
128181
* Count styles.
129182
*
@@ -144,6 +197,7 @@ public static function countStyles()
144197
public static function resetStyles(): void
{
    // Clear the registered styles together with the alias mapping.
    self::$styles = self::$nameToIdentifierMapping = [];
}
148202

149203
/**

0 commit comments

Comments
 (0)