-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 (#3672)
* Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 Fix #3671. Xls reader was not processing Code Page as part of functions ListWorksheetInfo/Names, which was causing them to fail for BIFF5 (and BIFF7); this was not a problem for BIFF8. There were no unit tests for these functions for either BIFF5 or BIFF8. There are now. * Add getVersion and getCodePage Methods These came about because test file for non-standard codepage was supposed to be BIFF5, but turned out to be BIFF8 using UTF-16 with some string data otherwise encoded. Add a BIFF5 equivalent (some hex editing was required), and the means to distinguish one from the other. * Found MACCENTRALEUROPE Text in BIFF8 It was used for 'Last Modified By' property, even though bulk of spreadsheet uses UTF-16LE. Add a test.
- Loading branch information
Showing
4 changed files
with
162 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
<?php | ||
|
||
namespace PhpOffice\PhpSpreadsheetTests\Reader\Xls; | ||
|
||
use PhpOffice\PhpSpreadsheet\Reader\Xls; | ||
use PhpOffice\PhpSpreadsheet\Shared\CodePage; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
/**
 * Exercises Xls::listWorksheetNames(), Xls::listWorksheetInfo() and Xls::load()
 * against BIFF5 and BIFF8 fixtures, checking the worksheet metadata returned
 * as well as the version and code page the reader reports afterwards.
 */
class InfoNamesTest extends TestCase
{
    private const BIFF5_TEMPLATE = 'samples/templates/30templatebiff5.xls';
    private const BIFF8_TEMPLATE = 'samples/templates/31docproperties.xls';

    /**
     * Accepted names for the MACCENTRALEUROPE encoding, which is implemented
     * as MAC-CENTRALEUROPE on some systems. Issue #549.
     */
    private const MAC_CE = ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'];

    /** Key of the Mac Central Europe entry in CodePage::getEncodings(). */
    private const MAC_CE_CODE_PAGE = 10029;

    private const MAC_FILE5 = 'tests/data/Reader/XLS/maccentraleurope.biff5.xls';
    private const MAC_FILE8 = 'tests/data/Reader/XLS/maccentraleurope.xls';

    /**
     * Worksheet names are listed for a BIFF5 workbook.
     */
    public function testWorksheetNamesBiff5(): void
    {
        $reader = new Xls();
        $names = $reader->listWorksheetNames(self::BIFF5_TEMPLATE);
        self::assertSame(['Invoice', 'Terms and conditions'], $names);
    }

    /**
     * Worksheet dimensions are listed for a BIFF5 workbook, and the reader
     * then reports XLS_BIFF7 with code page CP1252.
     */
    public function testWorksheetInfoBiff5(): void
    {
        $reader = new Xls();
        $info = $reader->listWorksheetInfo(self::BIFF5_TEMPLATE);
        $expected = [
            [
                'worksheetName' => 'Invoice',
                'lastColumnLetter' => 'E',
                'lastColumnIndex' => 4,
                'totalRows' => 19,
                'totalColumns' => 5,
            ],
            [
                'worksheetName' => 'Terms and conditions',
                'lastColumnLetter' => 'B',
                'lastColumnIndex' => 1,
                'totalRows' => 3,
                'totalColumns' => 2,
            ],
        ];
        self::assertSame($expected, $info);
        self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
        self::assertSame('CP1252', $reader->getCodepage());
    }

    /**
     * Worksheet names are listed for a BIFF8 workbook.
     */
    public function testWorksheetNamesBiff8(): void
    {
        $reader = new Xls();
        $names = $reader->listWorksheetNames(self::BIFF8_TEMPLATE);
        self::assertSame(['Worksheet'], $names);
    }

    /**
     * Worksheet dimensions are listed for a BIFF8 workbook, and the reader
     * then reports XLS_BIFF8 with code page UTF-16LE.
     */
    public function testWorksheetInfoBiff8(): void
    {
        $reader = new Xls();
        $info = $reader->listWorksheetInfo(self::BIFF8_TEMPLATE);
        $expected = [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => 'B',
                'lastColumnIndex' => 1,
                'totalRows' => 1,
                'totalColumns' => 2,
            ],
        ];
        self::assertSame($expected, $info);
        self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
        self::assertSame('UTF-16LE', $reader->getCodepage());
    }

    /**
     * Worksheet names are decoded for a BIFF5 workbook that uses the
     * Mac Central Europe code page.
     */
    public function testWorksheetNamesBiff5Mac(): void
    {
        self::assertMacCentralEuropeRegistered();
        $reader = new Xls();
        $names = $reader->listWorksheetNames(self::MAC_FILE5);
        self::assertSame(['Ärkusz1'], $names);
    }

    /**
     * Worksheet dimensions are listed for the Mac Central Europe BIFF5
     * workbook, and the reader reports one of the accepted encoding names.
     */
    public function testWorksheetInfoBiff5Mac(): void
    {
        self::assertMacCentralEuropeRegistered();
        $reader = new Xls();
        $info = $reader->listWorksheetInfo(self::MAC_FILE5);
        $expected = [
            [
                'worksheetName' => 'Ärkusz1',
                'lastColumnLetter' => 'P',
                'lastColumnIndex' => 15,
                'totalRows' => 3,
                'totalColumns' => 16,
            ],
        ];
        self::assertSame($expected, $info);
        self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
        self::assertContains($reader->getCodepage(), self::MAC_CE);
    }

    /**
     * A full load of the Mac Central Europe BIFF5 workbook decodes both the
     * sheet title and cell text.
     */
    public function testLoadMacCentralEuropeBiff5(): void
    {
        $reader = new Xls();
        $spreadsheet = $reader->load(self::MAC_FILE5);

        try {
            $sheet = $spreadsheet->getActiveSheet();
            self::assertSame('Ärkusz1', $sheet->getTitle());
            self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
            self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
            self::assertContains($reader->getCodepage(), self::MAC_CE);
        } finally {
            // Release the workbook even when an assertion above fails.
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * A full load of the BIFF8 variant: the document is UTF-16LE as a whole,
     * but some strings are stored as MACCENTRALEUROPE.
     */
    public function testLoadMacCentralEuropeBiff8(): void
    {
        $reader = new Xls();
        $spreadsheet = $reader->load(self::MAC_FILE8);

        try {
            $sheet = $spreadsheet->getActiveSheet();
            self::assertSame('Arkusz1', $sheet->getTitle());
            self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
            self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
            self::assertSame('UTF-16LE', $reader->getCodepage());
            $properties = $spreadsheet->getProperties();
            // the following is stored as MACCENTRALEUROPE, not UTF-16LE
            self::assertSame('Użytkownik Microsoft Office', $properties->getLastModifiedBy());
        } finally {
            // Release the workbook even when an assertion above fails.
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * Asserts that code page 10029 is registered as Mac Central Europe.
     *
     * The entry from CodePage::getEncodings() is accepted in either form:
     * the full list of candidate names, or a single name from that list.
     * An earlier test may already have replaced the list with a single
     * string, so checking only one form would make the result depend on
     * the order in which tests run.
     */
    private static function assertMacCentralEuropeRegistered(): void
    {
        $encoding = CodePage::getEncodings()[self::MAC_CE_CODE_PAGE];
        if (is_array($encoding)) {
            self::assertSame(self::MAC_CE, $encoding);
        } else {
            self::assertContains($encoding, self::MAC_CE);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.