Skip to content

Commit

Permalink
Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 (#3672)
Browse files Browse the repository at this point in the history
* Read Code Page for Xls ListWorksheetInfo/Names for BIFF5

Fix #3671. Xls reader was not processing Code Page as part of functions ListWorksheetInfo/Names, which was causing them to fail for for BIFF5 (and BIFF7); this was not a problem for BIFF8. There were no unit tests for these functions for either BIFF5 or BIFF8. There are now.

* Add getVersion and getCodePage Methods

These came about because test file for non-standard codepage was supposed to be BIFF5, but turned out to be BIFF8 using UTF-16 with some string data otherwise encoded. Add a BIFF5 equivalent (some hex editing was required), and the means to distinguish one from the other.

* Found MACCENTRALEUROPE Text in BIFF8

It was used for 'Last Modified By' property, even though bulk of spreadsheet uses UTF-16LE. Add a test.
  • Loading branch information
oleibman authored Aug 17, 2023
1 parent dd97b8f commit 37a8a56
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 37 deletions.
22 changes: 20 additions & 2 deletions src/PhpSpreadsheet/Reader/Xls.php
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,15 @@ class Xls extends BaseReader
*
* @var int
*/
private $version;
private $version = 0;

/**
* Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
* For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
*
* @var string
*/
private $codepage;
private $codepage = '';

/**
* Shared formats.
Expand Down Expand Up @@ -459,6 +459,11 @@ public function setCodepage(string $codepage): void
$this->codepage = $codepage;
}

public function getCodepage(): string
{
return $this->codepage;
}

/**
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
*
Expand Down Expand Up @@ -498,6 +503,10 @@ public function listWorksheetNames($filename)
$this->readDefault();

break 2;
case self::XLS_TYPE_CODEPAGE:
$this->readCodepage();

break;
default:
$this->readDefault();

Expand Down Expand Up @@ -557,6 +566,10 @@ public function listWorksheetInfo($filename)
$this->readDefault();

break 2;
case self::XLS_TYPE_CODEPAGE:
$this->readCodepage();

break;
default:
$this->readDefault();

Expand Down Expand Up @@ -8088,4 +8101,9 @@ private function setCFRules(array $cellRanges, string $type, string $operator, $
$this->phpSheet->getStyle($cellRange)->setConditionalStyles($conditionalStyles);
}
}

public function getVersion(): int
{
return $this->version;
}
}
142 changes: 142 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Xls/InfoNamesTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Reader\Xls;

use PhpOffice\PhpSpreadsheet\Reader\Xls;
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
use PHPUnit\Framework\TestCase;

class InfoNamesTest extends TestCase
{
public function testWorksheetNamesBiff5(): void
{
$filename = 'samples/templates/30templatebiff5.xls';
$reader = new Xls();
$names = $reader->listWorksheetNames($filename);
$expected = ['Invoice', 'Terms and conditions'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff5(): void
{
$filename = 'samples/templates/30templatebiff5.xls';
$reader = new Xls();
$info = $reader->listWorksheetInfo($filename);
$expected = [
[
'worksheetName' => 'Invoice',
'lastColumnLetter' => 'E',
'lastColumnIndex' => 4,
'totalRows' => 19,
'totalColumns' => 5,
],
[
'worksheetName' => 'Terms and conditions',
'lastColumnLetter' => 'B',
'lastColumnIndex' => 1,
'totalRows' => 3,
'totalColumns' => 2,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertSame('CP1252', $reader->getCodepage());
}

public function testWorksheetNamesBiff8(): void
{
$filename = 'samples/templates/31docproperties.xls';
$reader = new Xls();
$names = $reader->listWorksheetNames($filename);
$expected = ['Worksheet'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff8(): void
{
$filename = 'samples/templates/31docproperties.xls';
$reader = new Xls();
$info = $reader->listWorksheetInfo($filename);
$expected = [
[
'worksheetName' => 'Worksheet',
'lastColumnLetter' => 'B',
'lastColumnIndex' => 1,
'totalRows' => 1,
'totalColumns' => 2,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
self::assertSame('UTF-16LE', $reader->getCodepage());
}

/**
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
* as MAC-CENTRALEUROPE on some systems. Issue #549.
*/
private const MAC_CE = ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'];

private const MAC_FILE5 = 'tests/data/Reader/XLS/maccentraleurope.biff5.xls';
private const MAC_FILE8 = 'tests/data/Reader/XLS/maccentraleurope.xls';

public function testWorksheetNamesBiff5Mac(): void
{
$codePages = CodePage::getEncodings();
self::assertSame(self::MAC_CE, $codePages[10029]);
$reader = new Xls();
$names = $reader->listWorksheetNames(self::MAC_FILE5);
$expected = ['Ärkusz1'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff5Mac(): void
{
$codePages = CodePage::getEncodings();
// prior test has replaced array with single string
self::assertContains($codePages[10029], self::MAC_CE);
$reader = new Xls();
$info = $reader->listWorksheetInfo(self::MAC_FILE5);
$expected = [
[
'worksheetName' => 'Ärkusz1',
'lastColumnLetter' => 'P',
'lastColumnIndex' => 15,
'totalRows' => 3,
'totalColumns' => 16,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertContains($reader->getCodepage(), self::MAC_CE);
}

public function testLoadMacCentralEuropeBiff5(): void
{
$reader = new Xls();
$spreadsheet = $reader->load(self::MAC_FILE5);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ärkusz1', $sheet->getTitle());
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertContains($reader->getCodepage(), self::MAC_CE);
$spreadsheet->disconnectWorksheets();
}

public function testLoadMacCentralEuropeBiff8(): void
{
// Document is UTF-16LE as a whole,
// but some strings are stored as MACCENTRALEUROPE
$reader = new Xls();
$spreadsheet = $reader->load(self::MAC_FILE8);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Arkusz1', $sheet->getTitle());
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
self::assertSame('UTF-16LE', $reader->getCodepage());
$properties = $spreadsheet->getProperties();
// the following is stored as MACCENTRALEUROPE, not UTF-16LE
self::assertSame('Użytkownik Microsoft Office', $properties->getLastModifiedBy());
$spreadsheet->disconnectWorksheets();
}
}
35 changes: 0 additions & 35 deletions tests/PhpSpreadsheetTests/Reader/Xls/XlsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Reader\Xls;
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;

class XlsTest extends AbstractFunctional
Expand Down Expand Up @@ -88,40 +87,6 @@ public function testLoadXlsBug1592(): void
$newspreadsheet->disconnectWorksheets();
}

/**
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
* as MAC-CENTRALEUROPE on some systems. Issue #549.
*/
public function testLoadMacCentralEurope(): void
{
$codePages = CodePage::getEncodings();
self::assertIsArray($codePages[10029]);
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
$reader = new Xls();
// When no fix applied, spreadsheet fails to load on some systems
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
$spreadsheet->disconnectWorksheets();
}

/**
* First test changes array entry in CodePage.
* This test confirms new that new entry is okay.
*/
public function testLoadMacCentralEurope2(): void
{
$codePages = CodePage::getEncodings();
self::assertIsString($codePages[10029]);
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
$reader = new Xls();
// When no fix applied, spreadsheet fails to load on some systems
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
$spreadsheet->disconnectWorksheets();
}

public function testLoadXlsBug1114(): void
{
$filename = 'tests/data/Reader/XLS/bug1114.xls';
Expand Down
Binary file added tests/data/Reader/XLS/maccentraleurope.biff5.xls
Binary file not shown.

0 comments on commit 37a8a56

Please sign in to comment.