Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 #3672

Merged
merged 4 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions src/PhpSpreadsheet/Reader/Xls.php
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,15 @@ class Xls extends BaseReader
*
* @var int
*/
private $version;
private $version = 0;

/**
* Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
* For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
*
* @var string
*/
private $codepage;
private $codepage = '';

/**
* Shared formats.
Expand Down Expand Up @@ -459,6 +459,11 @@ public function setCodepage(string $codepage): void
$this->codepage = $codepage;
}

public function getCodepage(): string
{
return $this->codepage;
}

/**
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
*
Expand Down Expand Up @@ -498,6 +503,10 @@ public function listWorksheetNames($filename)
$this->readDefault();

break 2;
case self::XLS_TYPE_CODEPAGE:
$this->readCodepage();

break;
default:
$this->readDefault();

Expand Down Expand Up @@ -557,6 +566,10 @@ public function listWorksheetInfo($filename)
$this->readDefault();

break 2;
case self::XLS_TYPE_CODEPAGE:
$this->readCodepage();

break;
default:
$this->readDefault();

Expand Down Expand Up @@ -8088,4 +8101,9 @@ private function setCFRules(array $cellRanges, string $type, string $operator, $
$this->phpSheet->getStyle($cellRange)->setConditionalStyles($conditionalStyles);
}
}

public function getVersion(): int
{
return $this->version;
}
}
142 changes: 142 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Xls/InfoNamesTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Reader\Xls;

use PhpOffice\PhpSpreadsheet\Reader\Xls;
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
use PHPUnit\Framework\TestCase;

class InfoNamesTest extends TestCase
{
public function testWorksheetNamesBiff5(): void
{
$filename = 'samples/templates/30templatebiff5.xls';
$reader = new Xls();
$names = $reader->listWorksheetNames($filename);
$expected = ['Invoice', 'Terms and conditions'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff5(): void
{
$filename = 'samples/templates/30templatebiff5.xls';
$reader = new Xls();
$info = $reader->listWorksheetInfo($filename);
$expected = [
[
'worksheetName' => 'Invoice',
'lastColumnLetter' => 'E',
'lastColumnIndex' => 4,
'totalRows' => 19,
'totalColumns' => 5,
],
[
'worksheetName' => 'Terms and conditions',
'lastColumnLetter' => 'B',
'lastColumnIndex' => 1,
'totalRows' => 3,
'totalColumns' => 2,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertSame('CP1252', $reader->getCodepage());
}

public function testWorksheetNamesBiff8(): void
{
$filename = 'samples/templates/31docproperties.xls';
$reader = new Xls();
$names = $reader->listWorksheetNames($filename);
$expected = ['Worksheet'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff8(): void
{
$filename = 'samples/templates/31docproperties.xls';
$reader = new Xls();
$info = $reader->listWorksheetInfo($filename);
$expected = [
[
'worksheetName' => 'Worksheet',
'lastColumnLetter' => 'B',
'lastColumnIndex' => 1,
'totalRows' => 1,
'totalColumns' => 2,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
self::assertSame('UTF-16LE', $reader->getCodepage());
}

/**
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
* as MAC-CENTRALEUROPE on some systems. Issue #549.
*/
private const MAC_CE = ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'];

private const MAC_FILE5 = 'tests/data/Reader/XLS/maccentraleurope.biff5.xls';
private const MAC_FILE8 = 'tests/data/Reader/XLS/maccentraleurope.xls';

public function testWorksheetNamesBiff5Mac(): void
{
$codePages = CodePage::getEncodings();
self::assertSame(self::MAC_CE, $codePages[10029]);
$reader = new Xls();
$names = $reader->listWorksheetNames(self::MAC_FILE5);
$expected = ['Ärkusz1'];
self::assertSame($expected, $names);
}

public function testWorksheetInfoBiff5Mac(): void
{
$codePages = CodePage::getEncodings();
// prior test has replaced array with single string
self::assertContains($codePages[10029], self::MAC_CE);
$reader = new Xls();
$info = $reader->listWorksheetInfo(self::MAC_FILE5);
$expected = [
[
'worksheetName' => 'Ärkusz1',
'lastColumnLetter' => 'P',
'lastColumnIndex' => 15,
'totalRows' => 3,
'totalColumns' => 16,
],
];
self::assertSame($expected, $info);
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertContains($reader->getCodepage(), self::MAC_CE);
}

public function testLoadMacCentralEuropeBiff5(): void
{
$reader = new Xls();
$spreadsheet = $reader->load(self::MAC_FILE5);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ärkusz1', $sheet->getTitle());
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
self::assertContains($reader->getCodepage(), self::MAC_CE);
$spreadsheet->disconnectWorksheets();
}

public function testLoadMacCentralEuropeBiff8(): void
{
// Document is UTF-16LE as a whole,
// but some strings are stored as MACCENTRALEUROPE
$reader = new Xls();
$spreadsheet = $reader->load(self::MAC_FILE8);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Arkusz1', $sheet->getTitle());
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
self::assertSame('UTF-16LE', $reader->getCodepage());
$properties = $spreadsheet->getProperties();
// the following is stored as MACCENTRALEUROPE, not UTF-16LE
self::assertSame('Użytkownik Microsoft Office', $properties->getLastModifiedBy());
$spreadsheet->disconnectWorksheets();
}
}
35 changes: 0 additions & 35 deletions tests/PhpSpreadsheetTests/Reader/Xls/XlsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Reader\Xls;
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;

class XlsTest extends AbstractFunctional
Expand Down Expand Up @@ -88,40 +87,6 @@ public function testLoadXlsBug1592(): void
$newspreadsheet->disconnectWorksheets();
}

/**
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
* as MAC-CENTRALEUROPE on some systems. Issue #549.
*/
public function testLoadMacCentralEurope(): void
{
$codePages = CodePage::getEncodings();
self::assertIsArray($codePages[10029]);
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
$reader = new Xls();
// When no fix applied, spreadsheet fails to load on some systems
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
$spreadsheet->disconnectWorksheets();
}

/**
* First test changes array entry in CodePage.
* This test confirms new that new entry is okay.
*/
public function testLoadMacCentralEurope2(): void
{
$codePages = CodePage::getEncodings();
self::assertIsString($codePages[10029]);
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
$reader = new Xls();
// When no fix applied, spreadsheet fails to load on some systems
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
$spreadsheet->disconnectWorksheets();
}

public function testLoadXlsBug1114(): void
{
$filename = 'tests/data/Reader/XLS/bug1114.xls';
Expand Down
Binary file added tests/data/Reader/XLS/maccentraleurope.biff5.xls
Binary file not shown.