From 4a55e40cd745dcc19a7dc0a3380896cd129fee33 Mon Sep 17 00:00:00 2001 From: Huong Nguyen Date: Fri, 3 Feb 2023 14:42:07 +0700 Subject: [PATCH 1/5] Fix PHP8.2 str_split function returns empty arrays for empty strings --- .../Calculation/Engineering/ConvertHex.php | 2 +- .../Calculation/Engineering/ConvertOctal.php | 2 +- src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php | 2 +- src/PhpSpreadsheet/Reader/Csv/Delimiter.php | 12 +++++------- src/PhpSpreadsheet/Reader/Security/XmlScanner.php | 2 +- .../ConditionalFormattingRuleExtension.php | 2 +- .../Style/NumberFormat/DateFormatter.php | 2 +- tests/data/Calculation/Engineering/HEX2DEC.php | 1 + tests/data/Calculation/Engineering/OCT2DEC.php | 1 + tests/data/Calculation/MathTrig/ARABIC.php | 4 ++++ 10 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php b/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php index 55ce209ecd..c0e9e458ea 100644 --- a/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php +++ b/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php @@ -96,7 +96,7 @@ public static function toDecimal($value) } $binX = ''; - foreach (str_split($value) as $char) { + foreach (mb_str_split($value) as $char) { $binX .= str_pad(base_convert($char, 16, 2), 4, '0', STR_PAD_LEFT); } if (strlen($binX) == 40 && $binX[0] == '1') { diff --git a/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php b/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php index add7aba01b..44af5cff4e 100644 --- a/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php +++ b/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php @@ -96,7 +96,7 @@ public static function toDecimal($value) } $binX = ''; - foreach (str_split($value) as $char) { + foreach (mb_str_split($value) as $char) { $binX .= str_pad(decbin((int) $char), 3, '0', STR_PAD_LEFT); } if (strlen($binX) == 30 && $binX[0] == '1') { diff --git 
a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php index ee4885057f..bdc0e500e9 100644 --- a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php +++ b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php @@ -60,7 +60,7 @@ private static function mollifyScrutinizer($value): array private static function strSplit(string $roman): array { - $rslt = str_split($roman); + $rslt = mb_str_split($roman); return self::mollifyScrutinizer($rslt); } diff --git a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php index 029d4a186e..02360b4262 100644 --- a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php +++ b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php @@ -60,14 +60,12 @@ protected function countPotentialDelimiters(): void protected function countDelimiterValues(string $line, array $delimiterKeys): void { - $splitString = str_split($line, 1); - if (is_array($splitString)) { - $distribution = array_count_values($splitString); - $countLine = array_intersect_key($distribution, $delimiterKeys); + $splitString = mb_str_split($line, 1); + $distribution = array_count_values($splitString); + $countLine = array_intersect_key($distribution, $delimiterKeys); - foreach (self::POTENTIAL_DELIMETERS as $delimiter) { - $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0; - } + foreach (self::POTENTIAL_DELIMETERS as $delimiter) { + $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0; } } diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php index ad898ae410..56040d190a 100644 --- a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -145,7 +145,7 @@ public function scan($xml) $xml = $this->toUtf8($xml); // Don't rely purely on libxml_disable_entity_loader() - $pattern = '/\\0?' . implode('\\0?', /** @scrutinizer ignore-type */ str_split($this->pattern)) . '\\0?/'; + $pattern = '/\\0?' 
. implode('\\0?', /** @scrutinizer ignore-type */ mb_str_split($this->pattern)) . '\\0?/'; if (preg_match($pattern, $xml)) { throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'); diff --git a/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php b/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php index 0cdbc36851..9cfdc94262 100644 --- a/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php +++ b/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php @@ -40,7 +40,7 @@ public function __construct(?string $id = null, string $cfRule = self::CONDITION private function generateUuid(): string { - $chars = str_split('xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'); + $chars = mb_str_split('xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'); foreach ($chars as $i => $char) { if ($char === 'x') { diff --git a/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php b/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php index ba54b53593..2901327266 100644 --- a/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php +++ b/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php @@ -177,6 +177,6 @@ private static function setLowercaseCallback(array $matches): string private static function escapeQuotesCallback(array $matches): string { - return '\\' . implode('\\', /** @scrutinizer ignore-type */ str_split($matches[1])); + return '\\' . 
implode('\\', /** @scrutinizer ignore-type */ mb_str_split($matches[1])); } } diff --git a/tests/data/Calculation/Engineering/HEX2DEC.php b/tests/data/Calculation/Engineering/HEX2DEC.php index 0ecdd30c1e..aaf5c2e000 100644 --- a/tests/data/Calculation/Engineering/HEX2DEC.php +++ b/tests/data/Calculation/Engineering/HEX2DEC.php @@ -24,4 +24,5 @@ [-2147483648, '"ff80000000"'], [2147483648, '"80000000"'], [2147483647, '"7fffffff"'], + [0, '""'], ]; diff --git a/tests/data/Calculation/Engineering/OCT2DEC.php b/tests/data/Calculation/Engineering/OCT2DEC.php index 8ee3a170db..b234b58a48 100644 --- a/tests/data/Calculation/Engineering/OCT2DEC.php +++ b/tests/data/Calculation/Engineering/OCT2DEC.php @@ -17,4 +17,5 @@ ['#NUM!', '"37777777770"'], // too many digits [536870911, '"3777777777"'], // highest positive [-536870912, '"4000000000"'], // lowest negative + ['0', '""'], ]; diff --git a/tests/data/Calculation/MathTrig/ARABIC.php b/tests/data/Calculation/MathTrig/ARABIC.php index e2a6f46bfd..2d1ae004b4 100644 --- a/tests/data/Calculation/MathTrig/ARABIC.php +++ b/tests/data/Calculation/MathTrig/ARABIC.php @@ -57,4 +57,8 @@ '#VALUE!', 'WRONG', ], + [ + 0, + '', + ], ]; From ea490a1d878be9b7ad9d039926d33251a5ec6b02 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:58:57 -0800 Subject: [PATCH 2/5] Remove Unwanted Tests --- src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php | 7 +++++-- src/PhpSpreadsheet/Reader/Security/XmlScanner.php | 2 +- tests/data/Calculation/Engineering/HEX2DEC.php | 4 +--- tests/data/Calculation/Engineering/OCT2DEC.php | 2 -- tests/data/Calculation/MathTrig/ARABIC.php | 4 ++++ 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php index 98c3e3dce6..8a8029816c 100644 --- a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php +++ b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php @@ 
-74,11 +74,14 @@ public static function evaluate(mixed $roman): array|int|string // Convert the roman numeral to an arabic number $negativeNumber = $roman[0] === '-'; if ($negativeNumber) { - $roman = substr($roman, 1); + $roman = trim(substr($roman, 1)); + if ($roman === '') { + return ExcelError::NAN(); + } } try { - $arabic = self::calculateArabic(str_split($roman)); + $arabic = self::calculateArabic(mb_str_split($roman)); } catch (Exception) { return ExcelError::VALUE(); // Invalid character detected } diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php index a80562b9f8..d518d7896d 100644 --- a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -87,7 +87,7 @@ private function findCharSet(string $xml): string public function scan($xml): string { // Don't rely purely on libxml_disable_entity_loader() - $pattern = '/\0*' . implode('\0*', str_split($this->pattern)) . '\0*/'; + $pattern = '/\0*' . implode('\0*', mb_str_split($this->pattern)) . 
'\0*/'; $xml = "$xml"; if (preg_match($pattern, $xml)) { diff --git a/tests/data/Calculation/Engineering/HEX2DEC.php b/tests/data/Calculation/Engineering/HEX2DEC.php index cdec8074f4..b2c35e554e 100644 --- a/tests/data/Calculation/Engineering/HEX2DEC.php +++ b/tests/data/Calculation/Engineering/HEX2DEC.php @@ -12,9 +12,7 @@ ['4886718345', '123456789'], [ExcelError::NAN(), '123.45'], ['0', '0'], - [11, 'A2'], - [0, 'A3'], - [0, ''], + ['0', ''], [ExcelError::NAN(), 'G3579A'], [ExcelError::VALUE(), true], [ExcelError::VALUE(), false], diff --git a/tests/data/Calculation/Engineering/OCT2DEC.php b/tests/data/Calculation/Engineering/OCT2DEC.php index 15d4bc4bdf..9f3a919404 100644 --- a/tests/data/Calculation/Engineering/OCT2DEC.php +++ b/tests/data/Calculation/Engineering/OCT2DEC.php @@ -15,8 +15,6 @@ [ExcelError::NAN(), '3579'], ['44', '54'], ['-165', '7777777533'], // 2's Complement - ['65', 'A2'], - ['0', 'A3'], ['0', ''], [ExcelError::NAN(), '37777777770'], // too many digits ['536870911', '3777777777'], // highest positive diff --git a/tests/data/Calculation/MathTrig/ARABIC.php b/tests/data/Calculation/MathTrig/ARABIC.php index 2c7b0319e9..c3bf746847 100644 --- a/tests/data/Calculation/MathTrig/ARABIC.php +++ b/tests/data/Calculation/MathTrig/ARABIC.php @@ -63,4 +63,8 @@ 0, '', ], + [ + '#NUM!', + '-', + ], ]; From a1f90b76e42e85248ea6f169ac5d15bd279e75fa Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:52:31 -0800 Subject: [PATCH 3/5] Add A Test --- tests/data/Style/NumberFormatDates.php | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tests/data/Style/NumberFormatDates.php b/tests/data/Style/NumberFormatDates.php index 477deba73f..82458ee3b2 100644 --- a/tests/data/Style/NumberFormatDates.php +++ b/tests/data/Style/NumberFormatDates.php @@ -8,38 +8,32 @@ 22269.0625, 'dd-mm-yyyy hh:mm:ss', ], - // Oasis uses upper-case - [ + 'Oasis uses upper-case' => [ 
'12/19/1960 01:30:00', 22269.0625, 'MM/DD/YYYY HH:MM:SS', ], - // Date with plaintext escaped with a \ - [ + 'plaintext escaped with backslash' => [ '1960-12-19T01:30:00', 22269.0625, 'yyyy-mm-dd\Thh:mm:ss', ], - // Date with plaintext in quotes - [ + 'plaintext in quotes' => [ '1960-12-19T01:30:00 Z', 22269.0625, 'yyyy-mm-dd"T"hh:mm:ss \Z', ], - // Date with quoted formatting characters - [ + 'quoted formatting characters' => [ 'y-m-d 1960-12-19 h:m:s 01:30:00', 22269.0625, '"y-m-d" yyyy-mm-dd "h:m:s" hh:mm:ss', ], - // Date with quoted formatting characters - [ - 'y-m-d 1960-12-19 h:m:s 01:30:00', + 'quoted formatting non-ascii characters' => [ + '§1960-12-19', 22269.0625, - '"y-m-d "yyyy-mm-dd" h:m:s "hh:mm:ss', + '"§"yyyy-mm-dd', ], - // Date with fractional/decimal time - [ + 'fractional/decimal time' => [ '2023/02/28 0:00:00.000', 44985, 'yyyy/mm/dd\ h:mm:ss.000', From 497d036e91469dda967b127fe6a04c80b10a1ec1 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Thu, 27 Feb 2025 09:49:13 -0800 Subject: [PATCH 4/5] Make Use of UTF-8 Explicit The PHP manual describes it as good practice to do so. 
--- src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php | 2 +- src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php | 2 +- src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php | 2 +- src/PhpSpreadsheet/Reader/Csv/Delimiter.php | 2 +- src/PhpSpreadsheet/Reader/Security/XmlScanner.php | 2 +- .../ConditionalFormattingRuleExtension.php | 2 +- src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php b/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php index 6ebdf2331a..40eacf1d18 100644 --- a/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php +++ b/src/PhpSpreadsheet/Calculation/Engineering/ConvertHex.php @@ -96,7 +96,7 @@ public static function toDecimal($value) } $binX = ''; - foreach (mb_str_split($value) as $char) { + foreach (mb_str_split($value, 1, 'UTF-8') as $char) { $binX .= str_pad(base_convert($char, 16, 2), 4, '0', STR_PAD_LEFT); } if (strlen($binX) == 40 && $binX[0] == '1') { diff --git a/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php b/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php index 783e7fd075..d6ef56cb7d 100644 --- a/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php +++ b/src/PhpSpreadsheet/Calculation/Engineering/ConvertOctal.php @@ -96,7 +96,7 @@ public static function toDecimal($value) } $binX = ''; - foreach (mb_str_split($value) as $char) { + foreach (mb_str_split($value, 1, 'UTF-8') as $char) { $binX .= str_pad(decbin((int) $char), 3, '0', STR_PAD_LEFT); } if (strlen($binX) == 30 && $binX[0] == '1') { diff --git a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php index 8a8029816c..8901d3f4bf 100644 --- a/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php +++ b/src/PhpSpreadsheet/Calculation/MathTrig/Arabic.php @@ -81,7 +81,7 @@ public static function evaluate(mixed $roman): array|int|string } try { - $arabic 
= self::calculateArabic(mb_str_split($roman)); + $arabic = self::calculateArabic(mb_str_split($roman, 1, 'UTF-8')); } catch (Exception) { return ExcelError::VALUE(); // Invalid character detected } diff --git a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php index 9d95cdef3a..348331e35b 100644 --- a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php +++ b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php @@ -55,7 +55,7 @@ protected function countPotentialDelimiters(): void protected function countDelimiterValues(string $line, array $delimiterKeys): void { - $splitString = mb_str_split($line, 1); + $splitString = mb_str_split($line, 1, 'UTF-8'); $distribution = array_count_values($splitString); $countLine = array_intersect_key($distribution, $delimiterKeys); diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php index d518d7896d..6d4ed449eb 100644 --- a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -87,7 +87,7 @@ private function findCharSet(string $xml): string public function scan($xml): string { // Don't rely purely on libxml_disable_entity_loader() - $pattern = '/\0*' . implode('\0*', mb_str_split($this->pattern)) . '\0*/'; + $pattern = '/\0*' . implode('\0*', mb_str_split($this->pattern, 1, 'UTF-8')) . 
'\0*/'; $xml = "$xml"; if (preg_match($pattern, $xml)) { diff --git a/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php b/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php index a6791381b3..ad7dfc4e52 100644 --- a/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php +++ b/src/PhpSpreadsheet/Style/ConditionalFormatting/ConditionalFormattingRuleExtension.php @@ -34,7 +34,7 @@ public function __construct(?string $id = null, string $cfRule = self::CONDITION private function generateUuid(): string { - $chars = mb_str_split('xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'); + $chars = mb_str_split('xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx', 1, 'UTF-8'); foreach ($chars as $i => $char) { if ($char === 'x') { diff --git a/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php b/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php index b188bed3a3..4b3d301d21 100644 --- a/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php +++ b/src/PhpSpreadsheet/Style/NumberFormat/DateFormatter.php @@ -207,6 +207,6 @@ private static function setLowercaseCallback(array $matches): string private static function escapeQuotesCallback(array $matches): string { - return '\\' . implode('\\', mb_str_split($matches[1])); + return '\\' . implode('\\', mb_str_split($matches[1], 1, 'UTF-8')); } } From 93169c6203072ffe449a770f69d98f5a15e116f3 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Fri, 28 Feb 2025 15:54:46 -0800 Subject: [PATCH 5/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1572e96869..026a5e10b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - Ignore ignoredErrors when not applicable. 
[Issue #4375](https://github.com/PHPOffice/PhpSpreadsheet/issues/4375) [PR #4377](https://github.com/PHPOffice/PhpSpreadsheet/pull/4377) - Better handling of defined names on sheets whose titles include apostrophes. [Issue #4356](https://github.com/PHPOffice/PhpSpreadsheet/issues/4356) [Issue #4362](https://github.com/PHPOffice/PhpSpreadsheet/issues/4362) [Issue #4376](https://github.com/PHPOffice/PhpSpreadsheet/issues/4376) [PR #4360](https://github.com/PHPOffice/PhpSpreadsheet/pull/4360) - Partial solution for removing rows or columns that include edge ranges. [Issue #1449](https://github.com/PHPOffice/PhpSpreadsheet/issues/1449) [PR #3528](https://github.com/PHPOffice/PhpSpreadsheet/pull/3528) +- Prefer mb_str_split to str_split. [PR #3341](https://github.com/PHPOffice/PhpSpreadsheet/pull/3341) ## 2025-02-08 - 4.0.0