Skip to content

Commit

Permalink
Jetpack Sync: fix extracting UTF-8 characters from image alt text (#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
trakos authored Feb 4, 2025
1 parent b83c45c commit 6c87a8b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: other

Jetpack Sync: Fixed extracting UTF-8 characters from image alt-text
10 changes: 9 additions & 1 deletion projects/plugins/jetpack/class.jetpack-post-images.php
Original file line number Diff line number Diff line change
Expand Up @@ -495,10 +495,18 @@ public static function from_html( $html_or_id, $width = 200, $height = 200 ) {
// Let's grab all image tags from the HTML.
$dom_doc = new DOMDocument();

// DOMDocument defaults to ISO-8859 because we're loading only the post content, without head tag.
// Fix: Enforce encoding with meta tag.
$charset = get_option( 'blog_charset' );
if ( empty( $charset ) || ! preg_match( '/^[a-zA-Z0-9_-]+$/', $charset ) ) {
$charset = 'UTF-8';
}
$html_prefix = sprintf( '<meta http-equiv="Content-Type" content="text/html; charset=%s">', esc_attr( $charset ) );

// The @ is not enough to suppress errors when dealing with libxml,
// we have to tell it directly how we want to handle errors.
libxml_use_internal_errors( true );
@$dom_doc->loadHTML( $html_info['html'] ); // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
@$dom_doc->loadHTML( $html_prefix . $html_info['html'] ); // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
libxml_use_internal_errors( false );

$image_tags = $dom_doc->getElementsByTagName( 'img' );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ public function test_from_html_no_size() {
$this->assertEquals( array(), $result );
}

/**
 * Multi-byte UTF-8 characters in an image's alt attribute must come back
 * unchanged in the 'alt_text' field of the first extracted image.
 *
 * @covers Jetpack_PostImages::from_html
 */
public function test_from_html_alt_utf8() {
	// Single source of truth for the expected text so the markup and the assertion cannot drift apart.
	$alt_text = 'Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ';
	$html     = '<img src="bob.jpg" width="200" height="200" alt="' . $alt_text . '" />';

	$result = Jetpack_PostImages::from_html( $html );

	$this->assertIsArray( $result );
	$this->assertNotEmpty( $result );
	// Strict comparison: the alt text must be the identical string, not merely loosely equal.
	$this->assertSame( $alt_text, $result[0]['alt_text'] );
}

/**
* @author scotchfield
* @covers Jetpack_PostImages::from_slideshow
Expand Down

0 comments on commit 6c87a8b

Please sign in to comment.