Skip to content

Commit

Permalink
Merge pull request #9658 from haskell/mergify/bp/3.10/pr-9609
Browse files Browse the repository at this point in the history
Ignore invalid Unicode in pkg-config descriptions (backport #9609)
  • Loading branch information
mergify[bot] authored Jan 25, 2024
2 parents e5a204d + d4dff63 commit 9f9b5bd
Show file tree
Hide file tree
Showing 11 changed files with 166 additions and 24 deletions.
34 changes: 20 additions & 14 deletions Cabal/src/Distribution/Simple/Program/Run.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,19 @@
-- This module provides a data type for program invocations and functions to
-- run them.

module Distribution.Simple.Program.Run (
ProgramInvocation(..),
IOEncoding(..),
emptyProgramInvocation,
simpleProgramInvocation,
programInvocation,
multiStageProgramInvocation,

runProgramInvocation,
getProgramInvocationOutput,
getProgramInvocationLBS,
getProgramInvocationOutputAndErrors,

getEffectiveEnvironment,
module Distribution.Simple.Program.Run
( ProgramInvocation (..)
, IOEncoding (..)
, emptyProgramInvocation
, simpleProgramInvocation
, programInvocation
, multiStageProgramInvocation
, runProgramInvocation
, getProgramInvocationOutput
, getProgramInvocationLBS
, getProgramInvocationOutputAndErrors
, getProgramInvocationLBSAndErrors
, getEffectiveEnvironment
) where

import Distribution.Compat.Prelude
Expand Down Expand Up @@ -164,6 +163,13 @@ getProgramInvocationOutputAndErrors verbosity inv = case progInvokeOutputEncodin
(output', errors, exitCode) <- getProgramInvocationIODataAndErrors verbosity inv IODataModeBinary
return (normaliseLineEndings (fromUTF8LBS output'), errors, exitCode)

getProgramInvocationLBSAndErrors
:: Verbosity
-> ProgramInvocation
-> IO (LBS.ByteString, String, ExitCode)
getProgramInvocationLBSAndErrors verbosity inv =
getProgramInvocationIODataAndErrors verbosity inv IODataModeBinary

getProgramInvocationIODataAndErrors
:: KnownIODataMode mode => Verbosity -> ProgramInvocation -> IODataMode mode
-> IO (mode, String, ExitCode)
Expand Down
1 change: 1 addition & 0 deletions cabal-install-solver/cabal-install-solver.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ library
, mtl >=2.0 && <2.4
, pretty ^>=1.1
, transformers >=0.4.2.0 && <0.7
, text (>= 1.2.3.0 && < 1.3) || (>= 2.0 && < 2.2)

if flag(debug-expensive-assertions)
cpp-options: -DDEBUG_EXPENSIVE_ASSERTIONS
Expand Down
61 changes: 52 additions & 9 deletions cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
-----------------------------------------------------------------------------
-- |
-- Module : Distribution.Solver.Types.PkgConfigDb
Expand All @@ -23,17 +24,23 @@ module Distribution.Solver.Types.PkgConfigDb
import Distribution.Solver.Compat.Prelude
import Prelude ()

import Control.Exception (handle)
import Control.Monad (mapM)
import qualified Data.Map as M
import System.FilePath (splitSearchPath)
import Control.Exception (handle)
import Control.Monad (mapM)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
import System.FilePath (splitSearchPath)

import Distribution.Compat.Environment (lookupEnv)
import Distribution.Package (PkgconfigName, mkPkgconfigName)
import Distribution.Parsec
import Distribution.Simple.Program
(ProgramDb, getProgramOutput, pkgConfigProgram, needProgram, ConfiguredProgram)
import Distribution.Simple.Program.Run (getProgramInvocationOutputAndErrors, programInvocation)
import Distribution.Simple.Program.Run
(getProgramInvocationOutputAndErrors, programInvocation, getProgramInvocationLBSAndErrors)
import Distribution.Simple.Utils (info)
import Distribution.Types.PkgconfigVersion
import Distribution.Types.PkgconfigVersionRange
Expand Down Expand Up @@ -63,10 +70,37 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do
case mpkgConfig of
Nothing -> noPkgConfig "Cannot find pkg-config program"
Just (pkgConfig, _) -> do
pkgList <- lines <$> getProgramOutput verbosity pkgConfig ["--list-all"]
-- The output of @pkg-config --list-all@ also includes a description
-- for each package, which we do not need.
let pkgNames = map (takeWhile (not . isSpace)) pkgList
-- To prevent malformed Unicode in the descriptions from crashing cabal,
-- read without interpreting any encoding first. (#9608)
(listAllOutput, listAllErrs, listAllExitcode) <-
getProgramInvocationLBSAndErrors verbosity (programInvocation pkgConfig ["--list-all"])
when (listAllExitcode /= ExitSuccess) $
ioError (userError ("pkg-config --list-all failed: " ++ listAllErrs))
let pkgList = LBS.split (fromIntegral (ord '\n')) listAllOutput
-- Now decode the package *names* to a String. The ones where decoding
-- failed end up in 'failedPkgNames'.
let (failedPkgNames, pkgNames) =
partitionEithers
-- Drop empty package names. This will handle empty lines
-- in pkg-config's output, including the spurious one
-- after the last newline (because of LBS.split).
. filter (either (const True) (not . null))
-- Try decoding strictly; if it fails, put the lenient
-- decoding in a Left for later reporting.
. map (\bsname ->
let sbsname = LBS.toStrict bsname
in case T.decodeUtf8' sbsname of
Left _ -> Left (T.unpack (decodeUtf8LenientCompat sbsname))
Right name -> Right (T.unpack name))
-- The output of @pkg-config --list-all@ also includes a
-- description for each package, which we do not need.
-- We don't use Data.Char.isSpace because that would also
-- include 0xA0, the non-breaking space, which can occur
-- in multi-byte UTF-8 sequences.
. map (LBS.takeWhile (not . isAsciiSpace))
$ pkgList
when (not (null failedPkgNames)) $
info verbosity ("Some pkg-config packages have names containing invalid unicode: " ++ intercalate ", " failedPkgNames)
(outs, _errs, exitCode) <-
getProgramInvocationOutputAndErrors verbosity
(programInvocation pkgConfig ("--modversion" : pkgNames))
Expand Down Expand Up @@ -104,6 +138,15 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do
ExitSuccess -> Just (pkg, pkgVersion)
_ -> Nothing

isAsciiSpace :: Word8 -> Bool
isAsciiSpace c = c `elem` map (fromIntegral . ord) " \t"

-- The decodeUtf8Lenient function is defined starting with text-2.0.1; this
-- function simply reimplements it. When the minimum supported GHC version
-- is >= 9.4, switch to decodeUtf8Lenient.
decodeUtf8LenientCompat :: ByteString -> T.Text
decodeUtf8LenientCompat = T.decodeUtf8With T.lenientDecode

-- | Create a `PkgConfigDb` from a list of @(packageName, version)@ pairs.
pkgConfigDbFromList :: [(String, String)] -> PkgConfigDb
pkgConfigDbFromList pairs = (PkgConfigDb . M.fromList . map convert) pairs
Expand Down
2 changes: 1 addition & 1 deletion cabal-testsuite/PackageTests/ExtraProgPath/pkg-config
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/usr/bin/sh
#!/bin/sh

exit 1;
1 change: 1 addition & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/MyLibrary.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module MyLibrary () where
1 change: 1 addition & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/cabal.project
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages: *.cabal
19 changes: 19 additions & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/my.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: PkgConfigParse
version: 0.1
license: BSD3
author: Tom Smeding
maintainer: Tom Smeding
synopsis: Pkg Config Parse
category: PackageTests
build-type: Simple
cabal-version: 2.0

description:
Check that Cabal does not crash when pkg-config outputs invalid Unicode.

Library
pkgconfig-depends: vpl
default-language: Haskell2010
build-depends: base <5.0
exposed-modules:
MyLibrary
49 changes: 49 additions & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/pkg-config
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/sh

set -eu

# ugly, but "good enough" for this test
# This will need to be updated whenever cabal invokes pkg-config
# in new ways
case "$*" in
'--version')
echo 2.1.0 # whatever
;;

'--variable pc_path pkg-config')
echo '.'
;;

'--list-all')
printf 'zlib zlib - zlib compression library\n'
# \256 = \xAE is the iso-8859-1 (latin-1) encoded version of U+00AE,
# i.e. the "registered sign": ®
# This resulted in problems, see #9608
printf 'vpl Intel\256 Video Processing Library - Accelerated video decode, encode, and frame processing capabilities on Intel\256 GPUs\n'
# \360 = \xF0 is latin-1 for ð; this is orð, Icelandic for "word"/"words".
printf 'or\360 Icelandic characters\n'
;;

'--modversion '*)
shift # drop the --modversion
for arg; do
case "$arg" in
zlib) echo 1.3; ;; # whatever
vpl) echo 2.10; ;; # whatever
# No entry for orð here; let's not even try to match on that
*)
echo >&2 "Package $arg was not found in the pkg-config search path."
exit 1
esac
done
;;

# Ignore some stuff we're not implementing
'--cflags '*) ;;
'--libs '*) ;;

*)
echo >&2 "pkg-config: unrecognised arguments $* (this is an incomplete shim)"
exit 1
;;
esac
1 change: 1 addition & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/setup.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# cabal v2-build
9 changes: 9 additions & 0 deletions cabal-testsuite/PackageTests/PkgConfigParse/setup.test.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import Test.Cabal.Prelude

-- Test that invalid unicode in pkg-config output doesn't trip up cabal very much
main = cabalTest $ do
-- skipped on windows because using a script to dummy up an executable doesn't work the same.
skipIfWindows
cdir <- testCurrentDir `fmap` getTestEnv
res <- cabal' "v2-build" ["--extra-prog-path="++cdir, "-v2"]
assertOutputContains "Some pkg-config packages have names containing invalid unicode: or" res
12 changes: 12 additions & 0 deletions changelog.d/pr-9609
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
synopsis: Ignore invalid Unicode in pkg-config descriptions
packages: cabal-install-solver
prs: #9609
issues: #9608

description: {

Previously, cabal-install would crash when `pkg-config --list-all` contained
invalid Unicode. With this change, invalid unicode in package descriptions is
ignored, and unparseable package names are considered nonexistent.

}

0 comments on commit 9f9b5bd

Please sign in to comment.