Patch #1080727: add "encoding" parameter to doctest.DocFileSuite

george.yoshida · george.yoshida · commit fdde7a64e35e · 2006-05-28T16:39:09.000Z
Contributed by Bjorn Tillenius. git-svn-id: http://svn.python.org/projects/python/trunk@46502 6015fed2-1504-0410-9fe1-9d1591cc4771
diff --git a/Doc/lib/libdoctest.tex b/Doc/lib/libdoctest.tex
@@ -868,7 +868,7 @@ \subsection{Basic API\label{doctest-basic-api}}
                           globs}\optional{, verbose}\optional{,
                           report}\optional{, optionflags}\optional{,
                           extraglobs}\optional{, raise_on_error}\optional{,
-                          parser}}
+                          parser}\optional{, encoding}}
 
   All arguments except \var{filename} are optional, and should be
   specified in keyword form.
@@ -941,7 +941,13 @@ \subsection{Basic API\label{doctest-basic-api}}
   subclass) that should be used to extract tests from the files.  It
   defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
 
+  Optional argument \var{encoding} specifies the encoding that should be
+  used to decode the file to Unicode; if omitted, the file is not decoded.
+
   \versionadded{2.4}
+
+  \versionchanged[The parameter \var{encoding} was added]{2.5}
+
 \end{funcdesc}
 
 \begin{funcdesc}{testmod}{\optional{m}\optional{, name}\optional{,
@@ -1061,7 +1067,8 @@ \subsection{Unittest API\label{doctest-unittest-api}}
 \begin{funcdesc}{DocFileSuite}{\optional{module_relative}\optional{,
                               package}\optional{, setUp}\optional{,
                               tearDown}\optional{, globs}\optional{,
-                              optionflags}\optional{, parser}}
+                              optionflags}\optional{, parser}\optional{,
+                              encoding}}
 
   Convert doctest tests from one or more text files to a
   \class{\refmodule{unittest}.TestSuite}.
@@ -1128,11 +1135,17 @@ \subsection{Unittest API\label{doctest-unittest-api}}
   subclass) that should be used to extract tests from the files.  It
   defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
 
+  Optional argument \var{encoding} specifies the encoding that should be
+  used to decode the files to Unicode; if omitted, they are not decoded.
+
   \versionadded{2.4}
 
   \versionchanged[The global \code{__file__} was added to the
   globals provided to doctests loaded from a text file using
   \function{DocFileSuite()}]{2.5}
+
+  \versionchanged[The parameter \var{encoding} was added]{2.5}
+
 \end{funcdesc}
 
 \begin{funcdesc}{DocTestSuite}{\optional{module}\optional{,
diff --git a/Lib/doctest.py b/Lib/doctest.py
@@ -1869,7 +1869,8 @@ class doctest.Tester, then merges the results into (or creates)
 
 def testfile(filename, module_relative=True, name=None, package=None,
              globs=None, verbose=None, report=True, optionflags=0,
-             extraglobs=None, raise_on_error=False, parser=DocTestParser()):
+             extraglobs=None, raise_on_error=False, parser=DocTestParser(),
+             encoding=None):
     """
     Test examples in the given file.  Return (#failures, #tests).
 
@@ -1935,6 +1936,9 @@ def testfile(filename, module_relative=True, name=None, package=None,
     Optional keyword arg "parser" specifies a DocTestParser (or
     subclass) that should be used to extract tests from the files.
 
+    Optional keyword arg "encoding" specifies the encoding that should
+    be used to decode the file to Unicode.
+
     Advanced tomfoolery:  testmod runs methods of a local instance of
     class doctest.Tester, then merges the results into (or creates)
     global Tester instance doctest.master.  Methods of doctest.master
@@ -1969,6 +1973,9 @@ class doctest.Tester, then merges the results into (or creates)
     else:
         runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
 
+    if encoding is not None:
+        text = text.decode(encoding)
+
     # Read the file, convert it to a test, and run it.
     test = parser.get_doctest(text, globs, name, filename, 0)
     runner.run(test)
@@ -2339,7 +2346,8 @@ def format_failure(self, err):
                 )
 
 def DocFileTest(path, module_relative=True, package=None,
-                globs=None, parser=DocTestParser(), **options):
+                globs=None, parser=DocTestParser(),
+                encoding=None, **options):
     if globs is None:
         globs = {}
     else:
@@ -2357,6 +2365,10 @@ def DocFileTest(path, module_relative=True, package=None,
 
     # Find the file and read it.
     name = os.path.basename(path)
+
+    # If an encoding is specified, use it to decode the file to Unicode.
+    if encoding is not None:
+        doc = doc.decode(encoding)
 
     # Convert it to a test, and wrap it in a DocFileCase.
     test = parser.get_doctest(doc, globs, name, path, 0)
@@ -2414,6 +2426,9 @@ def DocFileSuite(*paths, **kw):
     parser
       A DocTestParser (or subclass) that should be used to extract
       tests from the files.
+
+    encoding
+      The encoding that will be used to decode the files to Unicode.
     """
     suite = unittest.TestSuite()
 
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py
@@ -1937,9 +1937,10 @@ def test_DocFileSuite():
 
          >>> import unittest
          >>> suite = doctest.DocFileSuite('test_doctest.txt',
-         ...                              'test_doctest2.txt')
+         ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt')
          >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
 
        The test files are looked for in the directory containing the
        calling module.  A package keyword argument can be provided to
@@ -1948,9 +1949,10 @@ def test_DocFileSuite():
          >>> import unittest
          >>> suite = doctest.DocFileSuite('test_doctest.txt',
          ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
          ...                              package='test')
          >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
 
        '/' should be used as a path separator.  It will be converted
        to a native separator at run time:
@@ -1995,19 +1997,21 @@ def test_DocFileSuite():
 
          >>> suite = doctest.DocFileSuite('test_doctest.txt',
          ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
          ...                              globs={'favorite_color': 'blue'})
          >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=1>
+         <unittest.TestResult run=3 errors=0 failures=2>
 
        In this case, we supplied a missing favorite color. You can
        provide doctest options:
 
          >>> suite = doctest.DocFileSuite('test_doctest.txt',
          ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
          ...                         optionflags=doctest.DONT_ACCEPT_BLANKLINE,
          ...                              globs={'favorite_color': 'blue'})
          >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
 
        And, you can provide setUp and tearDown functions:
 
@@ -2025,9 +2029,10 @@ def test_DocFileSuite():
 
          >>> suite = doctest.DocFileSuite('test_doctest.txt',
          ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
          ...                              setUp=setUp, tearDown=tearDown)
          >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=1>
+         <unittest.TestResult run=3 errors=0 failures=2>
 
        But the tearDown restores sanity:
 
@@ -2060,6 +2065,17 @@ def test_DocFileSuite():
          >>> suite.run(unittest.TestResult())
          <unittest.TestResult run=1 errors=0 failures=0>
 
+       If the tests contain non-ASCII characters, we have to specify the
+       encoding of the test file.  We do so by using the `encoding`
+       parameter:
+
+         >>> suite = doctest.DocFileSuite('test_doctest.txt',
+         ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
+         ...                              encoding='utf-8')
+         >>> suite.run(unittest.TestResult())
+         <unittest.TestResult run=3 errors=0 failures=2>
+
        """
 
 def test_trailing_space_in_test():
@@ -2266,6 +2282,32 @@ def test_testfile(): r"""
     Traceback (most recent call last):
     UnexpectedException: ...
     >>> doctest.master = None  # Reset master.
+
+If the test file contains non-ASCII characters, its tests might fail
+because the file's encoding is unknown.  The encoding can be specified
+using the optional keyword argument `encoding`:
+
+    >>> doctest.testfile('test_doctest4.txt') # doctest: +ELLIPSIS
+    **********************************************************************
+    File "...", line 7, in test_doctest4.txt
+    Failed example:
+        u'...'
+    Expected:
+        u'f\xf6\xf6'
+    Got:
+        u'f\xc3\xb6\xc3\xb6'
+    **********************************************************************
+    ...
+    **********************************************************************
+    1 items had failures:
+       2 of   4 in test_doctest4.txt
+    ***Test Failed*** 2 failures.
+    (2, 4)
+    >>> doctest.master = None  # Reset master.
+
+    >>> doctest.testfile('test_doctest4.txt', encoding='utf-8')
+    (0, 4)
+    >>> doctest.master = None  # Reset master.
 """
 
 # old_test1, ... used to live in doctest.py, but cluttered it.  Note
diff --git a/Lib/test/test_doctest4.txt b/Lib/test/test_doctest4.txt
@@ -0,0 +1,17 @@
+This is a sample doctest in a text file that contains non-ASCII characters.
+This file is encoded using UTF-8.
+
+In order to get this test to pass, we have to manually specify the
+encoding.
+
+  >>> u'föö'
+  u'f\xf6\xf6'
+
+  >>> u'bąr'
+  u'b\u0105r'
+
+  >>> 'föö'
+  'f\xc3\xb6\xc3\xb6'
+
+  >>> 'bąr'
+  'b\xc4\x85r'
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -85,6 +85,8 @@ Extension Modules
 Library
 -------
 
+- Patch #1080727: add "encoding" parameter to doctest's testfile/DocFileSuite.
+
 - Patch #1281707: speed up gzip.readline.
 
 - Patch #1180296: Two new functions were added to the locale module: