Skip to content

Commit fdde7a6

Browse files
author
george.yoshida
committed
Patch #1080727: add "encoding" parameter to doctest.DocFileSuite
Contributed by Bjorn Tillenius. git-svn-id: http://svn.python.org/projects/python/trunk@46502 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent e0ae6dd commit fdde7a6

File tree

5 files changed

+99
-10
lines changed

5 files changed

+99
-10
lines changed

Doc/lib/libdoctest.tex

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ \subsection{Basic API\label{doctest-basic-api}}
868868
globs}\optional{, verbose}\optional{,
869869
report}\optional{, optionflags}\optional{,
870870
extraglobs}\optional{, raise_on_error}\optional{,
871-
parser}}
871+
parser}\optional{, encoding}}
872872

873873
All arguments except \var{filename} are optional, and should be
874874
specified in keyword form.
@@ -941,7 +941,13 @@ \subsection{Basic API\label{doctest-basic-api}}
941941
subclass) that should be used to extract tests from the files. It
942942
defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
943943

944+
Optional argument \var{encoding} specifies an encoding that should
945+
be used to convert the file to unicode.
946+
944947
\versionadded{2.4}
948+
949+
\versionchanged[The parameter \var{encoding} was added]{2.5}
950+
945951
\end{funcdesc}
946952

947953
\begin{funcdesc}{testmod}{\optional{m}\optional{, name}\optional{,
@@ -1061,7 +1067,8 @@ \subsection{Unittest API\label{doctest-unittest-api}}
10611067
\begin{funcdesc}{DocFileSuite}{\optional{module_relative}\optional{,
10621068
package}\optional{, setUp}\optional{,
10631069
tearDown}\optional{, globs}\optional{,
1064-
optionflags}\optional{, parser}}
1070+
optionflags}\optional{, parser}\optional{,
1071+
encoding}}
10651072

10661073
Convert doctest tests from one or more text files to a
10671074
\class{\refmodule{unittest}.TestSuite}.
@@ -1128,11 +1135,17 @@ \subsection{Unittest API\label{doctest-unittest-api}}
11281135
subclass) that should be used to extract tests from the files. It
11291136
defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
11301137

1138+
Optional argument \var{encoding} specifies an encoding that should
1139+
be used to convert the file to unicode.
1140+
11311141
\versionadded{2.4}
11321142

11331143
\versionchanged[The global \code{__file__} was added to the
11341144
globals provided to doctests loaded from a text file using
11351145
\function{DocFileSuite()}]{2.5}
1146+
1147+
\versionchanged[The parameter \var{encoding} was added]{2.5}
1148+
11361149
\end{funcdesc}
11371150

11381151
\begin{funcdesc}{DocTestSuite}{\optional{module}\optional{,

Lib/doctest.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1869,7 +1869,8 @@ class doctest.Tester, then merges the results into (or creates)
18691869

18701870
def testfile(filename, module_relative=True, name=None, package=None,
18711871
globs=None, verbose=None, report=True, optionflags=0,
1872-
extraglobs=None, raise_on_error=False, parser=DocTestParser()):
1872+
extraglobs=None, raise_on_error=False, parser=DocTestParser(),
1873+
encoding=None):
18731874
"""
18741875
Test examples in the given file. Return (#failures, #tests).
18751876
@@ -1935,6 +1936,9 @@ def testfile(filename, module_relative=True, name=None, package=None,
19351936
Optional keyword arg "parser" specifies a DocTestParser (or
19361937
subclass) that should be used to extract tests from the files.
19371938
1939+
Optional keyword arg "encoding" specifies an encoding that should
1940+
be used to convert the file to unicode.
1941+
19381942
Advanced tomfoolery: testmod runs methods of a local instance of
19391943
class doctest.Tester, then merges the results into (or creates)
19401944
global Tester instance doctest.master. Methods of doctest.master
@@ -1969,6 +1973,9 @@ class doctest.Tester, then merges the results into (or creates)
19691973
else:
19701974
runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
19711975

1976+
if encoding is not None:
1977+
text = text.decode(encoding)
1978+
19721979
# Convert the (already loaded) text to a test, and run it.
19731980
test = parser.get_doctest(text, globs, name, filename, 0)
19741981
runner.run(test)
@@ -2339,7 +2346,8 @@ def format_failure(self, err):
23392346
)
23402347

23412348
def DocFileTest(path, module_relative=True, package=None,
2342-
globs=None, parser=DocTestParser(), **options):
2349+
globs=None, parser=DocTestParser(),
2350+
encoding=None, **options):
23432351
if globs is None:
23442352
globs = {}
23452353
else:
@@ -2357,6 +2365,10 @@ def DocFileTest(path, module_relative=True, package=None,
23572365

23582366
# Find the file and read it.
23592367
name = os.path.basename(path)
2368+
2369+
# If an encoding is specified, use it to convert the file to unicode
2370+
if encoding is not None:
2371+
doc = doc.decode(encoding)
23602372

23612373
# Convert it to a test, and wrap it in a DocFileCase.
23622374
test = parser.get_doctest(doc, globs, name, path, 0)
@@ -2414,6 +2426,9 @@ def DocFileSuite(*paths, **kw):
24142426
parser
24152427
A DocTestParser (or subclass) that should be used to extract
24162428
tests from the files.
2429+
2430+
encoding
2431+
An encoding that will be used to convert the files to unicode.
24172432
"""
24182433
suite = unittest.TestSuite()
24192434

Lib/test/test_doctest.py

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,9 +1937,10 @@ def test_DocFileSuite():
19371937
19381938
>>> import unittest
19391939
>>> suite = doctest.DocFileSuite('test_doctest.txt',
1940-
... 'test_doctest2.txt')
1940+
... 'test_doctest2.txt',
1941+
... 'test_doctest4.txt')
19411942
>>> suite.run(unittest.TestResult())
1942-
<unittest.TestResult run=2 errors=0 failures=2>
1943+
<unittest.TestResult run=3 errors=0 failures=3>
19431944
19441945
The test files are looked for in the directory containing the
19451946
calling module. A package keyword argument can be provided to
@@ -1948,9 +1949,10 @@ def test_DocFileSuite():
19481949
>>> import unittest
19491950
>>> suite = doctest.DocFileSuite('test_doctest.txt',
19501951
... 'test_doctest2.txt',
1952+
... 'test_doctest4.txt',
19511953
... package='test')
19521954
>>> suite.run(unittest.TestResult())
1953-
<unittest.TestResult run=2 errors=0 failures=2>
1955+
<unittest.TestResult run=3 errors=0 failures=3>
19541956
19551957
'/' should be used as a path separator. It will be converted
19561958
to a native separator at run time:
@@ -1995,19 +1997,21 @@ def test_DocFileSuite():
19951997
19961998
>>> suite = doctest.DocFileSuite('test_doctest.txt',
19971999
... 'test_doctest2.txt',
2000+
... 'test_doctest4.txt',
19982001
... globs={'favorite_color': 'blue'})
19992002
>>> suite.run(unittest.TestResult())
2000-
<unittest.TestResult run=2 errors=0 failures=1>
2003+
<unittest.TestResult run=3 errors=0 failures=2>
20012004
20022005
In this case, we supplied a missing favorite color. You can
20032006
provide doctest options:
20042007
20052008
>>> suite = doctest.DocFileSuite('test_doctest.txt',
20062009
... 'test_doctest2.txt',
2010+
... 'test_doctest4.txt',
20072011
... optionflags=doctest.DONT_ACCEPT_BLANKLINE,
20082012
... globs={'favorite_color': 'blue'})
20092013
>>> suite.run(unittest.TestResult())
2010-
<unittest.TestResult run=2 errors=0 failures=2>
2014+
<unittest.TestResult run=3 errors=0 failures=3>
20112015
20122016
And, you can provide setUp and tearDown functions:
20132017
@@ -2025,9 +2029,10 @@ def test_DocFileSuite():
20252029
20262030
>>> suite = doctest.DocFileSuite('test_doctest.txt',
20272031
... 'test_doctest2.txt',
2032+
... 'test_doctest4.txt',
20282033
... setUp=setUp, tearDown=tearDown)
20292034
>>> suite.run(unittest.TestResult())
2030-
<unittest.TestResult run=2 errors=0 failures=1>
2035+
<unittest.TestResult run=3 errors=0 failures=2>
20312036
20322037
But the tearDown restores sanity:
20332038
@@ -2060,6 +2065,17 @@ def test_DocFileSuite():
20602065
>>> suite.run(unittest.TestResult())
20612066
<unittest.TestResult run=1 errors=0 failures=0>
20622067
2068+
If the tests contain non-ASCII characters, we have to specify which
2069+
encoding the file uses. We do so by using the `encoding`
2070+
parameter:
2071+
2072+
>>> suite = doctest.DocFileSuite('test_doctest.txt',
2073+
... 'test_doctest2.txt',
2074+
... 'test_doctest4.txt',
2075+
... encoding='utf-8')
2076+
>>> suite.run(unittest.TestResult())
2077+
<unittest.TestResult run=3 errors=0 failures=2>
2078+
20632079
"""
20642080

20652081
def test_trailing_space_in_test():
@@ -2266,6 +2282,32 @@ def test_testfile(): r"""
22662282
Traceback (most recent call last):
22672283
UnexpectedException: ...
22682284
>>> doctest.master = None # Reset master.
2285+
2286+
If the tests contain non-ASCII characters, the tests might fail, since
2287+
it's unknown which encoding is used. The encoding can be specified
2288+
using the optional keyword argument `encoding`:
2289+
2290+
>>> doctest.testfile('test_doctest4.txt') # doctest: +ELLIPSIS
2291+
**********************************************************************
2292+
File "...", line 7, in test_doctest4.txt
2293+
Failed example:
2294+
u'...'
2295+
Expected:
2296+
u'f\xf6\xf6'
2297+
Got:
2298+
u'f\xc3\xb6\xc3\xb6'
2299+
**********************************************************************
2300+
...
2301+
**********************************************************************
2302+
1 items had failures:
2303+
2 of 4 in test_doctest4.txt
2304+
***Test Failed*** 2 failures.
2305+
(2, 4)
2306+
>>> doctest.master = None # Reset master.
2307+
2308+
>>> doctest.testfile('test_doctest4.txt', encoding='utf-8')
2309+
(0, 4)
2310+
>>> doctest.master = None # Reset master.
22692311
"""
22702312

22712313
# old_test1, ... used to live in doctest.py, but cluttered it. Note

Lib/test/test_doctest4.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
This is a sample doctest in a text file that contains non-ASCII characters.
2+
This file is encoded using UTF-8.
3+
4+
In order to get this test to pass, we have to manually specify the
5+
encoding.
6+
7+
>>> u'föö'
8+
u'f\xf6\xf6'
9+
10+
>>> u'bąr'
11+
u'b\u0105r'
12+
13+
>>> 'föö'
14+
'f\xc3\xb6\xc3\xb6'
15+
16+
>>> 'bąr'
17+
'b\xc4\x85r'

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ Extension Modules
8585
Library
8686
-------
8787

88+
- Patch #1080727: add "encoding" parameter to doctest.testfile and doctest.DocFileSuite.
89+
8890
- Patch #1281707: speed up gzip.readline.
8991

9092
- Patch #1180296: Two new functions were added to the locale module:

0 commit comments

Comments
 (0)