Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect encoding per yaml spec (fix #238) #240

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
27 changes: 27 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -91,6 +91,12 @@ def setUpClass(cls):
# dos line endings yaml
'dos.yml': '---\r\n'
'dos: true',
# UTF-16 Little Endian BOM
'non-ascii/utf16le': b'\xff\xfe---\nutf16le: true\n',
# UTF-16 Big Endian BOM
'non-ascii/utf16be': b'\xfe\xff---\nutf16be: true\n',
# UTF-8 BOM
'non-ascii/utf8': b'\xef\xbb\xbf---\nutf8: true\n',
})

@classmethod
@@ -171,6 +177,9 @@ def test_find_files_recursively(self):
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/utf16be'),
os.path.join(self.wd, 'non-ascii/utf16le'),
os.path.join(self.wd, 'non-ascii/utf8'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'),
@@ -188,6 +197,9 @@ def test_find_files_recursively(self):
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/utf16be'),
os.path.join(self.wd, 'non-ascii/utf16le'),
os.path.join(self.wd, 'non-ascii/utf8'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'),
@@ -517,3 +529,18 @@ def test_run_non_universal_newline(self):
'\n' % path)
self.assertEqual(
(ctx.returncode, ctx.stdout, ctx.stderr), (1, expected_out, ''))

def test_encoding_detection_utf16le(self):
    # The 'non-ascii/utf16le' fixture starts with the UTF-16 LE BOM, so
    # the detected codec must be the little-endian one.
    fixture = os.path.join(self.wd, 'non-ascii/utf16le')
    self.assertEqual(cli.determine_encoding(fixture), 'utf-16-le')

def test_encoding_detection_utf16be(self):
    # The 'non-ascii/utf16be' fixture starts with the UTF-16 BE BOM, so
    # the detected codec must be the big-endian one.
    fixture = os.path.join(self.wd, 'non-ascii/utf16be')
    self.assertEqual(cli.determine_encoding(fixture), 'utf-16-be')

def test_encoding_detection_utf8(self):
    # The 'non-ascii/utf8' fixture starts with the UTF-8 BOM; detection
    # is expected to report plain 'utf-8'.
    fixture = os.path.join(self.wd, 'non-ascii/utf8')
    self.assertEqual(cli.determine_encoding(fixture), 'utf-8')
16 changes: 15 additions & 1 deletion yamllint/cli.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
from __future__ import print_function

import argparse
import codecs
import io
import os
import platform
@@ -28,6 +29,18 @@
from yamllint.linter import PROBLEM_LEVELS


def determine_encoding(file):
    """Guess the character encoding of a YAML stream from its first bytes.

    Implements the detection table of the YAML 1.2 specification
    ("Character Encodings"): an explicit byte order mark wins; without
    one, the position of NUL bytes around the first (ASCII) character
    reveals UTF-16/UTF-32 and their byte order; anything else is UTF-8.

    Note: the endian-specific codecs returned here (and plain 'utf-8') do
    not strip a leading BOM while decoding, so it reaches the reader as
    U+FEFF.

    :param file: path of the file to inspect
    :return: one of 'utf-32-be', 'utf-32-le', 'utf-16-be', 'utf-16-le'
             or 'utf-8'
    :raises EnvironmentError: if the file cannot be opened or read
    """
    with io.open(file, 'rb') as raw_file:
        data = raw_file.read(4)

    # Explicit BOMs. UTF-32 has to be tested before UTF-16 because the
    # UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE).
    if data.startswith(codecs.BOM_UTF32_BE):
        return 'utf-32-be'
    if data.startswith(codecs.BOM_UTF32_LE):
        return 'utf-32-le'
    if data.startswith(codecs.BOM_UTF16_LE):
        return 'utf-16-le'
    if data.startswith(codecs.BOM_UTF16_BE):
        return 'utf-16-be'
    if data.startswith(codecs.BOM_UTF8):
        return 'utf-8'

    # No BOM: a YAML stream starts with an ASCII character, so the NUL
    # padding around that first character gives away the code unit size
    # and the byte order. Slices (not indexing) keep this working on both
    # Python 2 str and Python 3 bytes.
    nul = b'\x00'
    if len(data) == 4:
        if data[:3] == nul * 3:
            return 'utf-32-be'
        if data[1:] == nul * 3:
            return 'utf-32-le'
    if len(data) >= 2:
        if data[:1] == nul:
            return 'utf-16-be'
        if data[1:2] == nul:
            return 'utf-16-le'

    # Default mandated by the spec (also covers empty and 1-byte files).
    return 'utf-8'


def find_files_recursively(items, conf):
for item in items:
if os.path.isdir(item):
@@ -177,7 +190,8 @@ def run(argv=None):
for file in find_files_recursively(args.files, conf):
filepath = file[2:] if file.startswith('./') else file
try:
with io.open(file, newline='') as f:
encoding = determine_encoding(file)
with io.open(file, newline='', encoding=encoding) as f:
problems = linter.run(f, conf, filepath)
except EnvironmentError as e:
print(e, file=sys.stderr)