diff --git a/mirrulations-extractor/src/mirrextractor/extractor.py b/mirrulations-extractor/src/mirrextractor/extractor.py index b67850b0..f1cdb860 100644 --- a/mirrulations-extractor/src/mirrextractor/extractor.py +++ b/mirrulations-extractor/src/mirrextractor/extractor.py @@ -126,7 +126,7 @@ def update_stats(): # large, and a restart would try again. This ensures # that the extraction is not attempted again when the # extractor process is restarted (by Docker). - with open(output_path, 'w') as f: + with open(output_path, 'w', encoding='utf-8') as f: pass start_time = time.time() Extractor.extract_text(complete_path, output_path)