Skip to content

Commit

Permalink
bugfix outputfilename with gz
Browse files Browse the repository at this point in the history
  • Loading branch information
guipenedo committed Dec 12, 2023
1 parent 35fc53e commit 476de37
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/datatrove/pipeline/writers/jsonl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import dataclasses
import json
+ from string import Template

from datatrove.data import Document
from datatrove.io import BaseOutputDataFile, BaseOutputDataFolder
Expand All @@ -14,7 +15,7 @@ def __init__(self, output_folder: BaseOutputDataFolder, output_filename: str = N
super().__init__(output_folder, output_filename=output_filename)
self.gzip = gzip
if self.gzip:
- self.output_filename = self.output_filename + ".gz"
+ self.output_filename = Template(self.output_filename.template + ".gz")

# Open `output_filename` through this writer's output folder for writing.
# The file is requested in text-write mode ("wt") and the writer's `gzip` flag
# is forwarded to the folder's `open`; whatever that call returns is returned
# unchanged.
def open(self, output_filename):
return self.output_folder.open(output_filename, mode="wt", gzip=self.gzip)
Expand Down

0 comments on commit 476de37

Please sign in to comment.