You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
While running the function `translate_a_single_sentence(translation_config)`, I encountered an error in which the file en-de.tgz is not recognized as a gzip file. How can I fix this?
Below is a snippet of the error:
` downloading en-de.tgz
C:\Users..\pytorch-original-transformer\data\iwslt\en-de.tgz: 97.4kB [00:00, 1.60MB/s]
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
291 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 292 return self._buffer.read(size)
293
~\anaconda3\envs\pytorch-transformer\lib\_compression.py in readinto(self, b)
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
478 self._init_read()
--> 479 if not self._read_gzip_header():
480 self._size = self._pos
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in _read_gzip_header(self)
426 if magic != b'\037\213':
--> 427 raise BadGzipFile('Not a gzipped file (%r)' % magic)
428
BadGzipFile: Not a gzipped file (b'<!')
During handling of the above exception, another exception occurred:
ReadError Traceback (most recent call last)
in
85
86 # Translate the given source sentence
---> 87 translate_a_single_sentence(translation_config)
in translate_a_single_sentence(translation_config)
5 print(2)
6 # Step 1: Prepare the field processor (tokenizer, numericalizer)
----> 7 _, _, src_field_processor, trg_field_processor = get_datasets_and_vocabs(
8 translation_config['dataset_path'],
9 translation_config['language_direction'],
~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\data\dataset.py in download(cls, root, check)
189 # tarfile cannot handle bare .gz files
190 elif ext == '.tgz' or ext == '.gz' and ext_inner == '.tar':
--> 191 with tarfile.open(zpath, 'r:gz') as tar:
192 dirs = [member for member in tar.getmembers()]
193 tar.extractall(path=path, members=dirs)
The issue is with the dataset. I replaced it with Multi30k: `from torchtext.datasets import Multi30k` ... `train_dataset, val_dataset, test_dataset = Multi30k.splits(exts=(src_ext, trg_ext), fields=fields,)`
The issue is with the dataset. I replaced it with Multi30k: `from torchtext.datasets import Multi30k` ... `train_dataset, val_dataset, test_dataset = Multi30k.splits(exts=(src_ext, trg_ext), fields=fields,)`
Thank you SO MUCH for sharing the solution, I was stuck on the URL for a whole afternoon!
感谢这个B
While running
def translate_a_single_sentence(translation_config):
I encountered an error in which the file en-de.tgz is not recognized as a gzip file. How can I fix this? Below is a snippet of the error:
` downloading en-de.tgz
C:\Users..\pytorch-original-transformer\data\iwslt\en-de.tgz: 97.4kB [00:00, 1.60MB/s]
BadGzipFile Traceback (most recent call last)
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1669 try:
-> 1670 t = cls.taropen(name, mode, fileobj, **kwargs)
1671 except OSError:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in taropen(cls, name, mode, fileobj, **kwargs)
1646 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
-> 1647 return cls(name, mode, fileobj, **kwargs)
1648
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in __init__(self, name, mode, fileobj, format, tarinfo, dereference, ignore_zeros, encoding, errors, pax_headers, debug, errorlevel, copybufsize)
1509 self.firstmember = None
-> 1510 self.firstmember = self.next()
1511
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in next(self)
2310 try:
-> 2311 tarinfo = self.tarinfo.fromtarfile(self)
2312 except EOFHeaderError as e:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in fromtarfile(cls, tarfile)
1101 """
-> 1102 buf = tarfile.fileobj.read(BLOCKSIZE)
1103 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
291 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 292 return self._buffer.read(size)
293
~\anaconda3\envs\pytorch-transformer\lib\_compression.py in readinto(self, b)
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
478 self._init_read()
--> 479 if not self._read_gzip_header():
480 self._size = self._pos
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in _read_gzip_header(self)
426 if magic != b'\037\213':
--> 427 raise BadGzipFile('Not a gzipped file (%r)' % magic)
428
BadGzipFile: Not a gzipped file (b'<!')
During handling of the above exception, another exception occurred:
ReadError Traceback (most recent call last)
in
85
86 # Translate the given source sentence
---> 87 translate_a_single_sentence(translation_config)
in translate_a_single_sentence(translation_config)
5 print(2)
6 # Step 1: Prepare the field processor (tokenizer, numericalizer)
----> 7 _, _, src_field_processor, trg_field_processor = get_datasets_and_vocabs(
8 translation_config['dataset_path'],
9 translation_config['language_direction'],
in get_datasets_and_vocabs(dataset_path, language_direction, use_iwslt, use_caching_mechanism)
41 dataset_split_fn = datasets.IWSLT.splits if use_iwslt else datasets.WMT14.splits
42
---> 43 train_dataset, val_dataset, test_dataset = dataset_split_fn(
44 exts=(src_ext, trg_ext),
45 fields=fields,
~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\datasets\translation.py in splits(cls, exts, fields, root, train, validation, test, **kwargs)
142 cls.urls = [cls.base_url.format(exts[0][1:], exts[1][1:], cls.dirname)]
143 check = os.path.join(root, cls.name, cls.dirname)
--> 144 path = cls.download(root, check=check)
145
146 train = '.'.join([train, cls.dirname])
~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\data\dataset.py in download(cls, root, check)
189 # tarfile cannot handle bare .gz files
190 elif ext == '.tgz' or ext == '.gz' and ext_inner == '.tar':
--> 191 with tarfile.open(zpath, 'r:gz') as tar:
192 dirs = [member for member in tar.getmembers()]
193 tar.extractall(path=path, members=dirs)
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1615 else:
1616 raise CompressionError("unknown compression type %r" % comptype)
-> 1617 return func(name, filemode, fileobj, **kwargs)
1618
1619 elif "|" in mode:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1672 fileobj.close()
1673 if mode == 'r':
-> 1674 raise ReadError("not a gzip file")
1675 raise
1676 except:
ReadError: not a gzip file`
Thank you very much!
The text was updated successfully, but these errors were encountered: