From 57cc3b6b95b6282b5f157447c6a1291d84771475 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Tue, 22 Mar 2022 16:15:21 +0900 Subject: [PATCH 01/13] [WIP] refactor --- src/kyoto_reader/reader.py | 359 +++++++++++++++++++++++++------------ 1 file changed, 244 insertions(+), 115 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index b193ced..3d9b14d 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -11,6 +11,7 @@ from multiprocessing import Pool from pathlib import Path from typing import List, Dict, Optional, Union, Callable, Iterable, Collection, Any +from enum import Enum from joblib import Parallel, delayed @@ -20,75 +21,168 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) - -class ArchiveHandler: - """Base class for handling archive. - Each subclass should correspond with one extension (e.g. .zip). +# 実装方針 +""" +dir と archive は分ける +.gz.zip は無理なのでサポートしない +または予め archive は tmp dir に吐くとか +サポートするのは以下 +- archive +- directory +- compressed files in directory +- file +- compressed file +""" +class FileCollectionType(Enum): + """ + Enum for file collection types. """ + TAR_GZ = '.tar.gz' + ZIP = '.zip' + DIR = '' + - def __init__(self, archive_path: Path): - self.archive_path = archive_path +class FileCollectionHandler: + def __init__(self, path: Path): + self.path = path + self.type = self._get_type(path) + self.members = self._list_members() - @abstractmethod - def _open(self, path: Path) -> Any: - """Return a file-like object for the given path.""" - raise NotImplementedError + @staticmethod + def _get_type(path: Path) -> FileCollectionType: + if path.is_dir(): + return FileCollectionType.DIR + return FileCollectionType(path.suffix) # FIXME: .tar.gz is not recognized as a file type + + def _list_members(self) -> List[Path]: + if self.type == FileCollectionType.DIR: + return [p.relative_to(self.path) for p in self.path.glob('**/*') if p.is_file()] + elif self.type == FileCollectionType.TAR_GZ: + with tarfile.open(self.path, mode='r') as f: + return [Path(p) for p in f.getnames()] + elif self.type == FileCollectionType.ZIP: + with zipfile.ZipFile(self.path, mode='r') as f: + return [Path(p) for p in f.namelist()] + else: + raise ValueError(f'Unsupported collection type: {self.type}') @contextmanager def open(self): - """Main function to open the archive. - Specific open function for each extension should be implemented in a subclass. - """ - f = self._open(self.archive_path) try: - yield f + if self.type == FileCollectionType.DIR: + file = self.path + elif self.type == FileCollectionType.TAR_GZ: + file = tarfile.open(self.path, mode='r') + elif self.type == FileCollectionType.ZIP: + file = zipfile.ZipFile(self.path, mode='r') + else: + file = None + raise ValueError(f'Unsupported collection type: {self.type}') + yield file finally: - f.close() - - @abstractmethod - def get_member_names(self, f: Any) -> List[str]: - """Get all file names in archive.""" - raise NotImplementedError - - @abstractmethod - def open_member(self, f: Any, path: str): - """Extract file object from archive""" - raise NotImplementedError - - -class TarGzipHandler(ArchiveHandler): - def _open(self, path: Path) -> tarfile.TarFile: - return tarfile.open(path) - - def get_member_names(self, f: tarfile.TarFile) -> List[str]: - return getattr(f, "getnames")() - - @contextmanager - def open_member(self, f: tarfile.TarFile, path: str): - bytes = f.extractfile(path) - yield bytes - - -class ZipHandler(ArchiveHandler): - def _open(self, path: Path) -> zipfile.ZipFile: - return zipfile.ZipFile(path) - - def get_member_names(self, f: zipfile.ZipFile) -> List[str]: - return getattr(f, "namelist")() + hasattr(file, 'close') and file.close() @contextmanager - def open_member(self, f: zipfile.ZipFile, path: str): - g = f.open(path) + def open_member(self, file, member: Path): try: - yield g + if self.type == FileCollectionType.TAR_GZ: + f = file.extractfile(member) + elif self.type == FileCollectionType.ZIP: + f = file.open(member) + elif self.type == FileCollectionType.DIR: + f = (file / member).open() + else: + f = None + raise ValueError(f'Unsupported collection type: {self.type}') + yield file finally: - g.close() + f.close() - -ARCHIVE2HANDLER: Dict[str, Callable] = { - ".tar.gz": TarGzipHandler, - ".zip": ZipHandler -} + # def list_contents(self) -> List[str]: + # if self.type == FileCollectionType.DIR: + # return [p.name for p in self.path.glob('**/*') if p.is_file()] + # elif self.type == FileCollectionType.TAR_GZ: + # with tarfile.open(self.path, mode='r') as f: + # return f.get_names() + # elif self.type == FileCollectionType.ZIP: + # with zipfile.ZipFile(self.path, mode='r') as f: + # return f.namelist() + # else: + # raise ValueError(f'Unsupported file type: {self.type}') + + @classmethod + def is_supported_path(cls, path: Path) -> bool: + return path.is_dir() or path.suffix in (t.value for t in FileCollectionType) + + +# class ArchiveHandler: +# """Base class for handling archive. +# Each subclass should correspond with one extension (e.g. .zip). +# """ +# +# def __init__(self, archive_path: Path): +# self.archive_path = archive_path +# +# @abstractmethod +# def _open(self, path: Path) -> Any: +# """Return a file-like object for the given path.""" +# raise NotImplementedError +# +# @contextmanager +# def open(self): +# """Main function to open the archive. +# Specific open function for each extension should be implemented in a subclass. +# """ +# f = self._open(self.archive_path) +# try: +# yield f +# finally: +# f.close() +# +# @abstractmethod +# def get_member_names(self, f: Any) -> List[str]: +# """Get all file names in archive.""" +# raise NotImplementedError +# +# @abstractmethod +# def open_member(self, f: Any, path: str): +# """Extract file object from archive""" +# raise NotImplementedError +# +# +# class TarGzipHandler(ArchiveHandler): +# def _open(self, path: Path) -> tarfile.TarFile: +# return tarfile.open(path) +# +# def get_member_names(self, f: tarfile.TarFile) -> List[str]: +# return getattr(f, "getnames")() +# +# @contextmanager +# def open_member(self, f: tarfile.TarFile, path: str): +# bytes = f.extractfile(path) +# yield bytes +# +# +# class ZipHandler(ArchiveHandler): +# def _open(self, path: Path) -> zipfile.ZipFile: +# return zipfile.ZipFile(path) +# +# def get_member_names(self, f: zipfile.ZipFile) -> List[str]: +# return getattr(f, "namelist")() +# +# @contextmanager +# def open_member(self, f: zipfile.ZipFile, path: str): +# g = f.open(path) +# try: +# yield g +# finally: +# g.close() + + +# ARCHIVE2HANDLER: Dict[str, Callable] = { +# ".tar.gz": TarGzipHandler, +# ".zip": ZipHandler +# } COMPRESS2OPEN: Dict[str, Callable] = { ".gz": partial(gzip.open, mode='rt'), @@ -126,74 +220,103 @@ def __init__(self, use_pas_tag: bool = False, knp_ext: str = '.knp', pickle_ext: str = '.pkl', - recursive: bool = False, + # recursive: bool = False, mp_backend: Optional[str] = 'multiprocessing', n_jobs: int = -1, did_from_sid: bool = True, - archive2handler: Dict[str, ArchiveHandler] = ARCHIVE2HANDLER, - compress2open: Dict[str, Callable] = COMPRESS2OPEN + # archive2handler: Dict[str, ArchiveHandler] = ARCHIVE2HANDLER, + # compress2open: Dict[str, Callable] = COMPRESS2OPEN ) -> None: if not (isinstance(source, Path) or isinstance(source, str)): raise TypeError( f"document source must be Path or str type, but got '{type(source)}' type") source = Path(source) - source_suffix = "".join(source.suffixes) - self.archive_handler = None + source_suffix = ''.join(source.suffixes) + # self.archive_handler = None - if source.is_dir(): - # Yields all allowed single-file extension (e.g. .knp, .pkl.gz) - allowed_single_file_ext = list( - "".join(x) for x in product((knp_ext, pickle_ext), (("",) + tuple(compress2open.keys())))) - logger.info( - f'got directory path, files in the directory is treated as source files') - file_paths: List[Path] = [] - for ext in allowed_single_file_ext: - file_paths += sorted(source.glob( - f'**/*{ext}' if recursive else f'*{ext}')) - # If source file is an archive, build handler - elif source_suffix in archive2handler: - logger.info( - f'got compressed file, files in the compressed file are treated as source files') - # Compressed files are prohibited. + if FileCollectionHandler.is_supported_path(source): + self.handler = FileCollectionHandler(source) allowed_single_file_ext = (knp_ext, pickle_ext) - self.archive_handler = archive2handler[source_suffix](source) - with self.archive_handler.open() as archive: - file_paths = sorted( - Path(x) for x in self.archive_handler.get_member_names(archive) - if "".join(Path(x).suffixes) in allowed_single_file_ext + # file_paths = sorted(Path(p) for p in self.handler.list_contents() if ''.join(Path(p).suffixes) in allowed_single_file_ext) + self.did2pkls = {path.stem: path for path in self.handler.members if pickle_ext in path.suffixes} + # get did2knps + with self.handler.open() as collection: + args_iter = ( + (self, path, did_from_sid, collection) + for path in self.handler.members if knp_ext in path.suffixes ) + # ここで handler 用の read_knp を呼びたい + rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) + self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) else: - logger.info( - f'got file path, this file is treated as a source knp file') - file_paths = [source] - self.did2pkls: Dict[str, Path] = { - path.stem: path for path in file_paths if pickle_ext in path.suffixes} + assert source.is_file() is True + self.did2pkls = {path.stem: path for path in [source] if pickle_ext in path.suffixes} + rets: List[Dict[str, str]] = [self.read_knp(source, did_from_sid)] + self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) + + # if source.is_dir(): + # # Yields all allowed single-file extension (e.g. .knp, .pkl.gz) + # allowed_single_file_ext = list( + # "".join(x) for x in product((knp_ext, pickle_ext), (("",) + tuple(compress2open.keys())))) + # logger.info( + # f'got directory path, files in the directory is treated as source files') + # file_paths: List[Path] = [] + # for ext in allowed_single_file_ext: + # file_paths += sorted(source.glob( + # f'**/*{ext}' if recursive else f'*{ext}')) + # # If source file is an archive, build handler + # elif source_suffix in archive2handler: + # logger.info( + # f'got compressed file, files in the compressed file are treated as source files') + # # Compressed files are prohibited. + # allowed_single_file_ext = (knp_ext, pickle_ext) + # self.archive_handler = archive2handler[source_suffix](source) + # with self.archive_handler.open() as archive: + # file_paths = sorted( + # Path(x) for x in self.archive_handler.get_member_names(archive) + # if "".join(Path(x).suffixes) in allowed_single_file_ext + # ) + # else: + # logger.info( + # f'got file path, this file is treated as a source knp file') + # file_paths = [source] + # self.did2pkls: Dict[str, Path] = { + # path.stem: path for path in file_paths if pickle_ext in path.suffixes} self.mp_backend: Optional[str] = mp_backend if n_jobs != 0 else None - if self.mp_backend is not None and self.archive_handler is not None: + if self.mp_backend is not None and self.handler is not None: logger.info( "Multiprocessing with archive is too slow, so it is disabled") logger.info( "Run without multiprocessing can be relatively slow, so please consider unarchive the archive file") self.mp_backend = None self.n_jobs: int = n_jobs + # if FileCollectionHandler.is_supported_path(source): + # with self.handler.open() as collection: + # args_iter = ( + # (self, path, did_from_sid, collection) + # for path in self.handler.members if knp_ext in path.suffixes + # ) + # rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) + # else: + # rets: List[Dict[str, str]] = [self.read_knp(source, did_from_sid)] # This must be set before read_knp is called. - self.compress2open = compress2open - if self.archive_handler is not None: - with self.archive_handler.open() as archive: - args_iter = ( - (self, path, did_from_sid, archive) - for path in file_paths if knp_ext in path.suffixes - ) - rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, - self.n_jobs) - else: - args_iter = ((self, path, did_from_sid) - for path in file_paths if knp_ext in path.suffixes) - rets: List[Dict[str, str]] = self._mp_wrapper( - KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) - - self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) + # self.compress2open = compress2open + # if self.archive_handler is not None: + # with self.archive_handler.open() as archive: + # args_iter = ( + # (self, path, did_from_sid, archive) + # for path in file_paths if knp_ext in path.suffixes + # ) + # rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, + # self.n_jobs) + # else: + # args_iter = ((self, path, did_from_sid, None) + # for path in file_paths if knp_ext in path.suffixes) + # rets: List[Dict[str, str]] = self._mp_wrapper( + # KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) + + # self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) self.doc_ids: List[str] = sorted( set(self.did2knps.keys()) | set(self.did2pkls.keys())) @@ -251,21 +374,27 @@ def _read_knp(f): logger.warning(f'empty file found and skipped: {path}') if archive is not None: - with self.archive_handler.open_member(archive, str(path)) as f: - text = f.read().decode("utf-8") - _read_knp(text.split("\n")) - else: - if any(key in path.suffixes for key in self.compress2open): - compress = set(path.suffixes) & set(self.compress2open.keys()) - assert len(compress) == 1 - _open = self.compress2open[compress.pop()] - else: - _open = open - with _open(path) as f: - _read_knp(f.readlines()) + with self.handler.open_member(archive, path) as f: + _read_knp(f) # ここで f は .knp.gz とかかもしれないのでやはり archive と directory は分けたほうが良さそう + # if archive is not None: + # with self.archive_handler.open_member(archive, str(path)) as f: + # text = f.read().decode("utf-8") + # _read_knp(text.split("\n")) + # else: + # if any(key in path.suffixes for key in self.compress2open): + # compress = set(path.suffixes) & set(self.compress2open.keys()) + # assert len(compress) == 1 + # _open = self.compress2open[compress.pop()] + # else: + # _open = open + # with _open(path) as f: + # _read_knp(f.readlines()) return did2knps + def open_single_file(self, path): + pass + @staticmethod def _get_targets(input_: Optional[Collection], all_: Collection[Any], From 2d7cc3b61577ce2be28a58e2d72f81e621c821d9 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:06:07 +0900 Subject: [PATCH 02/13] refactor --- src/kyoto_reader/reader.py | 347 +++++++++++++++++-------------------- tests/conftest.py | 8 +- 2 files changed, 159 insertions(+), 196 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 3d9b14d..668e057 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -1,17 +1,16 @@ -from abc import abstractmethod -import _pickle as cPickle -from contextlib import contextmanager import gzip +import io import logging +import pickle import tarfile import zipfile from collections import ChainMap -from functools import partial -from itertools import repeat, product +from contextlib import contextmanager, nullcontext +from enum import Enum +from itertools import repeat from multiprocessing import Pool from pathlib import Path -from typing import List, Dict, Optional, Union, Callable, Iterable, Collection, Any -from enum import Enum +from typing import List, Dict, Optional, Union, Callable, Iterable, Collection, Any, BinaryIO, TextIO from joblib import Parallel, delayed @@ -33,86 +32,73 @@ - file - compressed file """ -class FileCollectionType(Enum): + + +class ArchiveType(Enum): """ Enum for file collection types. """ TAR_GZ = '.tar.gz' ZIP = '.zip' - DIR = '' -class FileCollectionHandler: +class ArchiveHandler: def __init__(self, path: Path): self.path = path self.type = self._get_type(path) self.members = self._list_members() @staticmethod - def _get_type(path: Path) -> FileCollectionType: - if path.is_dir(): - return FileCollectionType.DIR - return FileCollectionType(path.suffix) # FIXME: .tar.gz is not recognized as a file type + def _get_type(path: Path) -> ArchiveType: + assert path.is_file() is True + if str(path).endswith(ArchiveType.TAR_GZ.value): + return ArchiveType.TAR_GZ + elif str(path).endswith(ArchiveType.ZIP.value): + return ArchiveType.ZIP + else: + raise ValueError(f'Unsupported archive type: {path}') def _list_members(self) -> List[Path]: - if self.type == FileCollectionType.DIR: - return [p.relative_to(self.path) for p in self.path.glob('**/*') if p.is_file()] - elif self.type == FileCollectionType.TAR_GZ: + if self.type == ArchiveType.TAR_GZ: with tarfile.open(self.path, mode='r') as f: return [Path(p) for p in f.getnames()] - elif self.type == FileCollectionType.ZIP: + elif self.type == ArchiveType.ZIP: with zipfile.ZipFile(self.path, mode='r') as f: return [Path(p) for p in f.namelist()] else: - raise ValueError(f'Unsupported collection type: {self.type}') + raise ValueError(f'Unsupported archive type: {self.type}') @contextmanager - def open(self): + def open(self) -> Union[zipfile.ZipFile, tarfile.TarFile]: + file = None try: - if self.type == FileCollectionType.DIR: - file = self.path - elif self.type == FileCollectionType.TAR_GZ: + if self.type == ArchiveType.TAR_GZ: file = tarfile.open(self.path, mode='r') - elif self.type == FileCollectionType.ZIP: + elif self.type == ArchiveType.ZIP: file = zipfile.ZipFile(self.path, mode='r') else: - file = None - raise ValueError(f'Unsupported collection type: {self.type}') + raise ValueError(f'Unsupported archive type: {self.type}') yield file finally: hasattr(file, 'close') and file.close() @contextmanager - def open_member(self, file, member: Path): + def open_member(self, archive: Union[zipfile.ZipFile, tarfile.TarFile], member: str) -> BinaryIO: + file = None try: - if self.type == FileCollectionType.TAR_GZ: - f = file.extractfile(member) - elif self.type == FileCollectionType.ZIP: - f = file.open(member) - elif self.type == FileCollectionType.DIR: - f = (file / member).open() + if self.type == ArchiveType.TAR_GZ: + file = archive.extractfile(member) + elif self.type == ArchiveType.ZIP: + file = archive.open(member) else: - f = None - raise ValueError(f'Unsupported collection type: {self.type}') + raise ValueError(f'Unsupported archive type: {self.type}') yield file finally: - f.close() - - # def list_contents(self) -> List[str]: - # if self.type == FileCollectionType.DIR: - # return [p.name for p in self.path.glob('**/*') if p.is_file()] - # elif self.type == FileCollectionType.TAR_GZ: - # with tarfile.open(self.path, mode='r') as f: - # return f.get_names() - # elif self.type == FileCollectionType.ZIP: - # with zipfile.ZipFile(self.path, mode='r') as f: - # return f.namelist() - # else: - # raise ValueError(f'Unsupported file type: {self.type}') + hasattr(file, 'close') and file.close() @classmethod def is_supported_path(cls, path: Path) -> bool: - return path.is_dir() or path.suffix in (t.value for t in FileCollectionType) + return any(str(path).endswith(t.value) for t in ArchiveType) # class ArchiveHandler: @@ -184,9 +170,52 @@ def is_supported_path(cls, path: Path) -> bool: # ".zip": ZipHandler # } -COMPRESS2OPEN: Dict[str, Callable] = { - ".gz": partial(gzip.open, mode='rt'), -} + +class FileType(Enum): + """Enum for file types.""" + GZ = '.gz' + # XZ = '.xz' + UNCOMPRESSED = '' + + +class FileHandler: + def __init__(self, path: Path): + self.path = path + self.type: FileType = self._get_type(path) + + @property + def content_basename(self) -> str: + if self.type == FileType.UNCOMPRESSED: + return self.path.name + return self.path.name[:-len(self.type.value)] + + @staticmethod + def _get_type(path: Path) -> FileType: + if path.suffix == FileType.GZ.value: + return FileType.GZ + return FileType.UNCOMPRESSED + + @contextmanager + def open(self): + file = None + try: + if self.type == FileType.GZ: + file = gzip.open(self.path, mode='rt') + elif self.type == FileType.UNCOMPRESSED: + file = self.path.open(mode='rt') + else: + raise ValueError(f'Unsupported collection type: {self.type}') + yield file + finally: + hasattr(file, 'close') and file.close() + + def __lt__(self, other): + return self.path < other.path + + +# COMPRESS2OPEN: Dict[str, Callable] = { +# ".gz": partial(gzip.open, mode='rt'), +# } class KyotoReader: @@ -203,12 +232,9 @@ class KyotoReader: knp_ext (str): KWDLC または KC ファイルの拡張子 (default: knp) pickle_ext (str): Document を pickle 形式で読む場合の拡張子 (default: pkl) use_pas_tag (bool): タグからではなく、<述語項構造:>タグから PAS を読むかどうか (default: False) - recursive (bool): source がディレクトリの場合、文書ファイルを再帰的に探索するかどうか (default: False) mp_backend (Optional[str]): 'multiprocessing', 'joblib', or None (default: 'multiprocessing') n_jobs (int): 文書を読み込む処理の並列数 (default: -1(=コア数)) did_from_sid (bool): 文書IDを文書中のS-IDから決定する (default: True) - archive2handler (Dict[str, Callable]): 拡張子と対応するアーカイブハンドラーの辞書 (default: ARCHIVE2HANDLER) - compress2open (Dict[str, Callable]): 拡張子と対応するファイルオープン関数の辞書 (default: COMPRESS2OPEN) """ def __init__(self, @@ -228,95 +254,42 @@ def __init__(self, # compress2open: Dict[str, Callable] = COMPRESS2OPEN ) -> None: if not (isinstance(source, Path) or isinstance(source, str)): - raise TypeError( - f"document source must be Path or str type, but got '{type(source)}' type") + raise TypeError(f"document source must be Path or str type, but got '{type(source)}' type") source = Path(source) - source_suffix = ''.join(source.suffixes) - # self.archive_handler = None - - if FileCollectionHandler.is_supported_path(source): - self.handler = FileCollectionHandler(source) - allowed_single_file_ext = (knp_ext, pickle_ext) - # file_paths = sorted(Path(p) for p in self.handler.list_contents() if ''.join(Path(p).suffixes) in allowed_single_file_ext) - self.did2pkls = {path.stem: path for path in self.handler.members if pickle_ext in path.suffixes} - # get did2knps - with self.handler.open() as collection: - args_iter = ( - (self, path, did_from_sid, collection) - for path in self.handler.members if knp_ext in path.suffixes - ) - # ここで handler 用の read_knp を呼びたい - rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) - self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) + self.archive_handler = None + + if source.is_dir(): + logger.info(f'got a directory path, files in the directory are treated as source files') + file_paths: List[FileHandler] = sorted(FileHandler(p) for p in source.glob(f'**/*') if p.is_file()) + elif ArchiveHandler.is_supported_path(source): + logger.info(f'got an archive file path, files in the archive are treated as source files') + self.archive_handler = ArchiveHandler(source) + file_paths: List[FileHandler] = sorted(FileHandler(p) for p in self.archive_handler.members) else: + logger.info(f'got a single file path, this file is treated as a source file') assert source.is_file() is True - self.did2pkls = {path.stem: path for path in [source] if pickle_ext in path.suffixes} - rets: List[Dict[str, str]] = [self.read_knp(source, did_from_sid)] - self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) - - # if source.is_dir(): - # # Yields all allowed single-file extension (e.g. .knp, .pkl.gz) - # allowed_single_file_ext = list( - # "".join(x) for x in product((knp_ext, pickle_ext), (("",) + tuple(compress2open.keys())))) - # logger.info( - # f'got directory path, files in the directory is treated as source files') - # file_paths: List[Path] = [] - # for ext in allowed_single_file_ext: - # file_paths += sorted(source.glob( - # f'**/*{ext}' if recursive else f'*{ext}')) - # # If source file is an archive, build handler - # elif source_suffix in archive2handler: - # logger.info( - # f'got compressed file, files in the compressed file are treated as source files') - # # Compressed files are prohibited. - # allowed_single_file_ext = (knp_ext, pickle_ext) - # self.archive_handler = archive2handler[source_suffix](source) - # with self.archive_handler.open() as archive: - # file_paths = sorted( - # Path(x) for x in self.archive_handler.get_member_names(archive) - # if "".join(Path(x).suffixes) in allowed_single_file_ext - # ) - # else: - # logger.info( - # f'got file path, this file is treated as a source knp file') - # file_paths = [source] - # self.did2pkls: Dict[str, Path] = { - # path.stem: path for path in file_paths if pickle_ext in path.suffixes} + file_paths: List[FileHandler] = [FileHandler(source)] + + self.did2pkls = {path: path for path in file_paths if path.content_basename.endswith(pickle_ext)} + self.mp_backend: Optional[str] = mp_backend if n_jobs != 0 else None - if self.mp_backend is not None and self.handler is not None: + if self.mp_backend is not None and self.archive_handler is not None: + logger.info('Multiprocessing with archive is too slow, so it is disabled') logger.info( - "Multiprocessing with archive is too slow, so it is disabled") - logger.info( - "Run without multiprocessing can be relatively slow, so please consider unarchive the archive file") + 'Running without multiprocessing can be relatively slow, consider unarchiving the input file in advance' + ) self.mp_backend = None self.n_jobs: int = n_jobs - # if FileCollectionHandler.is_supported_path(source): - # with self.handler.open() as collection: - # args_iter = ( - # (self, path, did_from_sid, collection) - # for path in self.handler.members if knp_ext in path.suffixes - # ) - # rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) - # else: - # rets: List[Dict[str, str]] = [self.read_knp(source, did_from_sid)] - - # This must be set before read_knp is called. - # self.compress2open = compress2open - # if self.archive_handler is not None: - # with self.archive_handler.open() as archive: - # args_iter = ( - # (self, path, did_from_sid, archive) - # for path in file_paths if knp_ext in path.suffixes - # ) - # rets: List[Dict[str, str]] = self._mp_wrapper(KyotoReader.read_knp, args_iter, self.mp_backend, - # self.n_jobs) - # else: - # args_iter = ((self, path, did_from_sid, None) - # for path in file_paths if knp_ext in path.suffixes) - # rets: List[Dict[str, str]] = self._mp_wrapper( - # KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs) - - # self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) + + with (self.archive_handler.open() if self.archive_handler else nullcontext()) as archive: + args_iter = ( + (self, path, did_from_sid, archive) for path in file_paths if path.content_basename.endswith(knp_ext) + ) + rets: List[Dict[str, str]] = self._mp_wrapper( + KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs + ) + self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) + self.doc_ids: List[str] = sorted( set(self.did2knps.keys()) | set(self.did2pkls.keys())) @@ -332,69 +305,59 @@ def __init__(self, def read_knp( self, - path: Path, + file: FileHandler, did_from_sid: bool, - archive: Optional[Union[zipfile.ZipFile, tarfile.TarFile]] = None + archive: Optional[Union[zipfile.ZipFile, tarfile.TarFile]] = None, ) -> Dict[str, str]: """Read KNP format file that is located at the specified path. The file can contain multiple documents. Args: - path (Path): A path to a KNP format file. + file (FileHandler): A file handler indicating a path to a KNP format file. did_from_sid (bool): If True, determine the document ID from the sentence ID in the document. archive (Optional[Union[zipfile.ZipFile, tarfile.TarFile]]): An archive to read the document from. Returns: Dict[str, str]: A mapping from a document ID to a KNP format string. """ - did2knps = {} - - def _read_knp(f): - buff = '' - did = sid = None - for line in f: - if line.startswith('# S-ID:') and did_from_sid: - sid_string = line[7:].strip().split()[0] - match = SID_PTN_KWDLC.match( - sid_string) or SID_PTN.match(sid_string) - if match is None: - raise ValueError( - f'unsupported S-ID format: {sid_string} in {path}') - if did != match.group('did') or sid == match.group('sid'): - if did is not None: - did2knps[did] = buff - buff = '' - did = match.group('did') - sid = match.group('sid') - buff += line - if did_from_sid is False: - did = path.stem - if did is not None and buff: - did2knps[did] = buff - else: - logger.warning(f'empty file found and skipped: {path}') if archive is not None: - with self.handler.open_member(archive, path) as f: - _read_knp(f) # ここで f は .knp.gz とかかもしれないのでやはり archive と directory は分けたほうが良さそう - # if archive is not None: - # with self.archive_handler.open_member(archive, str(path)) as f: - # text = f.read().decode("utf-8") - # _read_knp(text.split("\n")) - # else: - # if any(key in path.suffixes for key in self.compress2open): - # compress = set(path.suffixes) & set(self.compress2open.keys()) - # assert len(compress) == 1 - # _open = self.compress2open[compress.pop()] - # else: - # _open = open - # with _open(path) as f: - # _read_knp(f.readlines()) + with self.archive_handler.open_member(archive, str(file.path)) as f: + return self._read_knp(io.TextIOWrapper(f, encoding='utf-8'), file.path, did_from_sid) + else: + with file.open() as f: + return self._read_knp(f, file.path, did_from_sid) + @staticmethod + def _read_knp(file: TextIO, + path: Path, + did_from_sid: bool + ): + buff = '' + did = sid = None + did2knps = {} + for line in file: + if line.startswith('# S-ID:') and did_from_sid: + sid_string = line[7:].strip().split()[0] + match = SID_PTN_KWDLC.match( + sid_string) or SID_PTN.match(sid_string) + if match is None: + raise ValueError( + f'unsupported S-ID format: {sid_string} in {path}') + if did != match.group('did') or sid == match.group('sid'): + if did is not None: + did2knps[did] = buff + buff = '' + did = match.group('did') + sid = match.group('sid') + buff += line + if did_from_sid is False: + did = path.stem + if did is not None and buff: + did2knps[did] = buff + else: + logger.warning(f'empty file found and skipped: {path}') return did2knps - def open_single_file(self, path): - pass - @staticmethod def _get_targets(input_: Optional[Collection], all_: Collection[Any], @@ -423,9 +386,12 @@ def process_document( archive (Optional[Union[zipfile.ZipFile, tarfile.TarFile]]): An archive to read the document from. """ if doc_id in self.did2pkls: - _open = open if archive is None else archive.open - with _open(self.did2pkls[doc_id], 'rb') as f: - return cPickle.load(f) + if archive is not None: + with self.archive_handler.open_member(archive, str(self.did2pkls[doc_id].path)) as f: + return pickle.load(f) + else: + with self.did2pkls[doc_id].open(mode='rb') as f: + return pickle.load(f) elif doc_id in self.did2knps: return Document(self.did2knps[doc_id], doc_id, @@ -452,11 +418,8 @@ def process_documents(self, n_jobs = self.n_jobs if self.archive_handler is not None: assert self.mp_backend is None - with self.archive_handler.open() as archive: - args_iter = zip(repeat(self), doc_ids, repeat(archive)) - return self._mp_wrapper(KyotoReader.process_document, args_iter, self.mp_backend, n_jobs) - else: - args_iter = zip(repeat(self), doc_ids) + with (self.archive_handler.open() if self.archive_handler else nullcontext()) as archive: + args_iter = zip(repeat(self), doc_ids, repeat(archive)) return self._mp_wrapper(KyotoReader.process_document, args_iter, self.mp_backend, n_jobs) def process_all_documents(self, diff --git a/tests/conftest.py b/tests/conftest.py index 0edcea9..c8e0847 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,8 @@ -import pytest from pathlib import Path -from kyoto_reader import KyotoReader -from kyoto_reader import ALL_CASES, ALL_COREFS +import pytest +from kyoto_reader import KyotoReader, ALL_CASES, ALL_COREFS data_dir = Path(__file__).parent / 'data' @@ -12,5 +11,6 @@ def fixture_kyoto_reader(): reader = KyotoReader(data_dir / 'knp', target_cases=ALL_CASES, - target_corefs=ALL_COREFS) + target_corefs=ALL_COREFS, + ) yield reader From a42af7c21c636e3b7c4ee7ac4246de2632a30e27 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:11:04 +0900 Subject: [PATCH 03/13] refactor --- src/kyoto_reader/reader.py | 84 ++++---------------------------------- 1 file changed, 7 insertions(+), 77 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 668e057..c321845 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -44,9 +44,9 @@ class ArchiveType(Enum): class ArchiveHandler: def __init__(self, path: Path): - self.path = path - self.type = self._get_type(path) - self.members = self._list_members() + self.path: Path = path + self.type: ArchiveType = self._get_type(path) + self.members: List[str] = self._list_members() @staticmethod def _get_type(path: Path) -> ArchiveType: @@ -58,13 +58,13 @@ def _get_type(path: Path) -> ArchiveType: else: raise ValueError(f'Unsupported archive type: {path}') - def _list_members(self) -> List[Path]: + def _list_members(self) -> List[str]: if self.type == ArchiveType.TAR_GZ: with tarfile.open(self.path, mode='r') as f: - return [Path(p) for p in f.getnames()] + return f.getnames() elif self.type == ArchiveType.ZIP: with zipfile.ZipFile(self.path, mode='r') as f: - return [Path(p) for p in f.namelist()] + return f.namelist() else: raise ValueError(f'Unsupported archive type: {self.type}') @@ -101,76 +101,6 @@ def is_supported_path(cls, path: Path) -> bool: return any(str(path).endswith(t.value) for t in ArchiveType) -# class ArchiveHandler: -# """Base class for handling archive. -# Each subclass should correspond with one extension (e.g. .zip). -# """ -# -# def __init__(self, archive_path: Path): -# self.archive_path = archive_path -# -# @abstractmethod -# def _open(self, path: Path) -> Any: -# """Return a file-like object for the given path.""" -# raise NotImplementedError -# -# @contextmanager -# def open(self): -# """Main function to open the archive. -# Specific open function for each extension should be implemented in a subclass. -# """ -# f = self._open(self.archive_path) -# try: -# yield f -# finally: -# f.close() -# -# @abstractmethod -# def get_member_names(self, f: Any) -> List[str]: -# """Get all file names in archive.""" -# raise NotImplementedError -# -# @abstractmethod -# def open_member(self, f: Any, path: str): -# """Extract file object from archive""" -# raise NotImplementedError -# -# -# class TarGzipHandler(ArchiveHandler): -# def _open(self, path: Path) -> tarfile.TarFile: -# return tarfile.open(path) -# -# def get_member_names(self, f: tarfile.TarFile) -> List[str]: -# return getattr(f, "getnames")() -# -# @contextmanager -# def open_member(self, f: tarfile.TarFile, path: str): -# bytes = f.extractfile(path) -# yield bytes -# -# -# class ZipHandler(ArchiveHandler): -# def _open(self, path: Path) -> zipfile.ZipFile: -# return zipfile.ZipFile(path) -# -# def get_member_names(self, f: zipfile.ZipFile) -> List[str]: -# return getattr(f, "namelist")() -# -# @contextmanager -# def open_member(self, f: zipfile.ZipFile, path: str): -# g = f.open(path) -# try: -# yield g -# finally: -# g.close() - - -# ARCHIVE2HANDLER: Dict[str, Callable] = { -# ".tar.gz": TarGzipHandler, -# ".zip": ZipHandler -# } - - class FileType(Enum): """Enum for file types.""" GZ = '.gz' @@ -264,7 +194,7 @@ def __init__(self, elif ArchiveHandler.is_supported_path(source): logger.info(f'got an archive file path, files in the archive are treated as source files') self.archive_handler = ArchiveHandler(source) - file_paths: List[FileHandler] = sorted(FileHandler(p) for p in self.archive_handler.members) + file_paths: List[FileHandler] = sorted(FileHandler(Path(p)) for p in self.archive_handler.members) else: logger.info(f'got a single file path, this file is treated as a source file') assert source.is_file() is True From 7c7a603fab4134f5b49ced9b863af4f809d5c680 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:19:42 +0900 Subject: [PATCH 04/13] add note --- src/kyoto_reader/reader.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index c321845..1819385 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -165,6 +165,12 @@ class KyotoReader: mp_backend (Optional[str]): 'multiprocessing', 'joblib', or None (default: 'multiprocessing') n_jobs (int): 文書を読み込む処理の並列数 (default: -1(=コア数)) did_from_sid (bool): 文書IDを文書中のS-IDから決定する (default: True) + + Note: + サポートされる入力パス (i.e. `source` argument) + - 単一ファイル (.knp, .knp.gz, .pkl, .pkl.gz) + - 単一ファイルを含むディレクトリ + - 単一非圧縮ファイルを含むアーカイブファイル (.tar.gz, .zip) """ def __init__(self, From 7634492d93d57f1a81ac2e20f7fa4b3f0d2a35cc Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:20:16 +0900 Subject: [PATCH 05/13] remove unnecessary codes --- src/kyoto_reader/reader.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 1819385..3a90b05 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -20,19 +20,6 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) -# 実装方針 -""" -dir と archive は分ける -.gz.zip は無理なのでサポートしない -または予め archive は tmp dir に吐くとか -サポートするのは以下 -- archive -- directory -- compressed files in directory -- file -- compressed file -""" - class ArchiveType(Enum): """ @@ -143,11 +130,6 @@ def __lt__(self, other): return self.path < other.path -# COMPRESS2OPEN: Dict[str, Callable] = { -# ".gz": partial(gzip.open, mode='rt'), -# } - - class KyotoReader: """A class to manage a set of corpus documents. Compressed file is supported. @@ -182,12 +164,9 @@ def __init__(self, use_pas_tag: bool = False, knp_ext: str = '.knp', pickle_ext: str = '.pkl', - # recursive: bool = False, mp_backend: Optional[str] = 'multiprocessing', n_jobs: int = -1, did_from_sid: bool = True, - # archive2handler: Dict[str, ArchiveHandler] = ARCHIVE2HANDLER, - # compress2open: Dict[str, Callable] = COMPRESS2OPEN ) -> None: if not (isinstance(source, Path) or isinstance(source, str)): raise TypeError(f"document source must be Path or str type, but got '{type(source)}' type") From 0e9737f233cb0148a376ed558bf1c606e2f729ea Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:34:32 +0900 Subject: [PATCH 06/13] refactor --- src/kyoto_reader/reader.py | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 3a90b05..848c5d1 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -29,8 +29,11 @@ class ArchiveType(Enum): ZIP = '.zip' +ArchiveFile = Union[tarfile.TarFile, zipfile.ZipFile] + + class ArchiveHandler: - def __init__(self, path: Path): + def __init__(self, path: Path) -> None: self.path: Path = path self.type: ArchiveType = self._get_type(path) self.members: List[str] = self._list_members() @@ -56,7 +59,7 @@ def _list_members(self) -> List[str]: raise ValueError(f'Unsupported archive type: {self.type}') @contextmanager - def open(self) -> Union[zipfile.ZipFile, tarfile.TarFile]: + def open(self) -> ArchiveFile: file = None try: if self.type == ArchiveType.TAR_GZ: @@ -70,7 +73,7 @@ def open(self) -> Union[zipfile.ZipFile, tarfile.TarFile]: hasattr(file, 'close') and file.close() @contextmanager - def open_member(self, archive: Union[zipfile.ZipFile, tarfile.TarFile], member: str) -> BinaryIO: + def open_member(self, archive: ArchiveFile, member: str) -> BinaryIO: file = None try: if self.type == ArchiveType.TAR_GZ: @@ -96,8 +99,8 @@ class FileType(Enum): class FileHandler: - def __init__(self, path: Path): - self.path = path + def __init__(self, path: Path) -> None: + self.path: Path = path self.type: FileType = self._get_type(path) @property @@ -113,7 +116,7 @@ def _get_type(path: Path) -> FileType: return FileType.UNCOMPRESSED @contextmanager - def open(self): + def open(self) -> TextIO: file = None try: if self.type == FileType.GZ: @@ -126,7 +129,7 @@ def open(self): finally: hasattr(file, 'close') and file.close() - def __lt__(self, other): + def __lt__(self, other) -> bool: return self.path < other.path @@ -205,25 +208,21 @@ def __init__(self, ) self.did2knps: Dict[str, str] = dict(ChainMap(*rets)) - self.doc_ids: List[str] = sorted( - set(self.did2knps.keys()) | set(self.did2pkls.keys())) + self.doc_ids: List[str] = sorted(set(self.did2knps.keys()) | set(self.did2pkls.keys())) - self.target_cases: Collection[str] = self._get_targets( - target_cases, ALL_CASES, 'case') - self.target_corefs: Collection[str] = self._get_targets( - target_corefs, ALL_COREFS, 'coref') + self.target_cases: Collection[str] = self._get_targets(target_cases, ALL_CASES, 'case') + self.target_corefs: Collection[str] = self._get_targets(target_corefs, ALL_COREFS, 'coref') self.relax_cases: bool = relax_cases self.extract_nes: bool = extract_nes self.use_pas_tag: bool = use_pas_tag self.knp_ext: str = knp_ext self.pickle_ext: str = pickle_ext - def read_knp( - self, - file: FileHandler, - did_from_sid: bool, - archive: Optional[Union[zipfile.ZipFile, tarfile.TarFile]] = None, - ) -> Dict[str, str]: + def read_knp(self, + file: FileHandler, + did_from_sid: bool, + archive: Optional[ArchiveFile] = None, + ) -> Dict[str, str]: """Read KNP format file that is located at the specified path. The file can contain multiple documents. Args: @@ -292,7 +291,7 @@ def _get_targets(input_: Optional[Collection], def process_document( self, doc_id: str, - archive: Optional[Union[zipfile.ZipFile, tarfile.TarFile]] = None + archive: Optional[ArchiveFile] = None ) -> Optional[Document]: """Process one document following the given document ID. From 1e877d65975336e2bc8350ddb7d332511466813f Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:35:55 +0900 Subject: [PATCH 07/13] add autopep8 --- Pipfile | 1 + Pipfile.lock | 293 ++++++++++++++++++++----------------------------- pyproject.toml | 9 +- 3 files changed, 126 insertions(+), 177 deletions(-) diff --git a/Pipfile b/Pipfile index 85818a3..4bbfc1c 100644 --- a/Pipfile +++ b/Pipfile @@ -13,6 +13,7 @@ sphinx-autodoc-typehints = "*" recommonmark = "*" twine = "*" sphinx-rtd-theme = "*" +autopep8 = "*" [packages] diff --git a/Pipfile.lock b/Pipfile.lock index a3adc9a..2162acd 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "309996ad9bb8f81dc55da277aca421b044d3225e7508c4f058b106fa9899cfae" + "sha256": "6d509bf45f4f8c5e9f94820e585005e9c2654d2995a35ae6da6bd867dcac2ac4" }, "pipfile-spec": 6, "requires": { @@ -47,6 +47,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==21.4.0" }, + "autopep8": { + "hashes": [ + "sha256:44f0932855039d2c15c4510d6df665e4730f2b8582704fa48f9c55bd3e17d979", + "sha256:ed77137193bbac52d029a52c59bec1b0629b5a186c495f1eb21b126ac466083f" + ], + "index": "pypi", + "version": "==1.6.0" + }, "babel": { "hashes": [ "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9", @@ -62,14 +70,6 @@ ], "version": "==0.2.0" }, - "black": { - "hashes": [ - "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3", - "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f" - ], - "markers": "python_full_version >= '3.6.2'", - "version": "==21.12b0" - }, "bleach": { "hashes": [ "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da", @@ -87,19 +87,11 @@ }, "charset-normalizer": { "hashes": [ - "sha256:876d180e9d7432c5d1dfd4c5d26b72f099d503e8fcc0feb7532c9289be60fcbd", - "sha256:cb957888737fc0bbcd78e3df769addb41fd1ff8cf950dc9e7ad7793f1bf44455" + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" ], "markers": "python_version >= '3'", - "version": "==2.0.10" - }, - "click": { - "hashes": [ - "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3", - "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b" - ], - "markers": "python_version >= '3.6'", - "version": "==8.0.3" + "version": "==2.0.12" }, "colorama": { "hashes": [ @@ -121,7 +113,7 @@ "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" ], - "markers": "python_version >= '3.5'", + "markers": "python_version >= '3.7'", "version": "==5.1.1" }, "docutils": { @@ -134,10 +126,10 @@ }, "executing": { "hashes": [ - "sha256:32fc6077b103bd19e6494a72682d66d5763cf20a106d5aa7c5ccbea4e47b0df7", - "sha256:c23bf42e9a7b9b212f185b1b2c3c91feb895963378887bb10e64a2e612ec0023" + "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501", + "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9" ], - "version": "==0.8.2" + "version": "==0.8.3" }, "idna": { "hashes": [ @@ -157,11 +149,11 @@ }, "importlib-metadata": { "hashes": [ - "sha256:899e2a40a8c4a1aec681feef45733de8a6c58f3f6a0dbed2eb6574b4387a77b6", - "sha256:951f0d8a5b7260e9db5e41d429285b5f451e928479f19d80818878527d36e95e" + "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6", + "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539" ], "markers": "python_version < '3.10'", - "version": "==4.10.1" + "version": "==4.11.3" }, "iniconfig": { "hashes": [ @@ -179,11 +171,11 @@ }, "ipython": { "hashes": [ - "sha256:ab564d4521ea8ceaac26c3a2c6e5ddbca15c8848fd5a5cc325f960da88d42974", - "sha256:c503a0dd6ccac9c8c260b211f2dd4479c042b49636b097cc9a0d55fe62dff64c" + "sha256:1b672bfd7a48d87ab203d9af8727a3b0174a4566b4091e9447c22fb63ea32857", + "sha256:70e5eb132cac594a34b5f799bd252589009905f05104728aea6a403ec2519dc1" ], - "markers": "python_version >= '3.8'", - "version": "==8.0.1" + "markers": "python_version >= '3.7'", + "version": "==8.2.0" }, "jaconv": { "hashes": [ @@ -201,11 +193,11 @@ }, "jinja2": { "hashes": [ - "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8", - "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7" + "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119", + "sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9" ], - "markers": "python_version >= '3.6'", - "version": "==3.0.3" + "markers": "python_version >= '3.7'", + "version": "==3.1.1" }, "joblib": { "hashes": [ @@ -235,78 +227,49 @@ }, "markupsafe": { "hashes": [ - "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", - "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64", - "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b", - "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194", - "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567", - "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff", - "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724", - "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74", - "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646", - "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35", - "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6", - "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a", - "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6", - "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad", - "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26", - "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38", - "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac", - "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7", - "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6", - "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047", - "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75", - "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f", - "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b", - "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135", - "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8", - "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a", - "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a", - "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1", - "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9", - "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864", - "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914", - "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee", - "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f", - "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18", - "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8", - "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2", - "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d", - "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b", - "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b", - "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86", - "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6", - "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f", - "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb", - "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833", - "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28", - "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e", - "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415", - "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902", - "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f", - "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d", - "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9", - "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d", - "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145", - "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066", - "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c", - "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1", - "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a", - "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207", - "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f", - "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53", - "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd", - "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134", - "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85", - "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9", - "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5", - "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94", - "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509", - "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51", - "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872" + "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", + "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", + "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", + "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", + "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", + "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", + "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", + "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", + "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", + "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", + "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", + "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", + "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", + "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", + "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", + "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", + "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", + "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", + "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", + "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", + "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", + "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", + "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", + "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", + "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", + "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", + "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", + "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", + "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", + "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", + "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", + "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", + "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", + "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", + "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", + "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", + "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", + "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", + "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", + "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" ], - "markers": "python_version >= '3.6'", - "version": "==2.0.1" + "markers": "python_version >= '3.7'", + "version": "==2.1.1" }, "matplotlib-inline": { "hashes": [ @@ -316,13 +279,6 @@ "markers": "python_version >= '3.5'", "version": "==0.1.3" }, - "mypy-extensions": { - "hashes": [ - "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", - "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" - ], - "version": "==0.4.3" - }, "packaging": { "hashes": [ "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", @@ -339,13 +295,6 @@ "markers": "python_version >= '3.6'", "version": "==0.8.3" }, - "pathspec": { - "hashes": [ - "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", - "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1" - ], - "version": "==0.9.0" - }, "pexpect": { "hashes": [ "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", @@ -368,14 +317,6 @@ ], "version": "==1.8.2" }, - "platformdirs": { - "hashes": [ - "sha256:1d7385c7db91728b83efd0ca99a5afb296cab9d0ed8313a45ed8ba17967ecfca", - "sha256:440633ddfebcc36264232365d7840a970e75e1018d15b4327d11f91909045fda" - ], - "markers": "python_version >= '3.7'", - "version": "==2.4.1" - }, "pluggy": { "hashes": [ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", @@ -386,11 +327,11 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:1bb05628c7d87b645974a1bad3f17612be0c29fa39af9f7688030163f680bad6", - "sha256:e56f2ff799bacecd3e88165b1e2f5ebf9bcd59e80e06d395fa0cc4b8bd7bb506" + "sha256:30129d870dcb0b3b6a53efdc9d0a83ea96162ffd28ffe077e94215b233dc670c", + "sha256:9f1cd16b1e86c2968f2519d7fb31dd9d669916f515612c269d14e9ed52b51650" ], "markers": "python_full_version >= '3.6.2'", - "version": "==3.0.24" + "version": "==3.0.28" }, "ptyprocess": { "hashes": [ @@ -401,10 +342,10 @@ }, "pure-eval": { "hashes": [ - "sha256:0f04483b16c9429532d2c0ddc96e2b3bb6b2dc37a2bfb0e986248dbfd0b78873", - "sha256:94eeb505a88721bec7bb21a4ac49758b8b1a01530da1a70d4ffc1d9937689d71" + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" ], - "version": "==0.2.1" + "version": "==0.2.2" }, "py": { "hashes": [ @@ -414,6 +355,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==1.11.0" }, + "pycodestyle": { + "hashes": [ + "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20", + "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==2.8.0" + }, "pygments": { "hashes": [ "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", @@ -427,7 +376,7 @@ "sha256:5fca288a70304c745d20da5657c6175b5eadf7a7a571ed4f57bb157eb6e7df4d", "sha256:c988cbd92d645e43d51a406835147a7c24aa849894802e22fcf5c4e1475da805" ], - "markers": "python_version >= '3.7' and python_version < '4'", + "markers": "python_version >= '3.7' and python_version < '4.0'", "version": "==0.6.1" }, "pyparsing": { @@ -440,26 +389,26 @@ }, "pytest": { "hashes": [ - "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89", - "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134" + "sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63", + "sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea" ], "index": "pypi", - "version": "==6.2.5" + "version": "==7.1.1" }, "pytz": { "hashes": [ - "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", - "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" ], - "version": "==2021.3" + "version": "==2022.1" }, "readme-renderer": { "hashes": [ - "sha256:a50a0f2123a4c1145ac6f420e1a348aafefcc9211c846e3d51df05fe3d865b7d", - "sha256:b512beafa6798260c7d5af3e1b1f097e58bfcd9a575da7c4ddd5e037490a5b85" + "sha256:262510fe6aae81ed4e94d8b169077f325614c0b1a45916a80442c6576264a9c2", + "sha256:dfb4d17f21706d145f7473e0b61ca245ba58e810cf9b2209a48239677f82e5b0" ], "markers": "python_version >= '3.6'", - "version": "==32.0" + "version": "==34.0" }, "recommonmark": { "hashes": [ @@ -494,11 +443,11 @@ }, "setuptools": { "hashes": [ - "sha256:2404879cda71495fc4d5cbc445ed52fdaddf352b36e40be8dcc63147cb4edabe", - "sha256:68eb94073fc486091447fcb0501efd6560a0e5a1839ba249e5ff3c4c93f05f90" + "sha256:89eef7b71423ab7fccc7dfafdc145410ef170c4a89567427f932448135e08cdf", + "sha256:92b15f45ab164eb0c410d2bf661a6e9d15e3b78c0dffb0325f2bf0f313071cae" ], "markers": "python_version >= '3.7'", - "version": "==60.5.0" + "version": "==61.1.1" }, "six": { "hashes": [ @@ -533,11 +482,11 @@ }, "sphinx-autodoc-typehints": { "hashes": [ - "sha256:5bf7ae28f98dd2cae8b93c4f723fb5f4761ae340194d02d2742a81c7694d2e0a", - "sha256:91eb3c227ff0824bee39fabf92d92e21835546903d2ddde5467fd9b6fa10ad7f" + "sha256:081daf53077b4ae1c28347d6d858e13e63aefe3b4aacef79fd717dd60687b470", + "sha256:51c7b3f5cb9ccd15d0b52088c62df3094f1abd9612930340365c26def8629a14" ], "index": "pypi", - "version": "==1.15.3" + "version": "==1.17.0" }, "sphinx-rtd-theme": { "hashes": [ @@ -597,10 +546,10 @@ }, "stack-data": { "hashes": [ - "sha256:02cc0683cbc445ae4ca8c4e3a0e58cb1df59f252efb0aa016b34804a707cf9bc", - "sha256:7769ed2482ce0030e00175dd1bf4ef1e873603b6ab61cd3da443b410e64e9477" + "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12", + "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e" ], - "version": "==0.1.4" + "version": "==0.2.0" }, "toml": { "hashes": [ @@ -612,11 +561,11 @@ }, "tomli": { "hashes": [ - "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f", - "sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c" + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.6'", - "version": "==1.2.3" + "markers": "python_version >= '3.7'", + "version": "==2.0.1" }, "tornado": { "hashes": [ @@ -662,16 +611,16 @@ "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68", "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5" ], - "markers": "python_version >= '3.5'", + "markers": "python_version > '2.7'", "version": "==6.1" }, "tqdm": { "hashes": [ - "sha256:8dd278a422499cd6b727e6ae4061c40b48fce8b76d1ccbf5d34fca9b7f925b0c", - "sha256:d359de7217506c9851b7869f3708d8ee53ed70a1b8edbba4dbcb47442592920d" + "sha256:4230a49119a416c88cc47d0d2d32d5d90f1a282d5e497d49801950704e49863d", + "sha256:6461b009d6792008d0000e1b0c7ca50195ec78c0e808a3a6b668a56a3236c3a5" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==4.62.3" + "version": "==4.63.1" }, "traitlets": { "hashes": [ @@ -683,27 +632,19 @@ }, "twine": { "hashes": [ - "sha256:28460a3db6b4532bde6a5db6755cf2dce6c5020bada8a641bb2c5c7a9b1f35b8", - "sha256:8c120845fc05270f9ee3e9d7ebbed29ea840e41f48cd059e04733f7e1d401345" + "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19", + "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8" ], "index": "pypi", - "version": "==3.7.1" - }, - "typing-extensions": { - "hashes": [ - "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e", - "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b" - ], - "markers": "python_version >= '3.6'", - "version": "==4.0.1" + "version": "==3.8.0" }, "urllib3": { "hashes": [ - "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed", - "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c" + "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", + "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.8" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'", + "version": "==1.26.9" }, "wcwidth": { "hashes": [ diff --git a/pyproject.toml b/pyproject.toml index b0471b7..4c59339 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,10 @@ [build-system] requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta:__legacy__" \ No newline at end of file +build-backend = "setuptools.build_meta:__legacy__" + +[tool.autopep8] +max_line_length = 120 +ignore = "E501,W6" # or ["E501", "W6"] +in-place = true +recursive = true +aggressive = 3 From c4a591cd91121ce2379b51ad8e4e498a99cc5ab8 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:36:33 +0900 Subject: [PATCH 08/13] apply autopep8 --- src/kyoto_reader/base_phrase.py | 1 + src/kyoto_reader/coreference.py | 2 ++ src/kyoto_reader/pas.py | 2 ++ src/kyoto_reader/sentence.py | 1 + 4 files changed, 6 insertions(+) diff --git a/src/kyoto_reader/base_phrase.py b/src/kyoto_reader/base_phrase.py index f0fb410..36f4b26 100644 --- a/src/kyoto_reader/base_phrase.py +++ b/src/kyoto_reader/base_phrase.py @@ -18,6 +18,7 @@ class BasePhrase: parent (Optional[BasePhrase]): Dependency parent. children (List[BasePhrase]): Dependency children. """ + def __init__(self, tag: Tag, dmid_offset: int, diff --git a/src/kyoto_reader/coreference.py b/src/kyoto_reader/coreference.py index 1ba7844..bc5eda1 100644 --- a/src/kyoto_reader/coreference.py +++ b/src/kyoto_reader/coreference.py @@ -17,6 +17,7 @@ class Mention(BasePhrase): eids (set): Entity IDs. eids_unc (set): Uncertain entity IDs. "Uncertain" means the mention is annotated with "≒". """ + def __init__(self, bp: BasePhrase): super().__init__(bp.tag, bp.dmids[0], bp.dtid, bp.sid, bp.doc_id, parent=bp.parent, children=bp.children) self.eids: Set[int] = set() @@ -64,6 +65,7 @@ class Entity: taigen (bool, optional): Whether this entity is 体言 or not. yougen (bool, optional): Whether this entity is 用言 or not. """ + def __init__(self, eid: int, exophor: Optional[str] = None): self.eid: int = eid self.exophor: Optional[str] = exophor diff --git a/src/kyoto_reader/pas.py b/src/kyoto_reader/pas.py index 1f4ff56..166e535 100644 --- a/src/kyoto_reader/pas.py +++ b/src/kyoto_reader/pas.py @@ -15,6 +15,7 @@ class BaseArgument: """A base class for all kinds of arguments""" + def __init__(self, dep_type: str, mode: str): self.dep_type: str = dep_type self.mode: str = mode @@ -83,6 +84,7 @@ class SpecialArgument(BaseArgument): eid (int): 外界照応詞のエンティティID mode (str): モード """ + def __init__(self, exophor: str, eid: int, mode: str): self.eid = eid dep_type = 'exo' diff --git a/src/kyoto_reader/sentence.py b/src/kyoto_reader/sentence.py index c9e2a52..c046906 100644 --- a/src/kyoto_reader/sentence.py +++ b/src/kyoto_reader/sentence.py @@ -18,6 +18,7 @@ class Sentence: doc_id (str): The document ID of this sentence. bps (List[BasePhrase]): Base phrases in this sentence. """ + def __init__(self, knp_string: str, dtid_offset: int, From 79e060f3352809476dc84131dfb98bf2d004d4d4 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:39:43 +0900 Subject: [PATCH 09/13] tweak --- src/kyoto_reader/reader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 848c5d1..28b85c2 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -22,9 +22,7 @@ class ArchiveType(Enum): - """ - Enum for file collection types. - """ + """Enum for file collection types.""" TAR_GZ = '.tar.gz' ZIP = '.zip' From 7bbf77e77f208117633dd978b937cfb3ddbf37e4 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 00:43:27 +0900 Subject: [PATCH 10/13] fix a bug --- src/kyoto_reader/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index 28b85c2..ea9f817 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -186,7 +186,7 @@ def __init__(self, assert source.is_file() is True file_paths: List[FileHandler] = [FileHandler(source)] - self.did2pkls = {path: path for path in file_paths if path.content_basename.endswith(pickle_ext)} + self.did2pkls = {file.path.stem: file for file in file_paths if file.content_basename.endswith(pickle_ext)} self.mp_backend: Optional[str] = mp_backend if n_jobs != 0 else None if self.mp_backend is not None and self.archive_handler is not None: @@ -199,7 +199,7 @@ def __init__(self, with (self.archive_handler.open() if self.archive_handler else nullcontext()) as archive: args_iter = ( - (self, path, did_from_sid, archive) for path in file_paths if path.content_basename.endswith(knp_ext) + (self, file, did_from_sid, archive) for file in file_paths if file.content_basename.endswith(knp_ext) ) rets: List[Dict[str, str]] = self._mp_wrapper( KyotoReader.read_knp, args_iter, self.mp_backend, self.n_jobs From fcc0074d4fe6d26d1c98a7aba29d709dba759dbc Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 01:09:53 +0900 Subject: [PATCH 11/13] update deps for python 3.7 --- Pipfile | 1 + Pipfile.lock | 52 ++++++++++++---------------------------------------- 2 files changed, 13 insertions(+), 40 deletions(-) diff --git a/Pipfile b/Pipfile index 4bbfc1c..45a09bc 100644 --- a/Pipfile +++ b/Pipfile @@ -5,6 +5,7 @@ verify_ssl = true [dev-packages] pytest = "*" +ipython = "<8.0" ipdb = "*" kyoto-reader = {editable = true,path = "."} sphinx = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 2162acd..931be53 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6d509bf45f4f8c5e9f94820e585005e9c2654d2995a35ae6da6bd867dcac2ac4" + "sha256": "4a8ed9e160ca4005af441cf6a112373015ae532931724b517dd56c22bf47d75e" }, "pipfile-spec": 6, "requires": { @@ -32,13 +32,6 @@ "markers": "sys_platform == 'darwin'", "version": "==0.1.2" }, - "asttokens": { - "hashes": [ - "sha256:0844691e88552595a6f4a4281a9f7f79b8dd45ca4ccea82e5e05b4bbdb76705c", - "sha256:9a54c114f02c7a9480d56550932546a3f1fe71d8a02f1bc7ccd0ee3ee35cf4d5" - ], - "version": "==2.0.5" - }, "attrs": { "hashes": [ "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", @@ -124,13 +117,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==0.17.1" }, - "executing": { - "hashes": [ - "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501", - "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9" - ], - "version": "==0.8.3" - }, "idna": { "hashes": [ "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", @@ -152,7 +138,7 @@ "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6", "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539" ], - "markers": "python_version < '3.10'", + "markers": "python_version >= '3.7'", "version": "==4.11.3" }, "iniconfig": { @@ -171,11 +157,11 @@ }, "ipython": { "hashes": [ - "sha256:1b672bfd7a48d87ab203d9af8727a3b0174a4566b4091e9447c22fb63ea32857", - "sha256:70e5eb132cac594a34b5f799bd252589009905f05104728aea6a403ec2519dc1" + "sha256:468abefc45c15419e3c8e8c0a6a5c115b2127bafa34d7c641b1d443658793909", + "sha256:86df2cf291c6c70b5be6a7b608650420e89180c8ec74f376a34e2dc15c3400e7" ], - "markers": "python_version >= '3.7'", - "version": "==8.2.0" + "index": "pypi", + "version": "==7.32.0" }, "jaconv": { "hashes": [ @@ -340,13 +326,6 @@ ], "version": "==0.7.0" }, - "pure-eval": { - "hashes": [ - "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", - "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" - ], - "version": "==0.2.2" - }, "py": { "hashes": [ "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", @@ -376,7 +355,7 @@ "sha256:5fca288a70304c745d20da5657c6175b5eadf7a7a571ed4f57bb157eb6e7df4d", "sha256:c988cbd92d645e43d51a406835147a7c24aa849894802e22fcf5c4e1475da805" ], - "markers": "python_version >= '3.7' and python_version < '4.0'", + "markers": "python_version >= '3.7' and python_version < '4'", "version": "==0.6.1" }, "pyparsing": { @@ -466,11 +445,11 @@ }, "sphinx": { "hashes": [ - "sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe", - "sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc" + "sha256:7bf8ca9637a4ee15af412d1a1d9689fec70523a68ca9bb9127c2f3eeb344e2e6", + "sha256:ebf612653238bcc8f4359627a9b7ce44ede6fdd75d9d30f68255c7383d3a6226" ], "index": "pypi", - "version": "==4.4.0" + "version": "==4.5.0" }, "sphinx-autobuild": { "hashes": [ @@ -544,19 +523,12 @@ "markers": "python_version >= '3.5'", "version": "==1.1.5" }, - "stack-data": { - "hashes": [ - "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12", - "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e" - ], - "version": "==0.2.0" - }, "toml": { "hashes": [ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '3.7'", "version": "==0.10.2" }, "tomli": { @@ -643,7 +615,7 @@ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.26.9" }, "wcwidth": { From c12c33d5bc49754a0f933b5ed69f09a65bf1dd76 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 28 Mar 2022 22:44:21 +0900 Subject: [PATCH 12/13] add typing_extensions --- Pipfile | 1 + Pipfile.lock | 130 +++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 118 insertions(+), 13 deletions(-) diff --git a/Pipfile b/Pipfile index 45a09bc..36d35d1 100644 --- a/Pipfile +++ b/Pipfile @@ -15,6 +15,7 @@ recommonmark = "*" twine = "*" sphinx-rtd-theme = "*" autopep8 = "*" +typing-extensions = "*" [packages] diff --git a/Pipfile.lock b/Pipfile.lock index 931be53..fee80d1 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "4a8ed9e160ca4005af441cf6a112373015ae532931724b517dd56c22bf47d75e" + "sha256": "07a593f320f8b46ce7c4f8364aa280f7b4262512904f7d132b0426efdf49f81d" }, "pipfile-spec": 6, "requires": { @@ -24,14 +24,6 @@ ], "version": "==0.7.12" }, - "appnope": { - "hashes": [ - "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442", - "sha256:dd83cd4b5b460958838f6eb3000c660b1f9caf2a5b1de4264e941512f603258a" - ], - "markers": "sys_platform == 'darwin'", - "version": "==0.1.2" - }, "attrs": { "hashes": [ "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", @@ -78,6 +70,61 @@ ], "version": "==2021.10.8" }, + "cffi": { + "hashes": [ + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + ], + "version": "==1.15.0" + }, "charset-normalizer": { "hashes": [ "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", @@ -101,6 +148,32 @@ ], "version": "==0.9.1" }, + "cryptography": { + "hashes": [ + "sha256:0a3bf09bb0b7a2c93ce7b98cb107e9170a90c51a0162a20af1c61c765b90e60b", + "sha256:1f64a62b3b75e4005df19d3b5235abd43fa6358d5516cfc43d87aeba8d08dd51", + "sha256:32db5cc49c73f39aac27574522cecd0a4bb7384e71198bc65a0d23f901e89bb7", + "sha256:4881d09298cd0b669bb15b9cfe6166f16fc1277b4ed0d04a22f3d6430cb30f1d", + "sha256:4e2dddd38a5ba733be6a025a1475a9f45e4e41139d1321f412c6b360b19070b6", + "sha256:53e0285b49fd0ab6e604f4c5d9c5ddd98de77018542e88366923f152dbeb3c29", + "sha256:70f8f4f7bb2ac9f340655cbac89d68c527af5bb4387522a8413e841e3e6628c9", + "sha256:7b2d54e787a884ffc6e187262823b6feb06c338084bbe80d45166a1cb1c6c5bf", + "sha256:7be666cc4599b415f320839e36367b273db8501127b38316f3b9f22f17a0b815", + "sha256:8241cac0aae90b82d6b5c443b853723bcc66963970c67e56e71a2609dc4b5eaf", + "sha256:82740818f2f240a5da8dfb8943b360e4f24022b093207160c77cadade47d7c85", + "sha256:8897b7b7ec077c819187a123174b645eb680c13df68354ed99f9b40a50898f77", + "sha256:c2c5250ff0d36fd58550252f54915776940e4e866f38f3a7866d92b32a654b86", + "sha256:ca9f686517ec2c4a4ce930207f75c00bf03d94e5063cbc00a1dc42531511b7eb", + "sha256:d2b3d199647468d410994dbeb8cec5816fb74feb9368aedf300af709ef507e3e", + "sha256:da73d095f8590ad437cd5e9faf6628a218aa7c387e1fdf67b888b47ba56a17f0", + "sha256:e167b6b710c7f7bc54e67ef593f8731e1f45aa35f8a8a7b72d6e42ec76afd4b3", + "sha256:ea634401ca02367c1567f012317502ef3437522e2fc44a3ea1844de028fa4b84", + "sha256:ec6597aa85ce03f3e507566b8bcdf9da2227ec86c4266bd5e6ab4d9e0cc8dab2", + "sha256:f64b232348ee82f13aac22856515ce0195837f6968aeaa94a3d0353ea2ec06a6" + ], + "markers": "python_version >= '3.6'", + "version": "==36.0.2" + }, "decorator": { "hashes": [ "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", @@ -138,7 +211,7 @@ "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6", "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.10'", "version": "==4.11.3" }, "iniconfig": { @@ -177,6 +250,14 @@ "markers": "python_version >= '3.6'", "version": "==0.18.1" }, + "jeepney": { + "hashes": [ + "sha256:1b5a0ea5c0e7b166b2f5895b91a08c14de8915afda4407fb5022a195224958ac", + "sha256:fa9e232dfa0c498bd0b8a3a73b8d8a31978304dcef0515adc859d4e096f96f4f" + ], + "markers": "sys_platform == 'linux'", + "version": "==0.7.1" + }, "jinja2": { "hashes": [ "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119", @@ -342,6 +423,13 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==2.8.0" }, + "pycparser": { + "hashes": [ + "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", + "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" + ], + "version": "==2.21" + }, "pygments": { "hashes": [ "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", @@ -420,13 +508,21 @@ "markers": "python_version >= '3.7'", "version": "==2.0.0" }, + "secretstorage": { + "hashes": [ + "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f", + "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195" + ], + "markers": "sys_platform == 'linux'", + "version": "==3.3.1" + }, "setuptools": { "hashes": [ - "sha256:89eef7b71423ab7fccc7dfafdc145410ef170c4a89567427f932448135e08cdf", - "sha256:92b15f45ab164eb0c410d2bf661a6e9d15e3b78c0dffb0325f2bf0f313071cae" + "sha256:8f4813dd6a4d6cc17bde85fb2e635fe19763f96efbb0ddf5575562e5ee0bc47a", + "sha256:c3d4e2ab578fbf83775755cd76dae73627915a22832cf4ea5de895978767833b" ], "markers": "python_version >= '3.7'", - "version": "==61.1.1" + "version": "==61.2.0" }, "six": { "hashes": [ @@ -610,6 +706,14 @@ "index": "pypi", "version": "==3.8.0" }, + "typing-extensions": { + "hashes": [ + "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", + "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" + ], + "index": "pypi", + "version": "==4.1.1" + }, "urllib3": { "hashes": [ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", From aa2826b6d40b4b3cfbbe22065c95cca13ea5083f Mon Sep 17 00:00:00 2001 From: Masato Umakosi Date: Tue, 29 Mar 2022 10:45:08 +0900 Subject: [PATCH 13/13] Fix type of archive in docstring --- src/kyoto_reader/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kyoto_reader/reader.py b/src/kyoto_reader/reader.py index ea9f817..da86fbf 100644 --- a/src/kyoto_reader/reader.py +++ b/src/kyoto_reader/reader.py @@ -226,7 +226,7 @@ def read_knp(self, Args: file (FileHandler): A file handler indicating a path to a KNP format file. did_from_sid (bool): If True, determine the document ID from the sentence ID in the document. - archive (Optional[Union[zipfile.ZipFile, tarfile.TarFile]]): An archive to read the document from. + archive (Optional[ArchiveFile]): An archive to read the document from. Returns: Dict[str, str]: A mapping from a document ID to a KNP format string. @@ -295,7 +295,7 @@ def process_document( Args: doc_id (str): An ID of a document to process. - archive (Optional[Union[zipfile.ZipFile, tarfile.TarFile]]): An archive to read the document from. + archive (Optional[ArchiveFile]): An archive to read the document from. """ if doc_id in self.did2pkls: if archive is not None: