Skip to content

Commit

Permalink
Don't skip hidden files in dl_manager.iter_files when they are given as input (#6230)
Browse files Browse the repository at this point in the history

* Fix `iter_files` when a hidden file is given as input

* Nit
  • Loading branch information
mariosasko authored Sep 13, 2023
1 parent 722cea0 commit 12adf38
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 13 deletions.
6 changes: 1 addition & 5 deletions src/datasets/download/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,10 @@ def _iter_from_paths(cls, urlpaths: Union[str, List[str]]) -> Generator[str, Non
urlpaths = [urlpaths]
for urlpath in urlpaths:
if os.path.isfile(urlpath):
if os.path.basename(urlpath).startswith((".", "__")):
# skipping hidden files
continue
yield urlpath
else:
for dirpath, dirnames, filenames in os.walk(urlpath):
# skipping hidden directories; prune the search
# [:] for the in-place list modification required by os.walk
# in-place modification to prune the search
dirnames[:] = sorted([dirname for dirname in dirnames if not dirname.startswith((".", "__"))])
if os.path.basename(dirpath).startswith((".", "__")):
# skipping hidden directories
Expand Down
2 changes: 0 additions & 2 deletions src/datasets/download/mock_download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,6 @@ def iter_files(self, paths):
paths = [paths]
for path in paths:
if os.path.isfile(path):
if os.path.basename(path).startswith((".", "__")):
return
yield path
else:
for dirpath, dirnames, filenames in os.walk(path):
Expand Down
7 changes: 1 addition & 6 deletions src/datasets/download/streaming_download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,15 +914,10 @@ def _iter_from_urlpaths(
urlpaths = [urlpaths]
for urlpath in urlpaths:
if xisfile(urlpath, download_config=download_config):
if xbasename(urlpath).startswith((".", "__")):
# skipping hidden files
continue
yield urlpath
elif xisdir(urlpath, download_config=download_config):
for dirpath, dirnames, filenames in xwalk(urlpath, download_config=download_config):
# skipping hidden directories; prune the search
# [:] for the in-place list modification required by os.walk
# (only works for local paths as fsspec's walk doesn't support the in-place modification)
# in-place modification to prune the search
dirnames[:] = sorted([dirname for dirname in dirnames if not dirname.startswith((".", "__"))])
if xbasename(dirpath).startswith((".", "__")):
# skipping hidden directories
Expand Down

0 comments on commit 12adf38

Please sign in to comment.