From 68911158f54fb9d1ca490191a4605c9f53add3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20H=C3=B6ng?= Date: Fri, 21 Aug 2020 17:36:18 +0200 Subject: [PATCH 1/6] Add move and copy methods to local fs --- drfs/filesystems/base.py | 10 +++++++++- drfs/filesystems/local.py | 11 ++++++++++- drfs/tests/conftest.py | 2 +- drfs/tests/test_filesystem.py | 12 ++++++------ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drfs/filesystems/base.py b/drfs/filesystems/base.py index eebe33c..cacf180 100644 --- a/drfs/filesystems/base.py +++ b/drfs/filesystems/base.py @@ -92,10 +92,18 @@ def copy(self, path, *args, **kwargs): except AttributeError: return self.fs.cp(path, *args, **kwargs) + @allow_pathlib + @maybe_remove_scheme + def move(self, path, *args, **kwargs): + try: + return self.fs.mv(path, *args, **kwargs) + except AttributeError: + return self.fs.move(path, *args, **kwargs) + @allow_pathlib @maybe_remove_scheme def mv(self, path, *args, **kwargs): - return self.fs.mv(path, *args, **kwargs) + self.move(path, *args, **kwargs) @allow_pathlib @maybe_remove_scheme diff --git a/drfs/filesystems/local.py b/drfs/filesystems/local.py index 7f1f6e1..1e2d8d1 100644 --- a/drfs/filesystems/local.py +++ b/drfs/filesystems/local.py @@ -49,11 +49,20 @@ def ls(self, path): return list(map(lambda x: os.path.join(path, x), os.listdir(path))) @allow_pathlib - def mv(self, src, dst): + def move(self, src, dst): """Move file or directory. Source parent dir will be created.""" self._makedirs_parent(dst) shutil.move(src, dst) + @allow_pathlib + def mv(self, src, dst): + self.move(src, dst) + + @allow_pathlib + def copy(self, src, dst): + self._makedirs_parent(dst) + shutil.copyfile(src, dst) + @allow_pathlib def rmdir(self, path): """Remove directory.""" diff --git a/drfs/tests/conftest.py b/drfs/tests/conftest.py index 09af4aa..8a05b3c 100644 --- a/drfs/tests/conftest.py +++ b/drfs/tests/conftest.py @@ -27,7 +27,7 @@ def s3_data_dir(): import boto3 import s3fs conn = boto3.client("s3") - conn.create_bucket(Bucket="s3-bimadi-test-bucket") + conn.create_bucket(Bucket="s3-test-bucket") fs = s3fs.S3FileSystem() for i in range(1, 11): diff --git a/drfs/tests/test_filesystem.py b/drfs/tests/test_filesystem.py index a053b5f..575851a 100644 --- a/drfs/tests/test_filesystem.py +++ b/drfs/tests/test_filesystem.py @@ -200,24 +200,24 @@ def test_memory_fs_recursive_rm(): def test_list_files(s3_data_dir): - fs = get_fs('s3://s3-bimadi-test-bucket/', rtype='instance') + fs = get_fs('s3://s3-test-bucket/', rtype='instance') - res = fs.ls('s3://s3-bimadi-test-bucket/dump/') + res = fs.ls('s3://s3-test-bucket/dump/') assert all([str(p).startswith('s3://s3-') for p in res]) assert len(res) == 10 def test_glob_files(s3_data_dir): - fs = get_fs('s3://s3-bimadi-test-bucket/', rtype='instance') + fs = get_fs('s3://s3-test-bucket/', rtype='instance') - res = fs.glob('s3://s3-bimadi-test-bucket/dump/*.csv') + res = fs.glob('s3://s3-test-bucket/dump/*.csv') assert all([str(p).startswith('s3://s3-') for p in res]) assert len(res) == 10 @pytest.mark.parametrize('scheme, path, exp', [ - ('s3', 's3-bimadi-bucket/test', 's3://s3-bimadi-bucket/test'), - ('s3', 's3://s3-bimadi-bucket/test', 's3://s3-bimadi-bucket/test'), + ('s3', 's3-bucket/test', 's3://s3-bucket/test'), + ('s3', 's3://s3-bucket/test', 's3://s3-bucket/test'), ('', '/user/ubuntu/test', 'file://user/ubuntu/test'), ('', 'file://user/ubuntu/test', 'file://user/ubuntu/test'), ]) From 67fe43baa3ce0e010f393e4d3c9ac465e2d44b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20H=C3=B6ng?= Date: Fri, 21 Aug 2020 17:37:05 +0200 Subject: [PATCH 2/6] Allow structure to be instantiated multiple times Useful in testing scenarios, or when you are working with two buckets or filesystem locations --- drfs/structure.py | 15 +++++++++------ drfs/tests/test_structure.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 drfs/tests/test_structure.py diff --git a/drfs/structure.py b/drfs/structure.py index 53b5a87..965cb2c 100644 --- a/drfs/structure.py +++ b/drfs/structure.py @@ -1,4 +1,5 @@ import inspect +from copy import copy from pathlib import Path from textwrap import indent from typing import Union @@ -24,18 +25,18 @@ def __new__(mcs, name, bases, attrs): new_attrs[attr_name] = attr_value(attr_name) else: new_attrs[attr_name] = attr_value - + return type.__new__(mcs, name, bases, new_attrs) - + class Tree(metaclass=_MetaTree): def __init__(self, root): self.root = DRPath(root) - + @property def root(self): return self._root - + @root.setter def root(self, value): """Recursively set root in this and all child trees.""" @@ -43,9 +44,11 @@ def root(self, value): self._root = value for node_name, node_value in self._get_nodes(): if isinstance(node_value, Tree): + node_value = copy(node_value) + setattr(self, node_name, node_value) node_root = getattr(node_value, '__root__', node_name) node_value.root = self._root / node_root - + def _get_nodes(self): nodes = inspect.getmembers( self, @@ -67,7 +70,7 @@ def __repr__(self): s = '' res = f'{res}{s}' return res - + def add(self, key, value): if isinstance(value, (str, DRPathMixin)): value = _root_function(value) diff --git a/drfs/tests/test_structure.py b/drfs/tests/test_structure.py new file mode 100644 index 0000000..533ecb9 --- /dev/null +++ b/drfs/tests/test_structure.py @@ -0,0 +1,15 @@ +from drfs import Tree, P, DRPath + + +class Structure(Tree): + + class data(Tree): + file: P = DRPath('{name}.csv') + + +def test_independent_instances(): + structure1 = Structure('/tmp') + structure2 = Structure('/tmp/dir/') + + assert str(structure1.data.file.format(name='file1')) == "/tmp/data/file1.csv" + assert str(structure2.data.file.format(name='file1')) == "/tmp/dir/data/file1.csv" From 038204723e308b51ad80e8b4555791b044bf0c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20H=C3=B6ng?= Date: Fri, 21 Aug 2020 17:39:57 +0200 Subject: [PATCH 3/6] Make path act like a string Add __get_item__ acts on str version of path. As well as startswith. This way s3fs.S3FileSystem.put and s3fs.S3FileSystem.copy will work with DRPath. --- drfs/path.py | 24 ++++++++++++++++-------- drfs/tests/test_path.py | 14 +++++++++++++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drfs/path.py b/drfs/path.py index d56afe5..6544cfe 100644 --- a/drfs/path.py +++ b/drfs/path.py @@ -17,18 +17,18 @@ class DRPathMixin: def is_template(self): s = str(self) return 0 <= s.find('{') < s.find('}') - + @property def is_wildcard(self): return '*' in str(self) - + @property def flag(self): return self / '_SUCCESS' - + def format(self, *args, **kwargs): return DRPath(str(self).format(*args, **kwargs)) - + @property def storage_options(self): try: @@ -39,14 +39,14 @@ def storage_options(self): if opts is not None: return opts return settings.FS_OPTS - + opts = storage_options class RemotePath(URL, DRPathMixin): """ A very pathlib.Path version for RemotePaths. - + If you use a method that requires an underlying filesystem to be instantiated (such as `open`), storage_options may be needed (to provide credentials etc.). They are taken from settings.FS_OPTS by default, @@ -120,13 +120,21 @@ def path(self): return self._root \ + self._flavour.sep.join(urllib.parse.quote(i, safe=safe_pchars) for i in self._parts[begin:-1] + [self.name]) \ + self.trailing_sep - + def _make_child(self, args): res = super()._make_child(args) res._storage_options = self._storage_options res._acc_real = self._acc_real return res + def startswith(self, *args, **kwargs): + """Act like a string - for compatibility with s3fs.put""" + return str(self).startswith(*args, **kwargs) + + def __getitem__(self, item): + """Act like a string - for compatibility with s3fs.put""" + return str(self)[item] + class LocalPath(PATH_CLASS, DRPathMixin): pass @@ -141,7 +149,7 @@ def __new__(cls, path, *args, **kwargs): cls = LocalPath obj = cls(path, *args, **kwargs) return obj - + def asstr(arg): """Convert arg into its string representation. diff --git a/drfs/tests/test_path.py b/drfs/tests/test_path.py index 5c6b217..8ba9cfc 100644 --- a/drfs/tests/test_path.py +++ b/drfs/tests/test_path.py @@ -38,8 +38,20 @@ def test_remote_div(s3): assert p2._acc_real is not None assert p2._acc_real.fs.key == opts['key'] assert p2._acc_real.fs.secret == opts['secret'] - + p3 = p2 / 'test.txt' assert p3.storage_options == p2.storage_options assert p3._acc_real is not None assert p3._acc_real is p2._acc_real + + +def test_path_get_item(): + p = DRPath("s3://test_bucket") + + assert p[:5] == "s3://" + + +def test_path_startswith(): + p = DRPath("s3://test_bucket") + + assert p.startswith("s3://") From e0032c6825b0e0a3f7383195e64f373e354e7ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20D?= Date: Wed, 26 Aug 2020 15:17:21 +0200 Subject: [PATCH 4/6] Test copy method of local fs --- drfs/tests/filesystems/test_local.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drfs/tests/filesystems/test_local.py b/drfs/tests/filesystems/test_local.py index bf51456..8585a16 100644 --- a/drfs/tests/filesystems/test_local.py +++ b/drfs/tests/filesystems/test_local.py @@ -23,7 +23,17 @@ def test_local_filesystem(tmpdir): fs.info(non_existing_file) with fs.open(existing_file, 'r') as f: assert f.read() == 'test' + + copy_path = tmpdir.join('copy.txt') + fs.copy(existing_file, copy_path) + assert copy_path.exists() + copy_path2 = tmpdir.join('copy2.txt') + fs.cp(existing_file, copy_path2) + assert copy_path2.exists() + fs.remove(existing_file) + fs.remove(copy_path) + fs.remove(copy_path2) assert fs.ls(tmpdir.strpath) == [] assert not fs.exists(existing_file) From 8c5cf2f43de239572def82c8df29577c7ae67a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20D?= Date: Wed, 26 Aug 2020 15:23:41 +0200 Subject: [PATCH 5/6] Act like string also for local paths #time 3m #time 5m --- drfs/path.py | 16 ++++++++-------- drfs/tests/test_path.py | 28 ++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/drfs/path.py b/drfs/path.py index 6544cfe..ae30cb2 100644 --- a/drfs/path.py +++ b/drfs/path.py @@ -40,6 +40,14 @@ def storage_options(self): return opts return settings.FS_OPTS + def startswith(self, *args, **kwargs): + """Act like a string - for compatibility with s3fs.put""" + return str(self).startswith(*args, **kwargs) + + def __getitem__(self, item): + """Act like a string - for compatibility with s3fs.put""" + return str(self)[item] + opts = storage_options @@ -127,14 +135,6 @@ def _make_child(self, args): res._acc_real = self._acc_real return res - def startswith(self, *args, **kwargs): - """Act like a string - for compatibility with s3fs.put""" - return str(self).startswith(*args, **kwargs) - - def __getitem__(self, item): - """Act like a string - for compatibility with s3fs.put""" - return str(self)[item] - class LocalPath(PATH_CLASS, DRPathMixin): pass diff --git a/drfs/tests/test_path.py b/drfs/tests/test_path.py index 8ba9cfc..0ae1aa9 100644 --- a/drfs/tests/test_path.py +++ b/drfs/tests/test_path.py @@ -1,3 +1,5 @@ +import pytest + from drfs.path import DRPath from drfs.settings import FS_OPTS @@ -45,13 +47,27 @@ def test_remote_div(s3): assert p3._acc_real is p2._acc_real -def test_path_get_item(): - p = DRPath("s3://test_bucket") +@pytest.mark.parametrize( + ("str_path",), + [ + ("s3://test_bucket",), + ("/home/test_dir",), + ] +) +def test_path_get_item(str_path): + p = DRPath(str_path) - assert p[:5] == "s3://" + assert p[:5] == str_path[:5] -def test_path_startswith(): - p = DRPath("s3://test_bucket") +@pytest.mark.parametrize( + ("str_path",), + [ + ("s3://test_bucket",), + ("/home/test_dir",), + ] +) +def test_path_startswith(str_path): + p = DRPath(str_path) - assert p.startswith("s3://") + assert p.startswith(str_path[:5]) From 36a6e1c7271015e0087f2662a1695c8405f3f5db Mon Sep 17 00:00:00 2001 From: Diana Carvalho Date: Fri, 31 Jul 2020 15:52:09 +0100 Subject: [PATCH 6/6] on LocalFileSystem return an empty list when path doesn't exist #time 1h 23m #time 0m --- drfs/filesystems/local.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drfs/filesystems/local.py b/drfs/filesystems/local.py index 1e2d8d1..ca969b2 100644 --- a/drfs/filesystems/local.py +++ b/drfs/filesystems/local.py @@ -46,7 +46,9 @@ def remove(self, path): @allow_pathlib def ls(self, path): """List directory.""" - return list(map(lambda x: os.path.join(path, x), os.listdir(path))) + if os.path.exists(path): + return list(map(lambda x: os.path.join(path, x), os.listdir(path))) + return list() @allow_pathlib def move(self, src, dst):