Skip to content

Commit

Permalink
Merge pull request #21 from datarevenue-berlin/ah-misc-fixes-and-impr…
Browse files Browse the repository at this point in the history
…ovements

Misc fixes and improvements
  • Loading branch information
Alan Höng authored Aug 26, 2020
2 parents 9a4ae5f + 36a6e1c commit ff47775
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 25 deletions.
10 changes: 9 additions & 1 deletion drfs/filesystems/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,18 @@ def copy(self, path, *args, **kwargs):
except AttributeError:
return self.fs.cp(path, *args, **kwargs)

@allow_pathlib
@maybe_remove_scheme
def move(self, path, *args, **kwargs):
    """Move ``path`` using the wrapped filesystem.

    The wrapped filesystem's ``mv`` method is preferred; when the backend
    does not expose ``mv``, its ``move`` method is used instead. All extra
    positional and keyword arguments are forwarded unchanged.

    Returns whatever the backend method returns.

    Raises:
        AttributeError: if the backend offers neither ``mv`` nor ``move``.
    """
    # Resolve the method *before* calling it: wrapping the call itself in
    # the try block would also swallow an AttributeError raised inside the
    # backend's implementation and silently retry via the other name.
    try:
        mover = self.fs.mv
    except AttributeError:
        mover = self.fs.move
    return mover(path, *args, **kwargs)

@allow_pathlib
@maybe_remove_scheme
def mv(self, path, *args, **kwargs):
    """Short-name alias of ``move``.

    Forwards every argument to ``self.move`` and returns its result, so
    both spellings share the same backend lookup.
    """
    # NOTE(review): ``move`` carries the same decorators, so ``path`` passes
    # through them twice - presumably harmless; confirm they are idempotent.
    return self.move(path, *args, **kwargs)

@allow_pathlib
@maybe_remove_scheme
Expand Down
15 changes: 13 additions & 2 deletions drfs/filesystems/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,25 @@ def remove(self, path):
@allow_pathlib
def ls(self, path):
    """List directory.

    Returns a list with every entry of ``path`` joined onto ``path``.
    When ``path`` does not exist an empty list is returned instead of
    raising.
    """
    # Guard first: the previous unconditional return made this check
    # unreachable, so a missing directory raised from os.listdir.
    if not os.path.exists(path):
        return []
    return [os.path.join(path, entry) for entry in os.listdir(path)]

@allow_pathlib
def move(self, src, dst):
    """Move file or directory. Destination parent dir will be created.

    ``self._makedirs_parent(dst)`` runs before the move so that
    ``shutil.move`` does not fail on a missing target directory.
    """
    self._makedirs_parent(dst)
    shutil.move(src, dst)

@allow_pathlib
def mv(self, src, dst):
    """Short-name alias of ``move``; forwards ``src`` and ``dst`` to it."""
    self.move(src, dst)

@allow_pathlib
def copy(self, src, dst):
    """Copy the file ``src`` to ``dst``.

    ``self._makedirs_parent(dst)`` is called first, then the copy is done
    with ``shutil.copyfile``.
    """
    source, target = src, dst
    self._makedirs_parent(target)
    shutil.copyfile(source, target)

@allow_pathlib
def rmdir(self, path):
"""Remove directory."""
Expand Down
24 changes: 16 additions & 8 deletions drfs/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@ class DRPathMixin:
def is_template(self):
    """Tell whether the string form looks like a format template.

    True when a ``{`` is present and its first occurrence comes before the
    first occurrence of ``}``.
    """
    text = str(self)
    opening = text.find('{')
    closing = text.find('}')
    return opening >= 0 and opening < closing

@property
def is_wildcard(self):
    """True when the string form of this path contains a ``*``."""
    return str(self).find('*') != -1

@property
def flag(self):
    """Child path named ``_SUCCESS`` directly beneath this path."""
    marker_name = '_SUCCESS'
    return self / marker_name

def format(self, *args, **kwargs):
    """Apply ``str.format`` to the string form and wrap the result in DRPath."""
    rendered = str(self).format(*args, **kwargs)
    return DRPath(rendered)

@property
def storage_options(self):
try:
Expand All @@ -39,14 +39,22 @@ def storage_options(self):
if opts is not None:
return opts
return settings.FS_OPTS


def startswith(self, *args, **kwargs):
    """Act like a string - for compatibility with s3fs.put

    Delegates to ``str.startswith`` on the string form of this path.
    """
    as_text = str(self)
    return as_text.startswith(*args, **kwargs)

def __getitem__(self, item):
    """Act like a string - for compatibility with s3fs.put

    Indexes or slices the string form of this path.
    """
    as_text = str(self)
    return as_text[item]

opts = storage_options


class RemotePath(URL, DRPathMixin):
"""
A very pathlib.Path version for RemotePaths.
If you use a method that requires an underlying filesystem to
be instantiated (such as `open`), storage_options may be needed (to provide
credentials etc.). They are taken from settings.FS_OPTS by default,
Expand Down Expand Up @@ -120,7 +128,7 @@ def path(self):
return self._root \
+ self._flavour.sep.join(urllib.parse.quote(i, safe=safe_pchars) for i in self._parts[begin:-1] + [self.name]) \
+ self.trailing_sep

def _make_child(self, args):
res = super()._make_child(args)
res._storage_options = self._storage_options
Expand All @@ -141,7 +149,7 @@ def __new__(cls, path, *args, **kwargs):
cls = LocalPath
obj = cls(path, *args, **kwargs)
return obj


def asstr(arg):
"""Convert arg into its string representation.
Expand Down
15 changes: 9 additions & 6 deletions drfs/structure.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import inspect
from copy import copy
from pathlib import Path
from textwrap import indent
from typing import Union
Expand All @@ -24,28 +25,30 @@ def __new__(mcs, name, bases, attrs):
new_attrs[attr_name] = attr_value(attr_name)
else:
new_attrs[attr_name] = attr_value

return type.__new__(mcs, name, bases, new_attrs)


class Tree(metaclass=_MetaTree):
def __init__(self, root):
    """Create a tree rooted at ``root`` (converted with DRPath)."""
    root_path = DRPath(root)
    # Plain attribute assignment; ``root`` is a property, so its setter runs.
    self.root = root_path

@property
def root(self):
    """Root path of this tree, as stored by the setter."""
    return self._root

@root.setter
def root(self, value):
    """Recursively set root in this and all child trees.

    ``value`` is converted with DRPath and stored. Every node that is a
    Tree is then replaced on this instance by a copy whose root is this
    root joined with the node's ``__root__`` attribute, or with the node
    name when ``__root__`` is absent.
    """
    value = DRPath(value)
    self._root = value
    for name, node in self._get_nodes():
        if not isinstance(node, Tree):
            continue
        # Re-root a copy rather than the original node - presumably the
        # original is shared between instances; verify against _MetaTree.
        child = copy(node)
        setattr(self, name, child)
        child_root = getattr(child, '__root__', name)
        child.root = self._root / child_root

def _get_nodes(self):
nodes = inspect.getmembers(
self,
Expand All @@ -67,7 +70,7 @@ def __repr__(self):
s = ''
res = f'{res}{s}'
return res

def add(self, key, value):
if isinstance(value, (str, DRPathMixin)):
value = _root_function(value)
Expand Down
2 changes: 1 addition & 1 deletion drfs/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def s3_data_dir():
import boto3
import s3fs
conn = boto3.client("s3")
conn.create_bucket(Bucket="s3-bimadi-test-bucket")
conn.create_bucket(Bucket="s3-test-bucket")
fs = s3fs.S3FileSystem()

for i in range(1, 11):
Expand Down
10 changes: 10 additions & 0 deletions drfs/tests/filesystems/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ def test_local_filesystem(tmpdir):
fs.info(non_existing_file)
with fs.open(existing_file, 'r') as f:
assert f.read() == 'test'

copy_path = tmpdir.join('copy.txt')
fs.copy(existing_file, copy_path)
assert copy_path.exists()
copy_path2 = tmpdir.join('copy2.txt')
fs.cp(existing_file, copy_path2)
assert copy_path2.exists()

fs.remove(existing_file)
fs.remove(copy_path)
fs.remove(copy_path2)
assert fs.ls(tmpdir.strpath) == []
assert not fs.exists(existing_file)

Expand Down
12 changes: 6 additions & 6 deletions drfs/tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,24 +200,24 @@ def test_memory_fs_recursive_rm():


def test_list_files(s3_data_dir):
    """``ls`` on the ``dump/`` prefix returns ten ``s3://``-qualified paths."""
    # Only the current bucket name is used; the earlier assignments against
    # the old bucket name were overwritten before being read.
    fs = get_fs('s3://s3-test-bucket/', rtype='instance')

    res = fs.ls('s3://s3-test-bucket/dump/')
    assert all(str(p).startswith('s3://s3-') for p in res)
    assert len(res) == 10


def test_glob_files(s3_data_dir):
    """``glob`` for ``dump/*.csv`` returns ten ``s3://``-qualified paths."""
    # Only the current bucket name is used; the earlier assignments against
    # the old bucket name were overwritten before being read.
    fs = get_fs('s3://s3-test-bucket/', rtype='instance')

    res = fs.glob('s3://s3-test-bucket/dump/*.csv')
    assert all(str(p).startswith('s3://s3-') for p in res)
    assert len(res) == 10


@pytest.mark.parametrize('scheme, path, exp', [
('s3', 's3-bimadi-bucket/test', 's3://s3-bimadi-bucket/test'),
('s3', 's3://s3-bimadi-bucket/test', 's3://s3-bimadi-bucket/test'),
('s3', 's3-bucket/test', 's3://s3-bucket/test'),
('s3', 's3://s3-bucket/test', 's3://s3-bucket/test'),
('', '/user/ubuntu/test', 'file://user/ubuntu/test'),
('', 'file://user/ubuntu/test', 'file://user/ubuntu/test'),
])
Expand Down
30 changes: 29 additions & 1 deletion drfs/tests/test_path.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from drfs.path import DRPath
from drfs.settings import FS_OPTS

Expand Down Expand Up @@ -38,8 +40,34 @@ def test_remote_div(s3):
assert p2._acc_real is not None
assert p2._acc_real.fs.key == opts['key']
assert p2._acc_real.fs.secret == opts['secret']

p3 = p2 / 'test.txt'
assert p3.storage_options == p2.storage_options
assert p3._acc_real is not None
assert p3._acc_real is p2._acc_real


@pytest.mark.parametrize(
    "str_path",
    ["s3://test_bucket", "/home/test_dir"],
)
def test_path_get_item(str_path):
    """Slicing a DRPath gives the same result as slicing its source string."""
    path = DRPath(str_path)
    expected_prefix = str_path[:5]

    assert path[:5] == expected_prefix


@pytest.mark.parametrize(
    "str_path",
    ["s3://test_bucket", "/home/test_dir"],
)
def test_path_startswith(str_path):
    """A DRPath reports that it starts with the first characters of its source."""
    path = DRPath(str_path)
    prefix = str_path[:5]

    assert path.startswith(prefix)
15 changes: 15 additions & 0 deletions drfs/tests/test_structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from drfs import Tree, P, DRPath


# Minimal tree used by the tests below: one nested ``data`` tree holding a
# templated ``file`` path.
# NOTE(review): documented with comments on purpose - a docstring would add a
# ``__doc__`` entry to the class attrs the Tree metaclass processes; confirm
# that is harmless before converting these to docstrings.
class Structure(Tree):

    class data(Tree):
        # Template path; ``{name}`` is filled in through ``.format(name=...)``.
        file: P = DRPath('{name}.csv')


def test_independent_instances():
    """Two instances of one Tree class each resolve paths under their own root."""
    shallow = Structure('/tmp')
    nested = Structure('/tmp/dir/')

    shallow_file = shallow.data.file.format(name='file1')
    assert str(shallow_file) == "/tmp/data/file1.csv"

    nested_file = nested.data.file.format(name='file1')
    assert str(nested_file) == "/tmp/dir/data/file1.csv"

0 comments on commit ff47775

Please sign in to comment.