diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c054bb201f5..626ec6f1edad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,13 @@ [11902]: https://github.com/enso-org/enso/pull/11902 [11908]: https://github.com/enso-org/enso/pull/11908 +#### Enso Standard Library + +- [Allow using `/` to access files inside a directory reached through a data + link.][11926] + +[11926]: https://github.com/enso-org/enso/pull/11926 + #### Enso Language & Runtime - [Promote broken values instead of ignoring them][11777]. diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Decomposed_S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Decomposed_S3_Path.enso new file mode 100644 index 000000000000..941393793312 --- /dev/null +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Decomposed_S3_Path.enso @@ -0,0 +1,66 @@ +private + +from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import Standard.Base.Internal.Path_Helpers + +import project.Internal.S3_Path.S3_Path + +type Path_Entry + Directory (name : Text) + + File (name : Text) + + is_directory self -> Boolean = case self of + Path_Entry.Directory _ -> True + Path_Entry.File _ -> False + +type Decomposed_S3_Path + Value (parts : Vector Path_Entry) + + ## Reconstructs the original path. + key self -> Text = + add_directory_suffix = self.parts.not_empty && self.parts.last.is_directory + suffix = if add_directory_suffix then S3_Path.delimiter else "" + self.parts.map .name . join separator=S3_Path.delimiter suffix=suffix + + parse (key : Text) -> Decomposed_S3_Path = + has_directory_suffix = key.ends_with S3_Path.delimiter + parts = key.split S3_Path.delimiter . filter (p-> p.is_empty.not) + entries = case has_directory_suffix of + True -> parts.map Path_Entry.Directory + False -> + if parts.is_empty then [] else + (parts.drop (..Last 1) . 
map Path_Entry.Directory) + [Path_Entry.File parts.last] + Decomposed_S3_Path.Value entries + + join (paths : Vector Decomposed_S3_Path) -> Decomposed_S3_Path = + if paths.is_empty then Error.throw (Illegal_Argument.Error "Cannot join an empty list of paths.") else + flattened = paths.flat_map .parts + # Any `File` parts from the middle are now transformed to `Directory`: + aligned = flattened.map_with_index ix-> part-> case part of + Path_Entry.Directory _ -> part + Path_Entry.File name -> + is_last = ix == flattened.length-1 + if is_last then part else Path_Entry.Directory name + Decomposed_S3_Path.Value aligned + + normalize self -> Decomposed_S3_Path = + new_parts = Path_Helpers.normalize_segments self.parts .name + Decomposed_S3_Path.Value new_parts + + parent self -> Decomposed_S3_Path | Nothing = + if self.parts.is_empty then Nothing else + new_parts = self.parts.drop (..Last 1) + Decomposed_S3_Path.Value new_parts + + is_empty self -> Boolean = self.parts.is_empty + + first_part self -> Path_Entry | Nothing = + if self.parts.is_empty then Nothing else + self.parts.first + + drop_first_part self -> Decomposed_S3_Path = + if self.parts.is_empty then self else + new_parts = self.parts.drop 1 + Decomposed_S3_Path.Value new_parts diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index a904c3721801..85b3270e1485 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -1,8 +1,8 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -import Standard.Base.Internal.Path_Helpers import project.Errors.S3_Error +import project.Internal.Decomposed_S3_Path.Decomposed_S3_Path import project.S3.S3 ## PRIVATE @@ -25,8 +25,7 @@ type S3_Path bucket = (without_prefix.take first_slash_index) if bucket == "" then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name with key name.") else key = (without_prefix.drop first_slash_index+1) - normalized = Decomposed_S3_Path.parse key . normalize . key - S3_Path.Value bucket normalized + S3_Path.Value bucket key ## PRIVATE to_text self -> Text = @@ -43,6 +42,20 @@ type S3_Path Checks if this path represents a directory. is_directory self -> Boolean = self.is_root || (self.key.ends_with S3_Path.delimiter) + ## PRIVATE + private set_new_path self new_path:Decomposed_S3_Path -> S3_Path = + # Handle the edge case of resolving `s3://` path without bucket - first part of the key becomes the bucket name + has_no_bucket = self.bucket == "" + set_new_bucket = has_no_bucket && new_path.is_empty.not + case set_new_bucket of + True -> + new_bucket = new_path.first_part.name + new_key = new_path.drop_first_part.normalize.key + S3_Path.Value new_bucket new_key + False -> + new_key = new_path.normalize.key + S3_Path.Value self.bucket new_key + ## PRIVATE Resolves a subdirectory entry. This only makes logical sense for paths for which `path.is_directory == True`, @@ -52,15 +65,12 @@ type S3_Path if `subpath` ends with the delimiter. 
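For illustration (a hypothetical bucket; behavior follows from `Decomposed_S3_Path.join` and `normalize`): resolving "a/b" against the directory path `s3://bucket/dir/` is expected to yield the file path `s3://bucket/dir/a/b`, while resolving "a/b/" yields the directory path `s3://bucket/dir/a/b/`.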
resolve self (subpath : Text) -> S3_Path = joined = Decomposed_S3_Path.join [Decomposed_S3_Path.parse self.key, Decomposed_S3_Path.parse subpath] - new_key = joined.normalize.key - S3_Path.Value self.bucket new_key + self.set_new_path joined ## PRIVATE join self (subpaths : Vector) -> S3_Path = joined = Decomposed_S3_Path.join (([self.key]+subpaths).map Decomposed_S3_Path.parse) - new_key = joined.normalize.key - S3_Path.Value self.bucket new_key - + self.set_new_path joined ## PRIVATE Returns the parent directory. @@ -94,65 +104,9 @@ type S3_Path path delimiter. In the future we could allow customizing it. delimiter -> Text = "/" -## PRIVATE -type Path_Entry - ## PRIVATE - Directory (name : Text) - - ## PRIVATE - File (name : Text) - - ## PRIVATE - is_directory self -> Boolean = case self of - Path_Entry.Directory _ -> True - Path_Entry.File _ -> False - -## PRIVATE -type Decomposed_S3_Path - ## PRIVATE - Value (parts : Vector Path_Entry) (go_to_root : Boolean) - - ## PRIVATE - Reconstructs the original path. - key self -> Text = - add_directory_suffix = self.parts.not_empty && self.parts.last.is_directory - suffix = if add_directory_suffix then S3_Path.delimiter else "" - self.parts.map .name . join separator=S3_Path.delimiter suffix=suffix - - ## PRIVATE - parse (key : Text) -> Decomposed_S3_Path = - has_directory_suffix = key.ends_with S3_Path.delimiter - has_root_prefix = key.starts_with S3_Path.delimiter - parts = key.split S3_Path.delimiter . filter (p-> p.is_empty.not) - entries = case has_directory_suffix of - True -> parts.map Path_Entry.Directory - False -> - if parts.is_empty then [] else - (parts.drop (..Last 1) . map Path_Entry.Directory) + [Path_Entry.File parts.last] - Decomposed_S3_Path.Value entries has_root_prefix - - ## PRIVATE - join (paths : Vector Decomposed_S3_Path) -> Decomposed_S3_Path = - if paths.is_empty then Error.throw (Illegal_Argument.Error "Cannot join an empty list of paths.") else - last_root_ix = paths.last_index_of (.go_to_root) - without_ignored_paths = if last_root_ix.is_nothing then paths else - paths.drop last_root_ix - flattened = without_ignored_paths.flat_map .parts - # Any `File` parts from the middle are now transformed to `Directory`: - aligned = flattened.map_with_index ix-> part-> case part of - Path_Entry.Directory _ -> part - Path_Entry.File name -> - is_last = ix == flattened.length-1 - if is_last then part else Path_Entry.Directory name - Decomposed_S3_Path.Value aligned (last_root_ix.is_nothing.not) - ## PRIVATE - normalize self -> Decomposed_S3_Path ! 
S3_Error = - new_parts = Path_Helpers.normalize_segments self.parts .name - Decomposed_S3_Path.Value new_parts self.go_to_root + bucket_root self -> S3_Path = S3_Path.Value self.bucket "" ## PRIVATE - parent self -> Decomposed_S3_Path | Nothing = - if self.parts.is_empty then Nothing else - new_parts = self.parts.drop (..Last 1) - Decomposed_S3_Path.Value new_parts self.go_to_root + without_trailing_slash self -> S3_Path = + if self.key.ends_with S3_Path.delimiter then S3_Path.Value self.bucket (self.key.drop (..Last 1)) else self diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 56ddae721bfc..df98c56f1370 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -1,10 +1,13 @@ from Standard.Base import all import Standard.Base.Enso_Cloud.Data_Link.Data_Link +import Standard.Base.Enso_Cloud.Data_Link.Data_Link_From_File import Standard.Base.Enso_Cloud.Data_Link_Helpers import Standard.Base.Errors.Common.Syntax_Error import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Errors.Unimplemented.Unimplemented +import Standard.Base.Internal.Path_Helpers import Standard.Base.Runtime.Context import Standard.Base.System.File.Data_Link_Access.Data_Link_Access import Standard.Base.System.File.Generic.File_Like.File_Like @@ -24,6 +27,8 @@ import project.Internal.S3_File_Write_Strategy import project.Internal.S3_Path.S3_Path import project.S3.S3 +polyglot java import org.enso.aws.file_system.S3DataLinkCache + ## Represents an S3 file or folder If the path ends with a slash, it is a folder. Otherwise, it is a file. type S3_File @@ -48,7 +53,9 @@ type S3_File @credentials AWS_Credential.default_widget new : Text -> AWS_Credential -> S3_File ! Illegal_Argument new (uri : Text = S3.uri_prefix) credentials:AWS_Credential=..Default = - S3_File.Value (S3_Path.parse uri) credentials + parsed_path = S3_Path.parse uri + root = S3_File.Value parsed_path.bucket_root credentials + root / parsed_path.key ## PRIVATE private Value (s3_path : S3_Path) credentials:AWS_Credential @@ -67,9 +74,10 @@ type S3_File - action: A function that operates on the output stream and returns some value. The value is returned from this method. with_output_stream : Vector File_Access -> (Output_Stream -> Any ! File_Error) -> Any ! File_Error - with_output_stream self (open_options : Vector) action = if self.is_directory then Error.throw (S3_Error.Error "S3 directory cannot be opened as a stream." self.uri) else + with_output_stream self (open_options : Vector) action = if self.is_directory_no_follow_links then Error.throw (S3_Error.Error "S3 directory cannot be opened as a stream." self.uri) else Context.Output.if_enabled disabled_message="As writing is disabled, cannot write to S3. Press the Write button ▶ to perform the operation." panic=False <| - open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self) + open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && self.is_data_link + _invalidate_caches_on_write self if open_as_data_link then Data_Link_Helpers.write_data_link_as_stream self open_options action else if open_options.contains File_Access.Append then Error.throw (S3_Error.Error "S3 does not support appending to a file. 
Instead you may read it, modify and then write the new contents." self.uri) else File_Access.ensure_only_allowed_options "with_output_stream" [File_Access.Write, File_Access.Create_New, File_Access.Truncate_Existing, File_Access.Create, Data_Link_Access.No_Follow] open_options <| @@ -82,7 +90,7 @@ type S3_File result = tmp_file.with_output_stream [File_Access.Write] action # Only proceed if the write succeeded result.if_not_error <| - (translate_file_errors self <| S3.upload_file tmp_file self.s3_path.bucket self.s3_path.key self.credentials) . if_not_error <| + (_translate_file_errors self <| S3.upload_file tmp_file self.s3_path.bucket self.s3_path.key self.credentials) . if_not_error <| result @@ -100,11 +108,11 @@ type S3_File - action: A function that operates on the input stream and returns some value. The value is returned from this method. with_input_stream : Vector File_Access -> (Input_Stream -> Any ! File_Error) -> Any ! S3_Error | Illegal_Argument - with_input_stream self (open_options : Vector) action = if self.is_directory then Error.throw (Illegal_Argument.Error "S3 folders cannot be opened as a stream." self.uri) else - open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self) + with_input_stream self (open_options : Vector) action = if self.is_directory_no_follow_links then Error.throw (Illegal_Argument.Error "S3 folders cannot be opened as a stream." self.uri) else + open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && self.is_data_link if open_as_data_link then Data_Link_Helpers.read_data_link_as_stream self open_options action else File_Access.ensure_only_allowed_options "with_input_stream" [File_Access.Read, Data_Link_Access.No_Follow] open_options <| - response_body = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter + response_body = _translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter response_body.with_stream action ## ALIAS load, open, import @@ -137,11 +145,11 @@ type S3_File @format File_Format.default_widget read : File_Format -> Problem_Behavior -> Any ! S3_Error read self format=Auto_Detect (on_problems : Problem_Behavior = ..Report_Warning) = - if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") else - if Data_Link.is_data_link self then Data_Link_Helpers.read_data_link self format on_problems else + if self.is_directory_no_follow_links then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") else + if self.is_data_link then Data_Link_Helpers.read_data_link self format on_problems else case format of Auto_Detect -> - response = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter + response = _translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter response.decode Auto_Detect _ -> metadata = File_Format_Metadata.Value path=self.path name=self.name @@ -174,14 +182,14 @@ type S3_File thrown as these are currently unsupported. list : Text -> Boolean -> Vector S3_File list self name_filter:Text="" recursive:Boolean=False = - if Data_Link.is_data_link self then Data_Link_Helpers.interpret_data_link_target_as_file self . 
list name_filter=name_filter recursive=recursive else + if self.is_data_link then _s3_file_as_data_link self . list name_filter=name_filter recursive=recursive else check_name_filter action = if name_filter != "" then Unimplemented.throw "S3 listing with name filter is not currently implemented." else action check_recursion action = if recursive then Unimplemented.throw "S3 listing with recursion is not currently implemented." else action - check_directory action = if self.is_directory.not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory." self.uri) else action + check_directory action = if self.is_directory_no_follow_links.not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory." self.uri) else action check_directory <| check_recursion <| check_name_filter <| - if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value (S3_Path.Value bucket "") self.credentials else - pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter + if self.s3_path.bucket == "" then _translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value (S3_Path.Value bucket "") self.credentials else + pair = _translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter bucket = self.s3_path.bucket sub_folders = pair.first . map key-> S3_File.Value (S3_Path.Value bucket key) self.credentials @@ -261,7 +269,7 @@ type S3_File if s3_destination == self then (if self.exists then self else Error.throw (File_Error.Not_Found self)) else if replace_existing.not && s3_destination.exists then Error.throw (File_Error.Already_Exists destination) else destination_path = s3_destination.s3_path - translate_file_errors self <| S3.copy_object self.s3_path.bucket self.s3_path.key destination_path.bucket destination_path.key self.credentials + _translate_file_errors self <| S3.copy_object self.s3_path.bucket self.s3_path.key destination_path.bucket destination_path.key self.credentials . if_not_error destination_writable.file_for_return _ -> generic_copy self destination_writable replace_existing @@ -292,8 +300,8 @@ type S3_File as a local move often is. move_to : File_Like -> Boolean -> Any ! File_Error move_to self (destination : File_Like) (replace_existing : Boolean = False) = - if self.is_directory then Error.throw (S3_Error.Error "Moving S3 folders is currently not implemented." self.uri) else - Data_Link_Helpers.disallow_links_in_move self destination <| + Data_Link_Helpers.disallow_links_in_move self destination <| + if self.is_directory then Error.throw (S3_Error.Error "Moving S3 folders is currently not implemented." self.uri) else Context.Output.if_enabled disabled_message="As writing is disabled, cannot move the file. Press the Write button ▶ to perform the operation." panic=False <| r = self.copy_to destination replace_existing=replace_existing r.if_not_error <| @@ -319,6 +327,10 @@ type S3_File error is thrown. - If the Output operations are disabled, a `Forbidden_Operation` panic will occur. + + ? Data Links + If the file is a data link, this will delete the link itself, not + affecting its target. 
delete : Boolean -> Nothing delete self (recursive : Boolean = False) = if self.exists.not then Error.throw (File_Error.Not_Found self) else @@ -344,7 +356,7 @@ type S3_File delete_if_exists : Boolean -> Nothing delete_if_exists self (recursive : Boolean = False) = Context.Output.if_enabled disabled_message="As writing is disabled, cannot delete the file. Press the Write button ▶ to perform the operation." panic=False <| - case self.is_directory of + case self.is_directory_no_follow_links of True -> # This is a temporary simplified implementation to ensure cleaning up after tests # TODO improve recursive deletion for S3 folders: https://github.com/enso-org/enso/issues/9704 @@ -352,7 +364,7 @@ type S3_File if children.is_empty.not && recursive.not then Error.throw (File_Error.Directory_Not_Empty self) else r = children.map child-> child.delete_if_exists recursive r.if_not_error self - False -> translate_file_errors self <| S3.delete_object self.s3_path.bucket self.s3_path.key self.credentials . if_not_error Nothing + False -> _translate_file_errors self <| S3.delete_object self.s3_path.bucket self.s3_path.key self.credentials . if_not_error Nothing ## GROUP Output ICON folder_add @@ -371,6 +383,7 @@ type S3_File result. create_directory : File create_directory self = + _invalidate_caches_on_write self warning = S3_Warning.Warning "The `create_directory` on `S3_File` is only kept for compatibility, it does not do anything. To make sure a directory reports as `exists` you must put some files into it." Warning.attach warning self @@ -402,7 +415,13 @@ type S3_File See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html / : Text -> S3_File / self subpath = - S3_File.Value (self.s3_path.resolve subpath) self.credentials + Path_Helpers.resolve_many_parts self subpath + + ## PRIVATE + An internal helper method that resolves a single part (subpath) of a path. + resolve_single_part self part:Text -> Any = + if self.is_data_link then _s3_file_as_data_link self . resolve_single_part part else + S3_File.Value (self.s3_path.resolve part) self.credentials ## GROUP Standard.Base.Calculations ICON folder @@ -448,7 +467,7 @@ type S3_File Returns the extension of the file. extension : Text - extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else + extension self = if self.is_directory_no_follow_links then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else find_extension_from_name self.name ## GROUP Standard.Base.Metadata @@ -477,10 +496,14 @@ type S3_File ! Error Conditions - If the credential is invalid, an `AWS_SDK_Error` is thrown. - If access is denied to the bucket, an `S3_Error` is thrown. + + ? Data Links + If the file is a data link, this checks if the data link itself exists. + It says nothing about the existence of the data link target. exists : Boolean exists self = if self.s3_path.bucket == "" then True else - raw_result = if self.s3_path.is_root then translate_file_errors self <| S3.head self.s3_path.bucket "" self.credentials . is_error . not else - pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter max_count=1 + raw_result = if self.s3_path.is_root then _translate_file_errors self <| S3.head self.s3_path.bucket "" self.credentials . is_error . 
not else + pair = _translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter max_count=1 pair.second.contains self.s3_path.key raw_result.catch S3_Bucket_Not_Found _->False @@ -489,15 +512,19 @@ type S3_File Returns the size of a file in bytes. + ? Data Links + If the file is a data link that points to a file, the size of the + target file will be returned. + ! Error Conditions - If the `S3_File` represents a directory, an `S3_Error` error is thrown. - If the bucket or object does not exist, a `File_Error.Not_Found` is thrown. - If the object is not accessible, an `S3_Error` is thrown. size : Integer - size self = + size self = if self.is_data_link then _s3_file_as_data_link self . size else if self.is_directory then Error.throw (S3_Error.Error "size can only be called on files." self.uri) else - content_length = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength + content_length = _translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length ## GROUP Standard.Base.Metadata @@ -508,6 +535,10 @@ type S3_File Returns: - An `S3_Error` error as only the last modified time is available for S3 objects. + + ? Data Links + If the file is a data link, this returns the creation time of the data + link. creation_time : Date_Time ! File_Error creation_time self = Error.throw (S3_Error.Error "Creation time is not available for S3 files, consider using `last_modified_time` instead." self.uri) @@ -522,10 +553,14 @@ type S3_File - If the bucket or object does not exist, a `File_Error.Not_Found` is thrown. - If the object is not accessible, an `S3_Error` is thrown. + + ? Data Links + If the file is a data link, this returns the modification time of the + data link. last_modified_time : Date_Time ! File_Error last_modified_time self = if self.is_directory then Error.throw (S3_Error.Error "`last_modified_time` can only be called on files." self.uri) else - instant = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . lastModified + instant = _translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . lastModified if instant.is_nothing then Error.throw (S3_Error.Error "Missing information for: lastModified" self.uri) else instant.at_zone Time_Zone.system @@ -544,8 +579,21 @@ type S3_File The trailing slash determines if the given path is treated as a directory or as a regular file. + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a directory. is_directory : Boolean - is_directory self = self.s3_path.is_directory + is_directory self = + case self.is_data_link of + True -> + _s3_file_as_data_link self . is_directory + . catch Illegal_Argument _->False + False -> self.s3_path.is_directory + + ## PRIVATE + Checks if this file is a directory, not following links. + private is_directory_no_follow_links self = self.is_data_link.not && self.s3_path.is_directory ## GROUP Standard.Base.Metadata ICON metadata @@ -562,8 +610,27 @@ type S3_File The trailing slash determines if the given path is treated as a directory or as a regular file. + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a regular file. 
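+ For illustration (hypothetical objects): a link `s3://bucket/table.datalink` + whose target is a file is expected to report `is_regular_file == True`, + while a link whose target is a directory reports `False` (with + `is_directory == True` instead).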
is_regular_file : Boolean - is_regular_file self = self.is_directory.not + is_regular_file self = + case self.is_data_link of + True -> + _s3_file_as_data_link self . is_directory . not + . catch Illegal_Argument _->False + False -> + self.s3_path.is_directory.not + + ## PRIVATE + is_data_link self -> Boolean = + ## First we perform the cheap check of the necessary condition - whether the + name contains the correct extension. + may_be_data_link = Data_Link.is_data_link_name self.name + if may_be_data_link.not then False else + ttl = Duration.new seconds=30 + S3DataLinkCache.INSTANCE.getOrCompute self.path (_-> _check_is_data_link self) ttl ## GROUP Standard.Base.Metadata ICON metadata @@ -606,13 +673,92 @@ File_Format_Metadata.from (that : S3_File) = File_Format_Metadata.Value that.uri File_Like.from (that : S3_File) = File_Like.Value that ## PRIVATE -Writable_File.from (that : S3_File) = if Data_Link.is_data_link that then Data_Link_Helpers.interpret_data_link_as_writable_file that else +Writable_File.from (that : S3_File) = if that.is_data_link then Data_Link_Helpers.interpret_data_link_as_writable_file (_without_trailing_slash that) else Writable_File.Value that S3_File_Write_Strategy.instance +## PRIVATE +Data_Link_From_File.from (that : S3_File) = Data_Link_From_File.Value that + ## PRIVATE A helper that translates lower level S3 errors to file-system errors. -translate_file_errors related_file result = +private _translate_file_errors related_file result = result.catch S3_Key_Not_Found error-> s3_path = S3_Path.Value error.bucket error.key s3_file = S3_File.Value s3_path related_file.credentials Error.throw (File_Error.Not_Found s3_file) + +## Ensure the file has no trailing slash. + This is needed when treating a possible directory as a data link entity - to + find its S3 object we need to strip the trailing slash if it has one. +private _without_trailing_slash (f : S3_File) -> S3_File = + S3_File.Value f.s3_path.without_trailing_slash f.credentials + +private _s3_file_as_data_link (f : S3_File) = + Data_Link_Helpers.interpret_data_link_target_as_file (_without_trailing_slash f) + +## If the file _may_ be a data link we now need to verify it further. + On S3 this is complicated because there is no direct notion of + directories, so a path `s3://Bucket/a.datalink/b` can mean either + that `a.datalink` is a data link file, or that `b` is just a + regular file inside a weirdly named 'directory' `a.datalink`. + + To distinguish these scenarios we use the following heuristics: + - if the entity under the given path `s3://Bucket/a.datalink` + exists, that means there is a file, so we treat it as a data link. + - if that entity does not exist, but there are entities under the + path `s3://Bucket/a.datalink/`, we treat it as a directory. + - if that entity does not exist and there are no 'child entities', + we determine the treatment based on whether the exact path contains a + trailing slash character - if the path was + `s3://Bucket/a.datalink`, it will be treated as a data link (e.g. such a path + may be passed as the destination of a write method while creating a + new link), but if it ends with a slash + (`s3://Bucket/a.datalink/`), it will be treated as a directory. + + We acknowledge an edge case where both objects + `s3://Bucket/a.datalink` and `s3://Bucket/a.datalink/b` exist + (since the concept of directories in S3 is 'simulated' and there + is nothing preventing such aliasing). 
In that case, we treat the + first object (`s3://Bucket/a.datalink`) as a data link, thus + rendering `s3://Bucket/a.datalink/b` inaccessible. This is not + ideal, but we do report a warning. Note that the user cannot create + such a situation from Enso because once the entity starts to exist + (either as a data link or as a directory) it cannot be interpreted as + the other one. We can still encounter such scenarios in buckets + created externally. +private _check_is_data_link (f : S3_File) = + Runtime.assert (Data_Link.is_data_link_name f.name) "This should only be run on potential data links." + + exists_entity = _exists_entity_direct (_without_trailing_slash f) + has_children = _has_children_entities f + clash = exists_entity && has_children + case clash of + True -> + warning = Illegal_State.Error "The S3 path ["+f.path+"] is ambiguous - its key is both a data link object and a directory containing other objects. The data link will be used." + Warning.attach warning True + False -> + if exists_entity then True else + if has_children then False else + has_trailing_slash = f.s3_path.is_directory + has_trailing_slash.not + + +## Checks if an object exists under the given key _directly_. + This is different from `exists`, which will be true if any child objects exist + under a given directory key. This method, however, will only return true if + an object with the exact key exists and is not just an empty marker (as is + sometimes used to mark directories). +private _exists_entity_direct (f : S3_File) -> Boolean = + size = S3.raw_head f.s3_path.bucket f.s3_path.key f.credentials . contentLength + if size.is_error then False else size > 0 + +private _has_children_entities (f : S3_File) -> Boolean = + pair = S3.read_bucket f.s3_path.bucket f.s3_path.key f.credentials delimiter=S3_Path.delimiter max_count=2 + entries = pair.first + pair.second + entries.filter (k-> k != f.s3_path.key) . is_empty . not + +private _invalidate_caches_on_write (f : S3_File) = + S3DataLinkCache.INSTANCE.invalidateEntry (_without_trailing_slash f).path + parent = f.parent + if parent.is_nothing.not then + S3DataLinkCache.INSTANCE.invalidatePrefix (_without_trailing_slash parent).path diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso index b6342f1277c6..d434394fa6df 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso @@ -447,7 +447,7 @@ download : (URI | Text) -> Writable_File -> HTTP_Method -> Vector (Header | Pair download (uri:(URI | Text)=(Missing_Argument.throw "uri")) file:Writable_File (method:HTTP_Method=..Get) (headers:(Vector (Header | Pair Text Text))=[]) = Context.Output.if_enabled disabled_message="As writing is disabled, cannot download to a file. Press the Write button ▶ to perform the operation." panic=False <| response = HTTP.fetch uri method headers cache_policy=Cache_Policy.No_Cache - case Data_Link.is_data_link response.body.metadata of + case Data_Link.is_data_link_from_metadata response.body.metadata of True -> # If the resource was a data link, we follow it, download the target data and try to write it to a file. 
data_link = Data_Link_Helpers.interpret_json_as_data_link response.decode_as_json diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link.enso index 8c511951477d..b57233ba392b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link.enso @@ -4,6 +4,7 @@ import project.Data.Text.Text import project.Data.Vector.Vector import project.Enso_Cloud.Errors.Missing_Data_Link_Library import project.Error.Error +import project.Errors.Common.No_Such_Conversion import project.Errors.File_Error.File_Error import project.Errors.Illegal_Argument.Illegal_Argument import project.Errors.Illegal_State.Illegal_State @@ -111,7 +112,7 @@ type Data_Link Arguments: - file: The file to read the configuration from. read_raw_config (file : File_Like) -> Text = - if Data_Link.is_data_link file . not then + if Data_Link.is_data_link_file file . not then Panic.throw (Illegal_Argument.Error " Data_Link should only be used for reading config of Data Links, but "+file.to_display_text+" is not a Data Link.") options = [File_Access.Read, Data_Link_Access.No_Follow] bytes = file.underlying.with_input_stream options input_stream-> @@ -130,7 +131,7 @@ type Data_Link existing file. By default, the operation will fail if the file already exists. write_raw_config (file : File_Like) (raw_content : Text) (replace_existing : Boolean = False) = - if Data_Link.is_data_link file.underlying . not then + if Data_Link.is_data_link_file file.underlying . not then Panic.throw (Illegal_Argument.Error " Data_Link should only be used for writing config to Data Links, but "+file.to_display_text+" is not a Data Link.") exist_options = if replace_existing then [File_Access.Create, File_Access.Truncate_Existing] else [File_Access.Create_New] options = exist_options + [File_Access.Write, Data_Link_Access.No_Follow] @@ -141,8 +142,13 @@ type Data_Link r.if_not_error file.underlying ## PRIVATE - Checks if the given file is a data-link. - is_data_link (file_metadata : File_Format_Metadata) -> Boolean = + Checks if the given entity is a data link, based on its metadata (content type and file name). + + ! Files + For regular files, it is preferred to use the `is_data_link_file` method, + as it can rely on custom logic that may be more efficient and more + precise (e.g. correctly distinguishing links from directories with unexpected name suffix). + is_data_link_from_metadata (file_metadata : File_Format_Metadata) -> Boolean = content_type_matches = case file_metadata.interpret_content_type of content_type : Content_Type_Metadata -> content_type.base_type == data_link_content_type @@ -151,10 +157,40 @@ type Data_Link # If the content type matches, it is surely a data link. if content_type_matches then True else ## If the content type does not match, we check the extension even if _different content type was provided_. - That is because many HTTP servers will not understand data links and may return a data link with + That is because most HTTP servers will not understand data links and may return a data link with a content type like `text/plain` or `application/json`. We still want to treat the file as a data link if its extension is correct. case file_metadata.guess_extension of extension : Text -> extension == data_link_extension Nothing -> False + + ## PRIVATE + Checks if the given file represents a data link. 
+ is_data_link_file (file : Any) -> Boolean = + data_link_from_file = Panic.catch No_Such_Conversion (Data_Link_From_File.from file) _-> + Panic.throw (Illegal_State.Error "The file system associated with file "+file.to_display_text+" ("+(Meta.type_of file . to_display_text)+") does not have data link support.") + data_link_from_file.is_data_link + + ## PRIVATE + Checks if the given file name ends with the `.datalink` suffix associated with data links. + This is a necessary but not sufficient condition for a file to be a data link; + various file systems may impose additional constraints. + is_data_link_name (name : Text) -> Boolean = + name.ends_with data_link_extension + +## PRIVATE + A type class that file systems can implement to indicate their custom logic for handling data links. + A type that implements this type class should provide an `is_data_link` method. +type Data_Link_From_File + ## PRIVATE + Value underlying + + ## PRIVATE + Checks if the given file is a data link. + is_data_link self -> Boolean = self.underlying.is_data_link + +## PRIVATE + Files may sometimes be converted to File_Like. + If they implement `Data_Link_From_File`, we want to inherit that implementation. +Data_Link_From_File.from (that : File_Like) = Data_Link_From_File.from that.underlying diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Capabilities.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Capabilities.enso index 95d755d33c37..43d16a0b7c53 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Capabilities.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Capabilities.enso @@ -28,7 +28,7 @@ type Data_Link_With_Input_Stream ## PRIVATE Creates a `Data_Link_With_Input_Stream` from a data link instance, if that data link supports streaming. If it does not, an error is thrown. - find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" cannot be opened as a stream."))) -> Data_Link_With_Input_Stream ! Illegal_Argument = + find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" cannot be opened as a stream."))) = handle_no_conversion _ = if_not_supported # `if_not_error` as workaround for https://github.com/enso-org/enso/issues/9669 data_link_instance . if_not_error <| @@ -50,7 +50,7 @@ type Data_Link_With_Output_Stream ## PRIVATE Creates a `Data_Link_With_Output_Stream` from a data link instance, if that data link supports streaming. If it does not, an error is thrown. - find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" does not support writing raw data to it."))) -> Data_Link_With_Output_Stream ! Illegal_Argument = + find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" does not support writing raw data to it."))) = handle_no_conversion _ = if_not_supported # `if_not_error` as workaround for https://github.com/enso-org/enso/issues/9669 data_link_instance . if_not_error <| @@ -72,7 +72,7 @@ type Writable_Data_Link ## PRIVATE Creates a `Writable_Data_Link` from a data link instance, if that data link supports writing. If it does not, an error is thrown. 
- find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" does not support writing."))) -> Writable_Data_Link ! Illegal_Argument = + find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" does not support writing."))) = handle_no_conversion _ = if_not_supported Panic.catch No_Such_Conversion (Writable_Data_Link.from data_link_instance) handle_no_conversion @@ -86,6 +86,6 @@ type File_Like_Data_Link ## PRIVATE Creates a `Writable_Data_Link` from a data link instance, if that data link supports writing. If it does not, an error is thrown. - find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" is not a link to a file object."))) -> File_Like_Data_Link ! Illegal_Argument = + find data_link_instance (if_not_supported = (Error.throw (Illegal_Argument.Error "The "+(data_link_name data_link_instance)+" is not a link to a file object."))) = handle_no_conversion _ = if_not_supported Panic.catch No_Such_Conversion (File_Like_Data_Link.from data_link_instance) handle_no_conversion diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso index da02a10de343..d170b2b06e2a 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso @@ -120,6 +120,12 @@ interpret_data_link_target_as_file (file : File_Like) -> Any = file_like_data_link = File_Like_Data_Link.find data_link_instance file_like_data_link.as_file +## PRIVATE +interpret_data_link_target_as_file_or_nothing (file : File_Like) -> Any | Nothing = + data_link_instance = read_and_interpret_data_link file + file_like_data_link = File_Like_Data_Link.find data_link_instance if_not_supported=Nothing + file_like_data_link.if_not_nothing <| file_like_data_link.as_file + ## PRIVATE interpret_data_link_as_writable_file (file : File_Like) -> Writable_File = data_link_instance = read_and_interpret_data_link file @@ -175,16 +181,16 @@ data_link_name data_link_instance = ## PRIVATE disallow_links_in_copy source target ~action = - is_source_data_link = Data_Link.is_data_link source - is_target_data_link = Data_Link.is_data_link target + is_source_data_link = Data_Link.is_data_link_file source + is_target_data_link = Data_Link.is_data_link_file target if is_source_data_link && is_target_data_link then Error.throw (Illegal_Argument.Error "The `copy_to` operation cannot be used with data links. If you want to copy links, use `Data_Link.copy`. If you want to copy the contents, `.read` the data link and then write the data to the destination using the appropriate method.") else if is_source_data_link || is_target_data_link then Error.throw (Illegal_Argument.Error "The `copy_to` operation cannot be used with data links. 
Please `.read` the data link and then write the data to the destination using the appropriate method.") else action ## PRIVATE disallow_links_in_move source target ~action = - is_source_data_link = Data_Link.is_data_link source - is_target_data_link = Data_Link.is_data_link target + is_source_data_link = Data_Link.is_data_link_file source + is_target_data_link = Data_Link.is_data_link_file target if is_source_data_link && is_target_data_link then Error.throw (Illegal_Argument.Error "The `move_to` operation cannot be used with data links. If you want to move the link, use `Data_Link.move`.") else if is_source_data_link || is_target_data_link then Error.throw (Illegal_Argument.Error "The `move_to` operation cannot be used with data links. Please `.read` the data link and then write the data to the destination using the appropriate method.") else action @@ -212,3 +218,21 @@ save_password_for_data_link data_link_location:Enso_File secure_value:Text|Enso_ location_name = if data_link_location.name.ends_with data_link_extension then data_link_location.name.drop (..Last data_link_extension.length) else data_link_location.name secret_location.if_not_error <| store_as_secret secret_location location_name+"-"+name_hint secure_value + +## PRIVATE + Common logic for checking if the target of a data link is a directory, or falling back to a direct check. +is_directory file ~check_file_directly = + case Data_Link.is_data_link_file file of + True -> + target = interpret_data_link_target_as_file_or_nothing file + if target.is_nothing then False else target.is_directory + False -> check_file_directly + +## PRIVATE + Common logic for checking if the target of a data link is a regular file, or falling back to a direct check. +is_regular_file file ~check_file_directly = + case Data_Link.is_data_link_file file of + True -> + target = interpret_data_link_target_as_file_or_nothing file + if target.is_nothing then False else target.is_regular_file + False -> check_file_directly diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso index 9fb90e46bb04..8bdb45a0da94 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso @@ -8,6 +8,7 @@ import project.Data.Time.Date_Time.Date_Time import project.Data.Time.Date_Time_Formatter.Date_Time_Formatter import project.Data.Vector.Vector import project.Enso_Cloud.Data_Link.Data_Link +import project.Enso_Cloud.Data_Link.Data_Link_From_File import project.Enso_Cloud.Data_Link_Helpers import project.Enso_Cloud.Enso_User.Enso_User import project.Enso_Cloud.Errors.Enso_Cloud_Error @@ -23,6 +24,7 @@ import project.Errors.Illegal_Argument.Illegal_Argument import project.Errors.Problem_Behavior.Problem_Behavior import project.Errors.Time_Error.Time_Error import project.Errors.Unimplemented.Unimplemented +import project.Internal.Path_Helpers import project.Network.HTTP.HTTP import project.Network.HTTP.HTTP_Method.HTTP_Method import project.Network.URI.URI @@ -41,6 +43,7 @@ import project.System.Input_Stream.Input_Stream import project.System.Output_Stream.Output_Stream from project.Data.Boolean import Boolean, False, True from project.Data.Text.Extensions import all +from project.Enso_Cloud.Data_Link_Helpers import data_link_extension from project.Enso_Cloud.Internal.Enso_File_Helpers import all from project.Enso_Cloud.Public_Utils import get_required_field from project.System.File 
import find_extension_from_name @@ -57,7 +60,8 @@ type Enso_File new : Text -> Enso_File ! Not_Found new (path : Text) = resolved_path = Enso_Path.parse path - resolved_path.if_not_error <| Enso_File.Value resolved_path + # Traverse the path to possibly pick up any data links. + resolved_path.path_segments.fold Enso_File.root .resolve_single_part ## PRIVATE private Value (enso_path : Enso_Path) @@ -106,7 +110,11 @@ type Enso_File ## GROUP Metadata ICON metadata - Checks if the folder or file exists + Checks if the folder or file exists. + + ? Data Links + If the file is a data link, this checks if the data link itself exists. + It says nothing about the existence of the data link target. exists : Boolean exists self = r = Existing_Enso_Asset.get_asset_reference_for self . if_not_error True @@ -117,20 +125,28 @@ type Enso_File ## GROUP Metadata ICON metadata Gets the size of a file in bytes. + + ? Data Links + If the file is a data link that points to a file, the size of the + target file will be returned. size : Integer - size self -> Integer = + size self -> Integer = if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . size else asset = Existing_Enso_Asset.get_asset_reference_for self - if asset.is_regular_file.not then Error.throw (Illegal_Argument.Error "`size` can only be queried for files.") else + if asset.asset_type != Enso_Asset_Type.File then Error.throw (Illegal_Argument.Error "`size` can only be queried for files.") else metadata = asset.get_file_description |> get_required_field "metadata" get_required_field "size" metadata expected_type=Integer ## GROUP Metadata ICON metadata Gets the creation time of a file. + + ? Data Links + If the file is a data link, this returns the creation time of the data + link. creation_time : Date_Time creation_time self -> Date_Time = asset = Existing_Enso_Asset.get_asset_reference_for self - if asset.is_regular_file.not then Error.throw (Illegal_Argument.Error "`creation_time` can only be queried for files.") else + if asset.asset_type != Enso_Asset_Type.File then Error.throw (Illegal_Argument.Error "`creation_time` can only be queried for files.") else metadata = asset.get_file_description |> get_required_field "metadata" Date_Time.parse (get_required_field "createdAt" metadata expected_type=Text) Date_Time_Formatter.iso_offset_date_time . catch Time_Error error-> Error.throw (Enso_Cloud_Error.Invalid_Response_Payload error) @@ -138,6 +154,10 @@ type Enso_File ## GROUP Metadata ICON metadata Gets the last modified time of a file. + + ? Data Links + If the file is a data link, this returns the modification time of the + data link. last_modified_time : Date_Time last_modified_time self -> Date_Time = asset = Existing_Enso_Asset.get_asset_reference_for self want_metadata=True @@ -262,17 +282,41 @@ type Enso_File ## GROUP Metadata ICON metadata - Checks if this is a folder + Checks if this is a folder. + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a directory. is_directory : Boolean - is_directory self = - Existing_Enso_Asset.get_asset_reference_for self . is_directory + is_directory self = Data_Link_Helpers.is_directory self <| + Existing_Enso_Asset.get_asset_reference_for self . asset_type == Enso_Asset_Type.Directory ## GROUP Metadata ICON metadata - Checks if this is a regular file + Checks if this is a regular file. + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a regular file. 
is_regular_file : Boolean - is_regular_file self = - Existing_Enso_Asset.get_asset_reference_for self . is_regular_file + is_regular_file self = Data_Link_Helpers.is_regular_file self <| + Existing_Enso_Asset.get_asset_reference_for self . asset_type == Enso_Asset_Type.File + + ## PRIVATE + is_data_link self -> Boolean = + ## Checking the `asset_type` requires actually fetching the metadata from the cloud. + To avoid performing requests for every part of a path that we are resolving, + we rely on the assumption that every data link has a name ending with the `.datalink` extension. + Thus we perform the costly check only if we find a good potential candidate for the data link. + may_be_data_link = Data_Link.is_data_link_name self.name + if may_be_data_link.not then False else + # We catch `File_Error` which means that the asset just does not exist. + asset_type = self.asset_type.catch File_Error _->Nothing + ## If the file does not exist, we treat it as a 'possible data link' to allow creating new data links with + that `Enso_File` as the data link location. Thus we return True. + If the asset already exists, we rely on its type. + if asset_type.is_nothing then True else + self.asset_type == Enso_Asset_Type.Data_Link ## GROUP Metadata ICON folder @@ -314,7 +358,7 @@ type Enso_File with_output_stream : Vector File_Access -> (Output_Stream -> Any ! File_Error) -> Any ! File_Error with_output_stream self (open_options : Vector) action = Context.Output.if_enabled disabled_message="As writing is disabled, cannot write to a file. Press the Write button ▶ to perform the operation." panic=False <| - is_data_link = Data_Link.is_data_link self + is_data_link = self.is_data_link open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && is_data_link if open_as_data_link then Data_Link_Helpers.write_data_link_as_stream self open_options action else if open_options.contains File_Access.Append then Unimplemented.throw "Enso_File currently does not support appending to a file. Instead you may read it, modify and then write the new contents." else @@ -344,7 +388,7 @@ type Enso_File if it returns exceptionally). with_input_stream : Vector File_Access -> (Input_Stream -> Any ! File_Error) -> Any ! File_Error | Illegal_Argument with_input_stream self (open_options : Vector) action = - open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self) + open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && self.is_data_link if open_as_data_link then Data_Link_Helpers.read_data_link_as_stream self open_options action else File_Access.ensure_only_allowed_options "with_input_stream" [File_Access.Read, Data_Link_Access.No_Follow] open_options <| asset = Existing_Enso_Asset.get_asset_reference_for self @@ -435,10 +479,10 @@ type Enso_File ## GROUP Input ICON data_input - Gets a list of assets within self. + Lists assets contained in this directory. list : Text -> Boolean -> Vector Enso_File list self (name_filter:Text="") recursive:Boolean=False = - if Data_Link.is_data_link self then Data_Link_Helpers.interpret_data_link_target_as_file self . list name_filter=name_filter recursive=recursive else + if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . 
list name_filter=name_filter recursive=recursive else if name_filter != "" then Error.throw (Illegal_Argument.Error "Filtering by name is currently not supported in Enso_File.") else if recursive then Error.throw (Illegal_Argument.Error "Recursive listing is currently not supported in Enso_File.") else if self.is_directory.not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory.") else @@ -469,16 +513,21 @@ type Enso_File removed if this is set to `True`. Defaults to `False`, meaning that the operation will fail if the directory is not empty. This option has no effect for files or data links. + + ? Data Links + If the file is a data link, this will delete the link itself, not + affecting its target. delete : Boolean -> Nothing delete self (recursive : Boolean = False) = if self.enso_path.is_root then Error.throw (Illegal_Argument.Error "The root directory cannot be deleted.") else asset = Existing_Enso_Asset.get_asset_reference_for self # The cloud defaults to recursively removing the whole directory, so we need a check on our side. # The `self.list` is last of the `&&` conditions because it is the most expensive one. # TODO ideally we should have a parameter `recursive` that would move this check to be local - if asset.is_directory && recursive.not && self.list.is_empty.not then Error.throw (File_Error.Directory_Not_Empty self) else + is_directory = asset.asset_type == Enso_Asset_Type.Directory + if is_directory && recursive.not && self.list.is_empty.not then Error.throw (File_Error.Directory_Not_Empty self) else uri = URI.from asset.asset_uri . add_query_argument "force" "true" response = Utils.http_request HTTP_Method.Delete uri - if asset.is_directory then Asset_Cache.invalidate_subtree self else Asset_Cache.invalidate self + if is_directory then Asset_Cache.invalidate_subtree self else Asset_Cache.invalidate self response.if_not_error Nothing ## ICON data_output @@ -527,8 +576,15 @@ type Enso_File ICON folder Resolves a file or directory within this directory. / : Text -> Enso_File ! Not_Found - / self (name : Text) -> Enso_File ! Not_Found = - Enso_File.Value (self.enso_path.resolve name) + / self (name : Text) -> Enso_File | Any ! Not_Found = + Path_Helpers.resolve_many_parts self name + + ## PRIVATE + An internal helper method that resolves a single part (subpath) of a path, + used by `Path_Helpers.resolve_many_parts`. + resolve_single_part self part:Text -> Any = + if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . resolve_single_part part else + Enso_File.Value (self.enso_path.resolve part) ## PRIVATE Returns the text representation of the file descriptor. 
@@ -581,5 +637,8 @@ File_Format_Metadata.from (that:Enso_File) = File_Like.from (that : Enso_File) = File_Like.Value that ## PRIVATE -Writable_File.from (that : Enso_File) = if Data_Link.is_data_link that then Data_Link_Helpers.interpret_data_link_as_writable_file that else +Writable_File.from (that : Enso_File) = if that.is_data_link then Data_Link_Helpers.interpret_data_link_as_writable_file that else Writable_File.Value that Enso_File_Write_Strategy.instance + +## PRIVATE +Data_Link_From_File.from (that : Enso_File) = Data_Link_From_File.Value that diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Helpers.enso index f1cbd72fa87f..54819f5cfb72 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Helpers.enso @@ -135,7 +135,7 @@ create_directory_with_parents (target : Enso_File) -> Existing_Enso_Asset = if parent_file.enso_path.is_root then Error.throw (Illegal_State.Error "Unexpected error: when creating directory "+target.path+", reached the root directory and the backend reported that it does not exist - which should not happen. Please report this error.") else create_directory_with_parents parent_file _ -> error - if parent_asset.is_directory.not then Error.throw (File_Error.Not_A_Directory parent_file) else + if parent_asset.asset_type != Enso_Asset_Type.Directory then Error.throw (File_Error.Not_A_Directory parent_file) else body = JS_Object.from_pairs [["title", target.name], ["parentId", parent_asset.id]] Asset_Cache.invalidate target response = Utils.http_request_as_json HTTP_Method.Post Utils.directory_api body diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Existing_Enso_Asset.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Existing_Enso_Asset.enso index 96061c72dc52..b95f9508d9d0 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Existing_Enso_Asset.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Existing_Enso_Asset.enso @@ -103,12 +103,6 @@ type Existing_Enso_Asset Existing_Enso_Asset.from_json response . catch Not_Found _-> if_not_found - ## PRIVATE - is_directory self = self.asset_type == Enso_Asset_Type.Directory - - ## PRIVATE - is_regular_file self = self.asset_type == Enso_Asset_Type.File - ## PRIVATE list_directory self = if self.asset_type != Enso_Asset_Type.Directory then Error.throw (Illegal_Argument.Error "Only directories can be listed.") else diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Utils.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Utils.enso index c7b973b73e49..51f90a3dc9e2 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Utils.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Utils.enso @@ -123,19 +123,19 @@ http_request (method : HTTP_Method) (url : URI) (body : Request_Body = ..Empty) action and caches it for future use. If `cache_duration` is set to `Nothing`, then the cache is always skipped. get_cached (key : Text) ~action (cache_duration : Duration | Nothing = Duration.new seconds=60) = - CloudRequestCache.getOrCompute key (_->action) cache_duration + CloudRequestCache.INSTANCE.getOrCompute key (_->action) cache_duration ## PRIVATE Invalidates the cache entry for the given key. 
invalidate_cache (key : Text) = - CloudRequestCache.invalidateEntry key + CloudRequestCache.INSTANCE.invalidateEntry key ## PRIVATE Invalidates all cache entries that share a common prefix. invalidate_caches_with_prefix (prefix : Text) = - CloudRequestCache.invalidatePrefix prefix + CloudRequestCache.INSTANCE.invalidatePrefix prefix ## PRIVATE If `cache_duration` is set to `Nothing`, then this action does not do anything. set_cached (key : Text) value (cache_duration : Duration | Nothing = Duration.new seconds=60) = - CloudRequestCache.put key value cache_duration + CloudRequestCache.INSTANCE.put key value cache_duration diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Data_Read_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Data_Read_Helpers.enso index fa00cdafa20d..598e5ce8c9be 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Data_Read_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Data_Read_Helpers.enso @@ -53,7 +53,7 @@ fetch_following_data_links (uri:URI) (method:HTTP_Method = ..Get) (headers:Vecto decode_http_response_following_data_links response format = # If Raw_Response is requested, we ignore data link handling. if format == Raw_Response then response.with_materialized_body else - case Data_Link.is_data_link response.body.metadata of + case Data_Link.is_data_link_from_metadata response.body.metadata of True -> data_link = Data_Link_Helpers.interpret_json_as_data_link response.decode_as_json data_link.read format Problem_Behavior.Report_Error diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Path_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Path_Helpers.enso index 39095aa9e9c8..70e28bd82af9 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Path_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Path_Helpers.enso @@ -1,9 +1,12 @@ import project.Any.Any import project.Data.List.List +import project.Data.Text.Regex.Regex import project.Data.Text.Text import project.Data.Vector.Vector import project.Error.Error import project.Errors.Illegal_Argument.Illegal_Argument +from project.Data.Boolean import Boolean, False, True +from project.Data.Text.Extensions import all ## PRIVATE A helper that gathers the common logic of normalizing the `..` and `.` @@ -19,3 +22,30 @@ normalize_segments (segments : Vector) (get_name : Any -> Text) -> Vector ! Ille List.Cons _ tail -> tail _ -> List.Cons part stack new_stack.to_vector.reverse + +## PRIVATE + The method splits the `subpath` into parts based on the `/` delimiter and + applies each part iteratively using the `resolve_single_part` method on the + file objects. + + Each delimiter is applied together with the file part preceding it (if any), + so a call `resolve_many_parts f "/a/b/c"` translates into + `f.resolve_single_part "/" . resolve_single_part "a/" . resolve_single_part "b/" . resolve_single_part "c"`. + Keeping the delimiter is needed so that some systems (e.g. S3) can + distinguish directories from regular files. +resolve_many_parts (base_file : Any) (subpath : Text) (windows_delimiter : Boolean = True) = + parts = split_path subpath windows_delimiter + parts.fold base_file f-> part-> f.resolve_single_part part + +## PRIVATE + Splits the given path into parts based on the delimiter. +split_path (path : Text) (windows_delimiter : Boolean = True) -> Vector Text = + # The `\` is doubled to escape it in the regex.
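+    # For example (an illustrative sketch, not taken from the test suite):
+    # with the default delimiters, `split_path "/a/b//c"` should yield
+    # `["/", "a/", "b/", "/", "c"]` - each delimiter stays attached to the
+    # part preceding it, and a repeated delimiter becomes a lone "/" part,
+    # which resolvers such as S3 simply ignore.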
+ delimiters = if windows_delimiter then "/\\" else "/" + ## This matches either a string of non-slash characters followed by a slash, + a single slash with no characters attached to it, or a string of + non-slash characters with no slash at the end. + With one delimiter this should be equivalent to: [^/]*/|[^/]+ + Enabling `windows_delimiter` also allows it to break on the backslash. + regex = Regex.compile "[^"+delimiters+"]*["+delimiters+"]|[^"+delimiters+"]+" + path.tokenize regex diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index 51decf6624fe..69e0839e6d3c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -9,6 +9,7 @@ import project.Data.Text.Text import project.Data.Time.Date_Time.Date_Time import project.Data.Vector.Vector import project.Enso_Cloud.Data_Link.Data_Link +import project.Enso_Cloud.Data_Link.Data_Link_From_File import project.Enso_Cloud.Data_Link_Helpers import project.Enso_Cloud.Enso_File.Enso_File import project.Error.Error @@ -18,6 +19,7 @@ import project.Errors.File_Error.File_Error import project.Errors.Illegal_Argument.Illegal_Argument import project.Errors.Problem_Behavior.Problem_Behavior import project.Function.Function +import project.Internal.Path_Helpers import project.Meta import project.Metadata.Display import project.Metadata.Widget @@ -38,10 +40,12 @@ import project.Warning.Warning from project.Data.Boolean import Boolean, False, True from project.Metadata.Choice import Option from project.System.File_Format import Auto_Detect, File_Format +from project.System.Internal.File_Builtins import all polyglot java import java.io.File as Java_File polyglot java import java.io.InputStream as Java_Input_Stream polyglot java import java.io.OutputStream as Java_Output_Stream +polyglot java import java.nio.file.InvalidPathException polyglot java import java.nio.file.StandardCopyOption polyglot java import java.nio.file.StandardOpenOption polyglot java import java.time.ZonedDateTime @@ -79,7 +83,7 @@ type File example_new = File.new Examples.csv_path new : (Text | File) -> Any ! Illegal_Argument new path = case path of - _ : Text -> if path.contains "://" . not then resolve_path path else + _ : Text -> if path.contains "://" . not then resolve_local_file path else protocol = path.split "://" . first file_system = FileSystemSPI.get_type protocol False if file_system.is_nothing then Error.throw (Illegal_Argument.Error "Unsupported protocol "+protocol) else @@ -186,7 +190,7 @@ type File new_output_stream : File -> Vector File_Access -> Output_Stream ! File_Error new_output_stream file open_options = opts = open_options . map (_.to_java) - stream = File_Error.handle_java_exceptions file (file.output_stream_builtin opts) + stream = File_Error.handle_java_exceptions file (output_stream_builtin file opts) ## Re-wrap the File Not Found error to return the parent directory instead of the file itself, as that is the issue if not present. @@ -196,33 +200,12 @@ type File Output_Stream.new wrapped (File_Error.handle_java_exceptions self) Context.Output.if_enabled disabled_message="As writing is disabled, cannot write to a file. Press the Write button ▶ to perform the operation." panic=False <| - open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self) + open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . 
not) && self.is_data_link if open_as_data_link then Data_Link_Helpers.write_data_link_as_stream self open_options action else # We ignore the Data_Link_Access options at this stage: just_file_options = open_options.filter opt-> opt.is_a File_Access Managed_Resource.bracket (new_output_stream self just_file_options) (_.close) action - ## PRIVATE - Creates a new output stream for this file. Recommended to use - `File.with_output_stream` instead, which does resource management. - - Arguments: - - options: A vector of `File_Access` objects determining how to open - the stream. These options set the access properties of the stream. - output_stream_builtin : Vector File_Access -> Java_Output_Stream - output_stream_builtin self options = @Builtin_Method "File.output_stream_builtin" - - ## PRIVATE - Creates a new input stream for this file. Recommended to use - `File.with_input_stream` instead, which does resource management. - - Arguments: - - open_options: A vector of `StandardOpenOption` polyglot objects - determining how to open the stream. These options set the access - properties of the stream. - input_stream_builtin : Vector StandardOpenOption -> Java_Input_Stream - input_stream_builtin self options = @Builtin_Method "File.input_stream_builtin" - ## PRIVATE ADVANCED Creates a new input stream for this file and runs the specified action @@ -252,11 +235,11 @@ type File new_input_stream : File -> Vector File_Access -> Input_Stream ! File_Error new_input_stream file open_options = opts = open_options . map (_.to_java) - stream = File_Error.handle_java_exceptions file (file.input_stream_builtin opts) + stream = File_Error.handle_java_exceptions file (input_stream_builtin file opts) Input_Stream.new stream (File_Error.handle_java_exceptions self) associated_source=self - if self.is_directory then Error.throw (File_Error.IO_Error self "File '"+self.path+"' is a directory") else - open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self) + if (is_directory_builtin self) then Error.throw (File_Error.IO_Error self "File '"+self.path+"' is a directory") else + open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (self.is_data_link) if open_as_data_link then Data_Link_Helpers.read_data_link_as_stream self open_options action else # We ignore the Data_Link_Access options at this stage: just_file_options = open_options.filter opt-> opt.is_a File_Access @@ -298,7 +281,7 @@ type File read self format=Auto_Detect (on_problems : Problem_Behavior = ..Report_Warning) = if self.exists.not then Error.throw (File_Error.Not_Found self) else if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") else - if Data_Link.is_data_link self then Data_Link_Helpers.read_data_link self format on_problems else + if self.is_data_link then Data_Link_Helpers.read_data_link self format on_problems else resolved_format = File_Format.resolve format resolved_format.read self on_problems @@ -356,7 +339,8 @@ type File example_append = Examples.data_dir / "scratch_file" @subpath get_child_widget / : Text -> File - / self (subpath : Text) = self.resolve subpath + / self (subpath : Text) = + Path_Helpers.resolve_many_parts self subpath ## GROUP Calculations ICON folder @@ -385,9 +369,17 @@ type File _ -> self.join [subpaths] ## PRIVATE - Internal method to join two path segments together. + Internal method kept for compatibility; we should probably prefer `/`, as + it is used across all of our file systems.
resolve : Text -> File - resolve self (subpath : Text) = @Builtin_Method "File.resolve" + resolve self (subpath : Text) = self / subpath + + ## PRIVATE + An internal helper method that resolves a single part (subpath) of a path. + resolve_single_part self part:Text -> Any = + if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . resolve_single_part part else + handle_invalid_path part <| + resolve_builtin self part ## PRIVATE Convert the file descriptor to a JS_Object. @@ -414,6 +406,10 @@ type File import Standard.Examples example_exists = Examples.csv.exists + + ? Data Links + If the file is a data link, this checks if the data link itself exists. + It does not say anything about the existence of the data link target. exists : Boolean exists self = @Builtin_Method "File.exists" @@ -427,9 +423,14 @@ type File import Standard.Examples example_exists = Examples.csv.size + + ? Data Links + If the file is a data link that points to a file, the size of the + target file will be returned. size : Integer size self = - File_Error.handle_java_exceptions self <| self.size_builtin + if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . size else + File_Error.handle_java_exceptions self (size_builtin self) ## GROUP Text ICON preparation @@ -456,9 +457,13 @@ type File import Standard.Examples example_exists = Examples.csv.creation_time + + ? Data Links + If the file is a data link, this returns the creation time of the data + link. creation_time : Date_Time ! File_Error creation_time self = - File_Error.handle_java_exceptions self <| self.creation_time_builtin + File_Error.handle_java_exceptions self (creation_time_builtin self) ## GROUP Metadata ICON metadata @@ -470,9 +475,13 @@ type File import Standard.Examples example_exists = Examples.csv.last_modified_time + + ? Data Links + If the file is a data link, this returns the modification time of the + data link. last_modified_time : Date_Time ! File_Error last_modified_time self = - File_Error.handle_java_exceptions self <| self.last_modified_time_builtin + File_Error.handle_java_exceptions self (last_modified_time_builtin self) ## ICON metadata Gets the POSIX permissions associated with the file. @@ -483,9 +492,13 @@ type File import Standard.Examples example_permissions = Examples.csv.posix_permissions.group_read + + ? Data Links + If the file is a data link, this returns the permissions of the link, + not the target. posix_permissions : File_Permissions posix_permissions self = - File_Permissions.from_java_set self.posix_permissions_builtin + File_Permissions.from_java_set (posix_permissions_builtin self) ## GROUP Metadata ICON metadata @@ -497,8 +510,13 @@ type File import Standard.Examples example_is_directory = Examples.csv.is_directory + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a directory. is_directory : Boolean - is_directory self = @Builtin_Method "File.is_directory" + is_directory self = + Data_Link_Helpers.is_directory self (is_directory_builtin self) ## GROUP Output ICON folder_add @@ -516,12 +534,7 @@ type File create_directory : File ! File_Error create_directory self = Context.Output.if_enabled disabled_message="As writing is disabled, cannot create directory. Press the Write button ▶ to perform the operation." panic=False <| - File_Error.handle_java_exceptions self self.create_directory_builtin . if_not_error self - - ## PRIVATE - Creates the directory represented by this file if it did not exist.
- create_directory_builtin : Nothing - create_directory_builtin self = @Builtin_Method "File.create_directory_builtin" + File_Error.handle_java_exceptions self (create_directory_builtin self) . if_not_error self ## GROUP Metadata ICON metadata @@ -529,8 +542,8 @@ type File ? Regular Files A regular file is one that does not have any special meaning to the - operating system. Examples of files that are not regular are symlinks, - pipes, devices, sockets and directories. + operating system. Examples of files that are not regular are pipes, + devices, sockets and directories. > Example Check if a file is regular. @@ -538,8 +551,17 @@ type File import Standard.Examples example_is_regular_file = Examples.csv.is_regular_file + + ? Data Links + If the file is a data link, this checks whether the target of the data + link is a regular file. is_regular_file : Boolean - is_regular_file self = @Builtin_Method "File.is_regular_file" + is_regular_file self = + Data_Link_Helpers.is_regular_file self (is_regular_file_builtin self) + + ## PRIVATE + is_data_link self -> Boolean = + (is_directory_builtin self).not && Data_Link.is_data_link_name self.name ## GROUP Metadata ICON metadata @@ -548,6 +570,10 @@ type File ? Read-only Files If a file is read-only, it may still be possible to move or delete it, depending on the permissions associated with its parent directory. + + ? Data Links + If the file is a data link, this checks if the link itself is + modifiable. is_writable : Boolean is_writable self = @Builtin_Method "File.is_writable" @@ -663,10 +689,14 @@ type File file = Examples.data_dir / "my_file" file.write_text "hello" file.delete + + ? Data Links + If the file is a data link, this will delete the link itself, not + affecting its target. delete : Boolean -> Nothing ! File_Error delete self (recursive : Boolean = False) -> Nothing ! File_Error = Context.Output.if_enabled disabled_message="As writing is disabled, cannot delete file. Press the Write button ▶ to perform the operation." panic=False <| - File_Error.handle_java_exceptions self (self.delete_builtin recursive) + File_Error.handle_java_exceptions self (delete_builtin self recursive) ## ICON data_output Copies the file to the specified destination. @@ -682,7 +712,7 @@ type File because the conversion would already start resolving the data link too soon. destination_writable = Writable_File.from destination r = case destination_writable.file of - _ : File -> local_file_copy self destination_writable.file replace_existing + _ : File -> _local_file_copy self destination_writable.file replace_existing _ -> destination_writable.copy_from_local self replace_existing r.if_not_error destination_writable.file_for_return @@ -700,7 +730,7 @@ type File because the conversion would already start resolving the data link too soon. destination_writable = Writable_File.from destination r = case destination_writable.file of - _ : File -> local_file_move self destination_writable.file replace_existing + _ : File -> _local_file_move self destination_writable.file replace_existing _ -> r = destination_writable.copy_from_local self replace_existing r.if_not_error <| @@ -739,7 +769,7 @@ type File read_last_bytes : Integer -> Vector ! 
File_Error read_last_bytes self n = File_Error.handle_java_exceptions self <| - Vector.from_polyglot_array (self.read_last_bytes_builtin n) + Vector.from_polyglot_array (read_last_bytes_builtin self n) ## GROUP Input ICON data_input @@ -794,8 +824,8 @@ type File @name_filter File_Format.name_filter_widget list : Text -> Boolean -> Vector File list self name_filter:Text="" recursive:Boolean=False = - if Data_Link.is_data_link self then Data_Link_Helpers.interpret_data_link_target_as_file self . list name_filter=name_filter recursive=recursive else - if self.is_directory.not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory.") else + if self.is_data_link then Data_Link_Helpers.interpret_data_link_target_as_file self . list name_filter=name_filter recursive=recursive else + if (is_directory_builtin self).not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory.") else all_files = if recursive then list_descendants self else self.list_immediate_children filtered_files = case name_filter of "" -> all_files @@ -819,7 +849,6 @@ type File relativize : File -> Boolean relativize self child = @Builtin_Method "File.relativize" - ## PRIVATE Utility function that lists immediate children of a directory. list_immediate_children : Vector File @@ -852,22 +881,6 @@ list_descendants file = False -> Nothing go file -## PRIVATE - - Gets a file corresponding to the current working directory of the - program. -get_cwd : File -get_cwd = @Builtin_Method "File.get_cwd" - -## PRIVATE - - Gets a file corresponding to the provided path. - - Arguments: - - path: The path to obtain a file at. -get_file : Text -> File -get_file path = @Builtin_Method "File.get_file" - ## PRIVATE Resolves the given path to a corresponding file location. @@ -876,12 +889,26 @@ get_file path = @Builtin_Method "File.get_file" relative to the project's location. - if running locally, the path is resolved to a local file, relative to the current working directory. -resolve_path (path : Text) -> File | Enso_File = + + The method also does some extra processing to ensure that if the path + contains any data links, they will be correctly resolved. We cannot just + directly construct the path from text - we instead find its root + (or base directory if relative) and rely on the `/` operator to resolve all + the parts, allowing the data link logic to kick in. +resolve_local_file (path : Text) = handle_invalid_path path <| local_file = get_file path + resolved_local_file = + parts = Path_Helpers.split_path path + # Special case - if there are no parts, it means path was "", so we use that as base. + base_part = parts.get 0 if_missing="" + base = get_file base_part + rest = parts.drop 1 + rest.fold base acc-> part-> acc.resolve_single_part part + # Absolute files always resolve to themselves. 
- if local_file.is_absolute then local_file else + if local_file.is_absolute then resolved_local_file else case Enso_File.cloud_project_parent_directory of - Nothing -> local_file + Nothing -> resolved_local_file base_cloud_directory -> base_cloud_directory / path ## PRIVATE @@ -911,17 +938,24 @@ file_as_java (file : File) = Java_File.new file.absolute.normalize.path File_Like.from (that : File) = File_Like.Value that ## PRIVATE -Writable_File.from (that : File) = if Data_Link.is_data_link that then Data_Link_Helpers.interpret_data_link_as_writable_file that else +Writable_File.from (that : File) = if that.is_data_link then Data_Link_Helpers.interpret_data_link_as_writable_file that else Writable_File.Value that.absolute.normalize Local_File_Write_Strategy.instance ## PRIVATE -local_file_copy (source : File) (destination : File) (replace_existing : Boolean) -> Nothing = +Data_Link_From_File.from (that : File) = Data_Link_From_File.Value that + +private _local_file_copy (source : File) (destination : File) (replace_existing : Boolean) -> Nothing = File_Error.handle_java_exceptions source <| copy_options = if replace_existing then [StandardCopyOption.REPLACE_EXISTING.to_text] else [] - source.copy_builtin destination copy_options + copy_builtin source destination copy_options -## PRIVATE -local_file_move (source : File) (destination : File) (replace_existing : Boolean) -> Nothing = +private _local_file_move (source : File) (destination : File) (replace_existing : Boolean) -> Nothing = File_Error.handle_java_exceptions source <| copy_options = if replace_existing then [StandardCopyOption.REPLACE_EXISTING.to_text] else [] - source.move_builtin destination copy_options + move_builtin source destination copy_options + +## On some filesystems some paths may be invalid; + we want to catch the Java exception and turn it into an Enso error. +private handle_invalid_path ~path ~action = + Panic.catch InvalidPathException action caught_panic-> + Error.throw (Illegal_Argument.Error "The path "+path.to_display_text+" is invalid: "+caught_panic.payload.getMessage) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/Internal/File_Builtins.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/Internal/File_Builtins.enso new file mode 100644 index 000000000000..1395a0d3de7e --- /dev/null +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/Internal/File_Builtins.enso @@ -0,0 +1,50 @@ +private + +## Gets a file corresponding to the current working directory of the + program. +get_cwd = @Builtin_Method "File.get_cwd" + +## The builtin that returns a File instance for a given path. +get_file path = @Builtin_Method "File.get_file" + +is_directory_builtin file = @Builtin_Method "File.is_directory_builtin" + +is_regular_file_builtin file = @Builtin_Method "File.is_regular_file_builtin" + +resolve_builtin file part = @Builtin_Method "File.resolve_builtin" + +## Creates the directory represented by this file if it did not exist.
+create_directory_builtin file = @Builtin_Method "File.create_directory_builtin" + +copy_builtin source target options = @Builtin_Method "File.copy_builtin" + +delete_builtin file recursive = @Builtin_Method "File.delete_builtin" + +move_builtin source target options = @Builtin_Method "File.move_builtin" + +size_builtin file = @Builtin_Method "File.size_builtin" + +creation_time_builtin file = @Builtin_Method "File.creation_time_builtin" + +last_modified_time_builtin file = @Builtin_Method "File.last_modified_time_builtin" + +posix_permissions_builtin file = @Builtin_Method "File.posix_permissions_builtin" + +read_last_bytes_builtin file n = @Builtin_Method "File.read_last_bytes_builtin" + +## Creates a new output stream for this file. Recommended to use + `File.with_output_stream` instead, which does resource management. + + Arguments: + - options: A vector of `File_Access` objects determining how to open + the stream. These options set the access properties of the stream. +output_stream_builtin file options = @Builtin_Method "File.output_stream_builtin" + +## Creates a new input stream for this file. Recommended to use + `File.with_input_stream` instead, which does resource management. + + Arguments: + - options: A vector of `StandardOpenOption` polyglot objects + determining how to open the stream. These options set the access + properties of the stream. +input_stream_builtin file options = @Builtin_Method "File.input_stream_builtin" diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java index 335cd65d4ff4..5c449f716f5b 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java @@ -42,7 +42,6 @@ import org.enso.interpreter.runtime.data.vector.ArrayLikeAtNode; import org.enso.interpreter.runtime.data.vector.ArrayLikeHelpers; import org.enso.interpreter.runtime.data.vector.ArrayLikeLengthNode; -import org.enso.interpreter.runtime.error.DataflowError; import org.enso.interpreter.runtime.error.PanicException; /** @@ -71,14 +70,15 @@ protected String builtinName() { @Builtin.WrapException(from = IOException.class) @Builtin.Specialize @TruffleBoundary - public EnsoObject outputStream( + public static EnsoObject outputStream( + EnsoFile file, Object opts, @Cached ArrayLikeLengthNode lengthNode, @Cached ArrayLikeAtNode atNode, EnsoContext ctx) throws IOException { var options = namesToValues(opts, lengthNode, atNode, ctx, StandardOpenOption::valueOf); - var os = this.truffleFile.newOutputStream(options.toArray(OpenOption[]::new)); + var os = file.truffleFile.newOutputStream(options.toArray(OpenOption[]::new)); return new EnsoOutputStream(os); } @@ -198,14 +198,15 @@ public Object toDisplayString(boolean allowSideEffects) { @Builtin.WrapException(from = IOException.class) @Builtin.Specialize @TruffleBoundary - public EnsoObject inputStream( + public static EnsoObject inputStream( + EnsoFile file, Object opts, @Cached ArrayLikeLengthNode lengthNode, @Cached ArrayLikeAtNode atNode, EnsoContext ctx) throws IOException { var options = namesToValues(opts, lengthNode, atNode, ctx, StandardOpenOption::valueOf); - var is = this.truffleFile.newInputStream(options.toArray(OpenOption[]::new)); + var is = file.truffleFile.newInputStream(options.toArray(OpenOption[]::new)); return new EnsoInputStream(is); } @@ -440,9 +441,9 @@ private static List namesToValues( @Builtin.Method(name = 
"read_last_bytes_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public EnsoObject readLastBytes(long n) throws IOException { + public static EnsoObject readLastBytes(EnsoFile file, long n) throws IOException { try (SeekableByteChannel channel = - this.truffleFile.newByteChannel(Set.of(StandardOpenOption.READ))) { + file.truffleFile.newByteChannel(Set.of(StandardOpenOption.READ))) { int bytesToRead = Math.toIntExact(Math.min(channel.size(), n)); channel.position(channel.size() - bytesToRead); ByteBuffer buffer = ByteBuffer.allocate(bytesToRead); @@ -455,10 +456,11 @@ public EnsoObject readLastBytes(long n) throws IOException { } } - @Builtin.Method(name = "resolve") + @Builtin.Method(name = "resolve_builtin") + @Builtin.WrapException(from = IllegalArgumentException.class) @Builtin.Specialize - public EnsoFile resolve(String subPath) { - return new EnsoFile(this.truffleFile.resolve(subPath)); + public static EnsoFile resolve(EnsoFile file, Text part) { + return new EnsoFile(file.truffleFile.resolve(part.toString())); } @Builtin.Method @@ -469,24 +471,25 @@ public boolean exists() { @Builtin.Method(name = "creation_time_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public EnsoDateTime getCreationTime() throws IOException { + public static EnsoDateTime getCreationTime(EnsoFile file) throws IOException { return new EnsoDateTime( - ZonedDateTime.ofInstant(truffleFile.getCreationTime().toInstant(), ZoneOffset.UTC)); + ZonedDateTime.ofInstant(file.truffleFile.getCreationTime().toInstant(), ZoneOffset.UTC)); } @Builtin.Method(name = "last_modified_time_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public EnsoDateTime getLastModifiedTime() throws IOException { + public static EnsoDateTime getLastModifiedTime(EnsoFile file) throws IOException { return new EnsoDateTime( - ZonedDateTime.ofInstant(truffleFile.getLastModifiedTime().toInstant(), ZoneOffset.UTC)); + ZonedDateTime.ofInstant( + file.truffleFile.getLastModifiedTime().toInstant(), ZoneOffset.UTC)); } @Builtin.Method(name = "posix_permissions_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public Text getPosixPermissions() throws IOException { - return Text.create(PosixFilePermissions.toString(truffleFile.getPosixPermissions())); + public static Text getPosixPermissions(EnsoFile file) throws IOException { + return Text.create(PosixFilePermissions.toString(file.truffleFile.getPosixPermissions())); } @Builtin.Method(name = "parent") @@ -527,18 +530,18 @@ public boolean isAbsolute() { return this.truffleFile.isAbsolute(); } - @Builtin.Method + @Builtin.Method(name = "is_directory_builtin") @TruffleBoundary - public boolean isDirectory() { - return this.truffleFile.isDirectory(); + public static boolean isDirectory(EnsoFile file) { + return file.truffleFile.isDirectory(); } @Builtin.Method(name = "create_directory_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public void createDirectories() throws IOException { + public static void createDirectories(EnsoFile file) throws IOException { try { - this.truffleFile.createDirectories(); + file.truffleFile.createDirectories(); } catch (NoSuchFileException e) { throw replaceCreateDirectoriesNoSuchFileException(e); } catch (FileSystemException e) { @@ -636,10 +639,10 @@ public EnsoFile relativize(EnsoFile other) { return new EnsoFile(this.truffleFile.relativize(other.truffleFile)); } - @Builtin.Method + @Builtin.Method(name = "is_regular_file_builtin") 
@TruffleBoundary - public boolean isRegularFile() { - return this.truffleFile.isRegularFile(); + public static boolean isRegularFile(EnsoFile file) { + return file.truffleFile.isRegularFile(); } @Builtin.Method @@ -658,11 +661,11 @@ public Text getName() { @Builtin.Method(name = "size_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public long getSize() throws IOException { - if (this.truffleFile.isDirectory()) { + public static long getSize(EnsoFile file) throws IOException { + if (file.truffleFile.isDirectory()) { throw new IOException("size can only be called on files."); } - return this.truffleFile.size(); + return file.truffleFile.size(); } @TruffleBoundary @@ -690,15 +693,15 @@ public EnsoFile normalize() { @Builtin.Method(name = "delete_builtin") @Builtin.WrapException(from = IOException.class) @TruffleBoundary - public void delete(boolean recursive) throws IOException { - if (recursive && truffleFile.isDirectory(LinkOption.NOFOLLOW_LINKS)) { - deleteRecursively(truffleFile); + public static void delete(EnsoFile file, boolean recursive) throws IOException { + if (recursive && file.truffleFile.isDirectory(LinkOption.NOFOLLOW_LINKS)) { + deleteRecursively(file.truffleFile); } else { - truffleFile.delete(); + file.truffleFile.delete(); } } - private void deleteRecursively(TruffleFile file) throws IOException { + private static void deleteRecursively(TruffleFile file) throws IOException { if (file.isDirectory(LinkOption.NOFOLLOW_LINKS)) { for (TruffleFile child : file.list()) { deleteRecursively(child); @@ -711,7 +714,8 @@ private void deleteRecursively(TruffleFile file) throws IOException { @Builtin.WrapException(from = IOException.class) @Builtin.Specialize @TruffleBoundary - public void copy( + public static void copy( + EnsoFile source, EnsoFile target, Object options, @Cached ArrayLikeLengthNode lengthNode, @@ -719,14 +723,15 @@ public void copy( EnsoContext ctx) throws IOException { var copyOptions = namesToValues(options, lengthNode, atNode, ctx, StandardCopyOption::valueOf); - truffleFile.copy(target.truffleFile, copyOptions.toArray(CopyOption[]::new)); + source.truffleFile.copy(target.truffleFile, copyOptions.toArray(CopyOption[]::new)); } @Builtin.Method(name = "move_builtin", description = "Move this file to a target destination") @Builtin.WrapException(from = IOException.class) @Builtin.Specialize @TruffleBoundary - public void move( + public static void move( + EnsoFile source, EnsoFile target, Object options, @Cached ArrayLikeLengthNode lengthNode, @@ -734,7 +739,7 @@ public void move( EnsoContext ctx) throws IOException { var copyOptions = namesToValues(options, lengthNode, atNode, ctx, StandardCopyOption::valueOf); - truffleFile.move(target.truffleFile, copyOptions.toArray(CopyOption[]::new)); + source.truffleFile.move(target.truffleFile, copyOptions.toArray(CopyOption[]::new)); } @Builtin.Method @@ -749,21 +754,13 @@ public boolean startsWith(EnsoFile parent) { "Takes the text representation of a path and returns a TruffleFile corresponding to it.", autoRegister = false) @Builtin.Specialize + @Builtin.WrapException(from = IllegalArgumentException.class) + @Builtin.WrapException(from = UnsupportedOperationException.class) @TruffleBoundary @SuppressWarnings("generic-enso-builtin-type") - public static Object fromString(EnsoContext context, String path) - throws IllegalArgumentException { - try { - TruffleFile file = context.getPublicTruffleFile(path); - return new EnsoFile(file); - } catch (IllegalArgumentException | 
UnsupportedOperationException ex) { - var err = - context - .getBuiltins() - .error() - .makeUnsupportedArgumentsError(new Object[] {Text.create(path)}, ex.getMessage()); - return DataflowError.withDefaultTrace(err, null); - } + public static Object fromString(EnsoContext context, String path) { + TruffleFile file = context.getPublicTruffleFile(path); + return new EnsoFile(file); } @Builtin.Method( diff --git a/std-bits/aws/src/main/java/org/enso/aws/file_system/S3DataLinkCache.java b/std-bits/aws/src/main/java/org/enso/aws/file_system/S3DataLinkCache.java new file mode 100644 index 000000000000..72c1de6c036a --- /dev/null +++ b/std-bits/aws/src/main/java/org/enso/aws/file_system/S3DataLinkCache.java @@ -0,0 +1,7 @@ +package org.enso.aws.file_system; + +import org.enso.base.cache.APIRequestCache; + +public class S3DataLinkCache extends APIRequestCache { + public static final S3DataLinkCache INSTANCE = new S3DataLinkCache(); +} diff --git a/std-bits/base/src/main/java/org/enso/base/cache/APIRequestCache.java b/std-bits/base/src/main/java/org/enso/base/cache/APIRequestCache.java new file mode 100644 index 000000000000..6067b1ae53d5 --- /dev/null +++ b/std-bits/base/src/main/java/org/enso/base/cache/APIRequestCache.java @@ -0,0 +1,80 @@ +package org.enso.base.cache; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.HashMap; +import java.util.function.Function; +import org.graalvm.polyglot.Value; + +/** + * A cache that can be used to save results of requests to some API to avoid re-fetching them every + * time. + * + *
The cache is supposed to store the already processed (parsed etc.) result, which is relatively + * small. If the result is not cached or the cache entry is expired, the cache will recompute the + * value using the provided callback. + */ +public class APIRequestCache { + private final HashMap<String, CacheEntry> cache = new HashMap<>(); + + public void clear() { + cache.clear(); + } + + public Object getOrCompute(String key, Function<String, Value> compute, Duration ttl) { + if (ttl == null) { + // If the TTL is null, we deliberately ignore the cache. + return compute.apply(key); + } + + cleanExpiredEntries(); + + var entry = cache.get(key); + if (entry != null && entry.expiresAt.isAfter(LocalDateTime.now())) { + return entry.value; + } else { + var value = compute.apply(key); + put(key, value, ttl); + return value; + } + } + + public void invalidateEntry(String key) { + cache.remove(key); + } + + public void invalidatePrefix(String prefix) { + cache.keySet().removeIf(key -> key.startsWith(prefix)); + } + + public void cleanExpiredEntries() { + boolean hasExpiredEntries = + firstToExpire != null && firstToExpire.isBefore(LocalDateTime.now()); + if (hasExpiredEntries) { + cache.entrySet().removeIf(entry -> entry.getValue().expiresAt.isBefore(LocalDateTime.now())); + firstToExpire = + cache.values().stream() + .map(CacheEntry::expiresAt) + .min(LocalDateTime::compareTo) + .orElse(null); + } + } + + public void put(String key, Value value, Duration ttl) { + if (ttl == null) { + // If the TTL is null, we deliberately ignore the cache. + return; + } + + var expiresAt = LocalDateTime.now().plus(ttl); + if (firstToExpire == null || expiresAt.isBefore(firstToExpire)) { + firstToExpire = expiresAt; + } + + cache.put(key, new CacheEntry(value, expiresAt)); + } + + private LocalDateTime firstToExpire = null; + + private record CacheEntry(Value value, LocalDateTime expiresAt) {} +} diff --git a/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudAPI.java b/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudAPI.java index da55de0fd35a..71ed6b8f1bea 100644 --- a/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudAPI.java +++ b/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudAPI.java @@ -36,7 +36,7 @@ public static String getCloudSessionId() { } public static void flushCloudCaches() { - CloudRequestCache.clear(); + CloudRequestCache.INSTANCE.clear(); AuthenticationProvider.reset(); EnsoSecretReader.flushCache(); AuditLog.resetCache(); diff --git a/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudRequestCache.java b/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudRequestCache.java index 3931ff214451..901c13b4c126 100644 --- a/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudRequestCache.java +++ b/std-bits/base/src/main/java/org/enso/base/enso_cloud/CloudRequestCache.java @@ -1,56 +1,10 @@ package org.enso.base.enso_cloud; -import java.time.Duration; -import java.time.LocalDateTime; -import java.util.HashMap; -import java.util.function.Function; +import org.enso.base.cache.APIRequestCache; /** * A cache that can be used to save results of cloud requests to avoid re-fetching them every time. - * - *
The cache is supposed to store the already processed (parsed etc.) result. If the result is - not cached or the cache entry is expired, the cache will recompute the value using the provided - callback. */ -public final class CloudRequestCache { - private static final HashMap<String, CacheEntry> cache = new HashMap<>(); - - public static void clear() { - cache.clear(); - } - - public static Object getOrCompute(String key, Function<String, Object> compute, Duration ttl) { - if (ttl == null) { - // If the TTL is null, we deliberately ignore the cache. - return compute.apply(key); - } - - var entry = cache.get(key); - if (entry != null && entry.expiresAt.isAfter(LocalDateTime.now())) { - return entry.value; - } else { - var value = compute.apply(key); - put(key, value, ttl); - return value; - } - } - - public static void invalidateEntry(String key) { - cache.remove(key); - } - - public static void invalidatePrefix(String prefix) { - cache.keySet().removeIf(key -> key.startsWith(prefix)); - } - - public static void put(String key, Object value, Duration ttl) { - if (ttl == null) { - // If the TTL is null, we deliberately ignore the cache. - return; - } - - cache.put(key, new CacheEntry(value, LocalDateTime.now().plus(ttl))); - } - - private record CacheEntry(Object value, LocalDateTime expiresAt) {} +public final class CloudRequestCache extends APIRequestCache { + public static final CloudRequestCache INSTANCE = new CloudRequestCache(); } diff --git a/test/AWS_Tests/src/Inter_Backend_File_Operations_Spec.enso b/test/AWS_Tests/src/Inter_Backend_File_Operations_Spec.enso index 7c287b79cef2..7daccf6c2b31 100644 --- a/test/AWS_Tests/src/Inter_Backend_File_Operations_Spec.enso +++ b/test/AWS_Tests/src/Inter_Backend_File_Operations_Spec.enso @@ -195,7 +195,7 @@ add_specs suite_builder = source_file = regular_source_file_provider.get destination_file = regular_destination_file_provider.get - test_mixed source destination method = + test_mixed source destination method = Test.with_clue "("+source.to_text+" "+method.to_text+" "+destination.to_text+") " <| r = method source destination r.should_fail_with Illegal_Argument r.catch.to_display_text . should_contain "Please `.read` the data link and then write the data to the destination using the appropriate method." diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 24f88c90e3bd..0df2fb291f2c 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -111,10 +111,8 @@ add_specs suite_builder = hello_txt.parent . should_equal (root / "examples/" / "folder 2/") hello_txt.parent.is_directory . should_be_true - # Leading slash will mean starting back from bucket root: - (hello_txt / "/foo/bar") . should_equal (root / "foo/bar") - (hello_txt / "/") . should_equal root - (hello_txt / "////") . should_equal root + # Multiple consecutive slashes are ignored + (root / "//foo////bar") . should_equal (root / "foo/bar") group_builder.specify "should support path traversal using `join`" <| root.join ["foo", "bar"] . path . should_equal "s3://"+bucket_name+"/foo/bar" @@ -261,6 +259,16 @@ add_specs suite_builder = r3.should_be_a Vector r3.map .name . should_contain object_name + group_builder.specify "should be able to list buckets by constructing a root object" <| + with_default_credentials <| + just_s3 = File.new "s3://" + just_s3.list . map .path .
should_contain root.path + + group_builder.specify "should be able to construct bucket reference by resolving a path from root" <| + with_default_credentials <| + just_s3 = File.new "s3://" + (just_s3 / bucket_name).path . should_equal root.path + group_builder.specify "will fail if no credentials are provided and no Default credentials are available" pending=(if AWS_Credential.is_default_credential_available then "Default AWS credentials are defined in the environment and this test has no way of overriding them, so it is impossible to test this scenario in such environment.") <| root_without_credentials = S3_File.new "s3://"+bucket_name+"/" r = root_without_credentials.list @@ -571,15 +579,15 @@ add_specs suite_builder = Panic.with_finalizer s3_link.delete <| s3_link.read . should_equal "Hello WORLD!" - s3_to_s3_datalinks = Symlink_Test_Setup.make (my_writable_dir / "target-link-dir-1") (my_writable_dir / "datalink-dir-1") (create_data_link_to_s3 test_credentials) + s3_to_s3_datalinks = Symlink_Test_Setup.make (my_writable_dir / "target-link-dir-1") (my_writable_dir / "datalink-dir-1") (create_data_link_to_s3 test_credentials) needs_directory_suffix=True with_prepared_environment_for_file_new=with_default_credentials add_symlink_spec group_builder "S3 -> S3" s3_to_s3_datalinks cloud_test_root = Temporary_Directory.make "DataLinks-from-3S-to-Cloud" group_builder.teardown cloud_test_root.cleanup - s3_to_cloud_datalinks = Symlink_Test_Setup.make (cloud_test_root.get / "target-link-dir-2") (my_writable_dir / "datalink-dir-2") create_data_link_to_cloud + s3_to_cloud_datalinks = Symlink_Test_Setup.make (cloud_test_root.get / "target-link-dir-2") (my_writable_dir / "datalink-dir-2") create_data_link_to_cloud needs_directory_suffix=True with_prepared_environment_for_file_new=with_default_credentials add_symlink_spec group_builder "S3 -> Cloud" s3_to_cloud_datalinks pending=cloud_setup.real_cloud_pending - cloud_to_s3_datalinks = Symlink_Test_Setup.make (my_writable_dir / "target-link-dir-3") (cloud_test_root.get / "datalink-dir-3") (create_data_link_to_s3 test_credentials) + cloud_to_s3_datalinks = Symlink_Test_Setup.make (my_writable_dir / "target-link-dir-3") (cloud_test_root.get / "datalink-dir-3") (create_data_link_to_s3 test_credentials) needs_directory_suffix=True add_symlink_spec group_builder "Cloud -> S3" cloud_to_s3_datalinks pending=cloud_setup.real_cloud_pending group_builder.specify "should be able to read an S3 data link overriding the format" <| with_default_credentials <| diff --git a/test/Base_Tests/src/Network/Enso_Cloud/Cloud_Data_Link_Spec.enso b/test/Base_Tests/src/Network/Enso_Cloud/Cloud_Data_Link_Spec.enso index 19d3d3e4b4e5..57e7e70e8fcc 100644 --- a/test/Base_Tests/src/Network/Enso_Cloud/Cloud_Data_Link_Spec.enso +++ b/test/Base_Tests/src/Network/Enso_Cloud/Cloud_Data_Link_Spec.enso @@ -229,17 +229,48 @@ add_symlink_spec group_builder prefix setup:Lazy_Ref pending=Nothing = file_datalink = setup.get.file_datalink file_datalink.list . should_fail_with Illegal_Argument - group_builder.specify full_prefix+"allows to cross file-systems through the directory datalink using `/`" pending=(pending.if_nothing "TODO later") <| + group_builder.specify full_prefix+"allows to cross file-systems through the directory datalink using `/`" pending=pending <| dir_datalink = setup.get.dir_datalink (dir_datalink / "file1.txt").read . should_equal setup.get.file1_content (dir_datalink / "file1.txt").path . 
should_equal (setup.get.target_subdir / "file1.txt").path - group_builder.specify full_prefix+"calling size on a datalink checks the size of the target" pending=(pending.if_nothing "TODO later") <| + # It should also work if multiple parts are provided in one sub-path and one of them is a data link: + (setup.get.datalink_location / "to-directory.datalink/file1.txt") . read . should_equal setup.get.file1_content + + # Or when the path is resolved from a string: + crossing_path = setup.get.datalink_location.path+"/to-directory.datalink/file1.txt" + + # Some backends may need additional setup to resolve raw text paths. + setup.get.with_prepared_environment_for_file_new <| + File.new crossing_path . read . should_equal setup.get.file1_content + + group_builder.specify full_prefix+"calling size on a datalink checks the size of the target" pending=pending <| file_datalink = setup.get.file_datalink file_datalink.size . should_equal setup.get.file1_content.length + group_builder.specify full_prefix+"calling is_directory/is_regular_file on a datalink checks the target" pending=pending <| + file_datalink = setup.get.file_datalink + file_datalink.is_directory . should_be_false + file_datalink.is_regular_file . should_be_true + + dir_datalink = setup.get.dir_datalink + dir_datalink.is_directory . should_be_true + dir_datalink.is_regular_file . should_be_false + + # We want this to work, because it is possible for filesystems not aware of Enso to contain directories named with the `.datalink` suffix, and Enso should still be able to cope with that. + group_builder.specify full_prefix+"a directory with name 'just-a-directory.datalink' is still handled correctly" pending=pending <| + # Some backends (e.g. S3) need the `/` suffix to distinguish a file from a directory. + directory_name = "just-a-directory.datalink" + (if setup.get.needs_directory_suffix then "/" else "") + just_a_directory = setup.get.datalink_location / directory_name + just_a_directory.create_directory . should_equal just_a_directory + just_a_directory.is_directory . should_be_true + just_a_directory.is_regular_file . should_be_false + + "test".write (just_a_directory / "file.txt") . should_equal (just_a_directory / "file.txt") + (just_a_directory / "file.txt").read . should_equal "test" + type Symlink_Test_Setup - Value ~target_location ~datalink_location + Value ~target_location ~datalink_location needs_directory_suffix:Boolean with_prepared_environment_for_file_new:Function # The trailing slash is needed e.g. for the S3 backend to treat the path as directory. target_subdir self = self.target_location / "symlink-test-subdir/" @@ -251,9 +282,11 @@ type Symlink_Test_Setup expected_size = 1234 Vector.fill expected_size "a" . join - make ~target_location ~datalink_location create_data_link -> Lazy_Ref = + ## PRIVATE + Arguments: TODO + make ~target_location ~datalink_location create_data_link needs_directory_suffix:Boolean=False with_prepared_environment_for_file_new:Function|Nothing=Nothing -> Lazy_Ref = Lazy_Ref.Value <| - setup = Symlink_Test_Setup.Value target_location datalink_location + setup = Symlink_Test_Setup.Value target_location datalink_location needs_directory_suffix (with_prepared_environment_for_file_new.if_nothing (x->x)) setup.target_subdir.create_directory .
should_equal setup.target_subdir file1 = (setup.target_subdir / "file1.txt") file2 = (setup.target_subdir / "file2.txt") diff --git a/test/Base_Tests/src/System/File_Spec.enso b/test/Base_Tests/src/System/File_Spec.enso index 826771ed3de6..78a849156056 100644 --- a/test/Base_Tests/src/System/File_Spec.enso +++ b/test/Base_Tests/src/System/File_Spec.enso @@ -1,7 +1,6 @@ from Standard.Base import all import Standard.Base.Errors.Common.Forbidden_Operation import Standard.Base.Errors.Common.Dry_Run_Operation -import Standard.Base.Errors.Common.Unsupported_Argument_Types import Standard.Base.Errors.Encoding_Error.Encoding_Error import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument @@ -51,7 +50,10 @@ add_specs suite_builder = group_builder.specify "invalid character in path on Windows" pending=only_on_windows <| err = File.new "C:\dev:a" - err . should_fail_with Unsupported_Argument_Types + err . should_fail_with Illegal_Argument + + err2 = File.new "." / ":" + err2 . should_fail_with Illegal_Argument group_builder.specify "should have `new` be a no-op on a file" <| file = File.new sample_file @@ -329,6 +331,7 @@ add_specs suite_builder = group_builder.specify "will not resolve a relative directory to absolute in `parent` if not necessary" <| f = File.new "foo/bar" f.parent . should_equal (File.new "foo") + f.parent.path . should_equal "foo" group_builder.specify "will return Nothing for a root path that has no parent" <| root = File.new "/" . absolute . normalize @@ -416,6 +419,9 @@ add_specs suite_builder = Panic.with_finalizer f.delete_if_exists <| Data.read "abc/def.txt" . should_equal txt + group_builder.specify "will resolve empty path to the current working directory" <| + File.new "" . should_equal File.current_directory + suite_builder.group "read_text" group_builder-> group_builder.specify "should allow reading a UTF-8 file" <| contents = sample_file.read_text diff --git a/test/Table_Tests/src/Database/Postgres_Spec.enso b/test/Table_Tests/src/Database/Postgres_Spec.enso index d6260c8f40cb..78b25201f022 100644 --- a/test/Table_Tests/src/Database/Postgres_Spec.enso +++ b/test/Table_Tests/src/Database/Postgres_Spec.enso @@ -1063,6 +1063,13 @@ add_data_link_specs suite_builder = r3 = data_link_file.get.with_input_stream [File_Access.Read, Data_Link_Access.No_Follow] .read_all_bytes r3.should_be_a Vector + # The Postgres connection is neither a directory nor a file: + data_link_file.get.is_directory . should_be_false + data_link_file.get.is_regular_file . should_be_false + + # We also cannot check its size + data_link_file.get.size.should_fail_with Illegal_Argument + ## datalink support group_builder.specify "does not allow to write 'byte' data to a database data link" <| r = "foobar".write data_link_file.get
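
For context, here is a minimal sketch of the user-facing behaviour these changes enable (the bucket and file names are illustrative; it assumes the Standard.AWS library is imported so that the `s3://` protocol is registered, and that default AWS credentials are available, as in the tests above):

    from Standard.Base import all
    from Standard.AWS import all

    main =
        # `to-directory.datalink` is a data link pointing at some directory.
        linked_dir = File.new "s3://my-bucket/to-directory.datalink"
        # `/` now follows the data link into its target directory:
        (linked_dir / "file1.txt") . read
        # A raw text path crossing the data link resolves the same way:
        File.new "s3://my-bucket/to-directory.datalink/file1.txt" . read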