Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OPFS (Origin Private File System) Support #1856

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ff7c4bf
add opfs feature
e1arikawa Sep 16, 2024
30669c4
add test for url with long query string
e1arikawa Sep 17, 2024
caaf8ad
update s3rver cors settings
e1arikawa Sep 18, 2024
0977660
update httpfs test.
e1arikawa Sep 21, 2024
383ff1b
Merge commit 'cbac44c5e9151730024f3304556b3b30f026fa7a' into feature/…
e1arikawa Sep 22, 2024
426a1a4
update httpfs test.
e1arikawa Sep 24, 2024
2916b3b
update httpfs test for eslint
e1arikawa Sep 24, 2024
08db255
Merge branch 'main' into feature/opfs_support
e1arikawa Sep 24, 2024
6ddfa1a
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Sep 25, 2024
560be07
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Sep 25, 2024
87d9c23
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Sep 25, 2024
fda93d0
Fixup patch, now allowing installing from other repositories via 'INS…
carlopi Sep 27, 2024
54b82e9
fix dropfile
e1arikawa Sep 29, 2024
f5f51c4
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Sep 29, 2024
8a6f95a
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 4, 2024
4a3d5b1
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 6, 2024
ecec0df
Update packages/duckdb-wasm/test/opfs.test.ts
e1arikawa Oct 6, 2024
fd475ee
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 7, 2024
8964dc1
Improve README
carlopi Oct 7, 2024
48297e0
Update README.md
carlopi Oct 7, 2024
1fa2fae
Add npm_tags.yml
carlopi Oct 7, 2024
b1a3449
Perform checkout
carlopi Oct 7, 2024
b78525d
Fix registerFileHandle.
e1arikawa Oct 9, 2024
8470107
update comment
e1arikawa Oct 9, 2024
1e26bb1
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 9, 2024
d690d69
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 14, 2024
4c2149c
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 29, 2024
118ff41
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Oct 30, 2024
fe98901
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Nov 5, 2024
09564ee
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Nov 11, 2024
fc46200
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Nov 13, 2024
eb6ec89
add test for using file in dirrectory
e1arikawa Nov 13, 2024
6feeadd
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Nov 26, 2024
d91967d
Merge branch 'duckdb:main' into feature/opfs_support
e1arikawa Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/esbuild-node/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import * as duckdb from '@duckdb/duckdb-wasm';
import * as arrow from 'apache-arrow';
import path from 'path';
import Worker from 'web-worker';
import { createRequire } from 'module';

const require = createRequire(import.meta.url);
const DUCKDB_DIST = path.dirname(require.resolve('@duckdb/duckdb-wasm'));
const Worker = require('web-worker');

(async () => {
try {
Expand Down
2 changes: 2 additions & 0 deletions lib/include/duckdb/web/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ struct WebDBConfig {
std::optional<int8_t> access_mode = std::nullopt;
/// The thread count
uint32_t maximum_threads = (STATIC_WEBDB_FEATURES & (1 << WebDBFeature::THREADS)) ? 4 : 1;
/// The direct io flag
bool use_direct_io = false;
/// The query config
QueryConfig query = {
.cast_bigint_to_double = std::nullopt,
Expand Down
2 changes: 2 additions & 0 deletions lib/include/duckdb/web/io/web_filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ class WebFileSystem : public duckdb::FileSystem {
DataBuffer file_buffer);
/// Try to drop a specific file
bool TryDropFile(std::string_view file_name);
/// drop a specific file
void DropFile(std::string_view file_name);
/// Drop all files without references (including buffers)
void DropDanglingFiles();
/// Configure file statistics
Expand Down
4 changes: 4 additions & 0 deletions lib/js-stubs.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ addToLibrary({
duckdb_web_fs_file_sync: function (fileId) {
return globalThis.DUCKDB_RUNTIME.syncFile(Module, fileId);
},
duckdb_web_fs_file_drop_file__sig: 'vpi',
duckdb_web_fs_file_drop_file: function (fileName, fileNameLen) {
return globalThis.DUCKDB_RUNTIME.dropFile(Module, fileName, fileNameLen);
},
duckdb_web_fs_file_close__sig: 'vi',
duckdb_web_fs_file_close: function (fileId) {
return globalThis.DUCKDB_RUNTIME.closeFile(Module, fileId);
Expand Down
4 changes: 4 additions & 0 deletions lib/src/arrow_type_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ arrow::Result<duckdb::LogicalType> mapArrowTypeToDuckDB(const arrow::DataType& t
case arrow::Type::type::EXTENSION:
case arrow::Type::type::SPARSE_UNION:
case arrow::Type::type::DENSE_UNION:
case arrow::Type::type::STRING_VIEW:
case arrow::Type::type::BINARY_VIEW:
case arrow::Type::type::LIST_VIEW:
case arrow::Type::type::LARGE_LIST_VIEW:
return arrow::Status::NotImplemented("DuckDB type mapping for: ", type.ToString());
}
return duckdb::LogicalTypeId::INVALID;
Expand Down
3 changes: 3 additions & 0 deletions lib/src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ WebDBConfig WebDBConfig::ReadFrom(std::string_view args_json) {
if (doc.HasMember("allowUnsignedExtensions") && doc["allowUnsignedExtensions"].IsBool()) {
config.allow_unsigned_extensions = doc["allowUnsignedExtensions"].GetBool();
}
if (doc.HasMember("useDirectIO") && doc["useDirectIO"].IsBool()) {
config.use_direct_io = doc["useDirectIO"].GetBool();
}
if (doc.HasMember("query") && doc["query"].IsObject()) {
auto q = doc["query"].GetObject();
if (q.HasMember("queryPollingInterval") && q["queryPollingInterval"].IsNumber()) {
Expand Down
23 changes: 18 additions & 5 deletions lib/src/io/web_filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ RT_FN(void duckdb_web_fs_file_close(size_t file_id), {
auto &infos = GetLocalState();
infos.handles.erase(file_id);
});
RT_FN(void duckdb_web_fs_file_drop_file(const char *fileName, size_t pathLen), {});
RT_FN(void duckdb_web_fs_file_truncate(size_t file_id, double new_size), { GetOrOpen(file_id).Truncate(new_size); });
RT_FN(time_t duckdb_web_fs_file_get_last_modified_time(size_t file_id), {
auto &file = GetOrOpen(file_id);
Expand Down Expand Up @@ -226,6 +227,8 @@ WebFileSystem::DataProtocol WebFileSystem::inferDataProtocol(std::string_view ur
proto = WebFileSystem::DataProtocol::HTTP;
} else if (hasPrefix(url, "s3://")) {
proto = WebFileSystem::DataProtocol::S3;
} else if (hasPrefix(url, "opfs://")) {
proto = WebFileSystem::DataProtocol::BROWSER_FSACCESS;
} else if (hasPrefix(url, "file://")) {
data_url = std::string_view{url}.substr(7);
proto = default_data_protocol_;
Expand Down Expand Up @@ -453,6 +456,7 @@ void WebFileSystem::DropDanglingFiles() {
for (auto &[file_id, file] : files_by_id_) {
if (file->handle_count_ == 0) {
files_by_name_.erase(file->file_name_);
DropFile(file->file_name_);
if (file->data_url_.has_value()) {
files_by_url_.erase(file->data_url_.value());
}
Expand Down Expand Up @@ -481,6 +485,13 @@ bool WebFileSystem::TryDropFile(std::string_view file_name) {
return false;
}

/// drop a file
void WebFileSystem::DropFile(std::string_view file_name) {
DEBUG_TRACE();
std::string fileNameS = std::string{file_name};
duckdb_web_fs_file_drop_file(fileNameS.c_str(), fileNameS.size());
}

/// Write the global filesystem info
rapidjson::Value WebFileSystem::WriteGlobalFileInfo(rapidjson::Document &doc, uint32_t cache_epoch) {
DEBUG_TRACE();
Expand Down Expand Up @@ -793,7 +804,7 @@ void WebFileSystem::Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_b
auto file_size = file_hdl.file_->file_size_;
auto writer = static_cast<char *>(buffer);
file_hdl.position_ = location;
while (nr_bytes > 0 && location < file_size) {
while (nr_bytes > 0) {
auto n = Write(handle, writer, nr_bytes);
writer += n;
nr_bytes -= n;
Expand Down Expand Up @@ -1006,10 +1017,12 @@ void WebFileSystem::FileSync(duckdb::FileHandle &handle) {
vector<std::string> WebFileSystem::Glob(const std::string &path, FileOpener *opener) {
std::unique_lock<LightMutex> fs_guard{fs_mutex_};
std::vector<std::string> results;
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
if (!FileSystem::IsRemoteFile(path)) {
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
}
}
}
auto &state = GetLocalState();
Expand Down
6 changes: 6 additions & 0 deletions lib/src/json_typedef.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,12 @@ arrow::Result<rapidjson::Value> WriteSQLType(rapidjson::Document& doc, const duc
case duckdb::LogicalTypeId::AGGREGATE_STATE:
case duckdb::LogicalTypeId::BIT:
case duckdb::LogicalTypeId::LAMBDA:
case duckdb::LogicalTypeId::STRING_LITERAL:
case duckdb::LogicalTypeId::INTEGER_LITERAL:
case duckdb::LogicalTypeId::UHUGEINT:
case duckdb::LogicalTypeId::UNION:
case duckdb::LogicalTypeId::ARRAY:
case duckdb::LogicalTypeId::VARINT:
break;
}
return out;
Expand Down
22 changes: 17 additions & 5 deletions lib/src/webdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,7 @@ arrow::Status WebDB::Open(std::string_view args_json) {
db_config.options.access_mode = access_mode;
db_config.options.duckdb_api = "wasm";
db_config.options.custom_user_agent = config_->custom_user_agent;
db_config.options.use_direct_io = config_->use_direct_io;
auto db = make_shared_ptr<duckdb::DuckDB>(config_->path, &db_config);
#ifndef WASM_LOADABLE_EXTENSIONS
duckdb_web_parquet_init(db.get());
Expand Down Expand Up @@ -912,18 +913,29 @@ arrow::Status WebDB::RegisterFileBuffer(std::string_view file_name, std::unique_
/// Drop all files
arrow::Status WebDB::DropFiles() {
file_page_buffer_->DropDanglingFiles();
pinned_web_files_.clear();
std::vector<std::string> files_to_drop;
for (const auto& [key, handle] : pinned_web_files_) {
files_to_drop.push_back(handle->GetName());
}
for (const auto& fileName : files_to_drop) {
arrow::Status status = DropFile(fileName);
if (!status.ok()) {
return arrow::Status::Invalid("Failed to drop file: " + fileName);
}
}
if (auto fs = io::WebFileSystem::Get()) {
fs->DropDanglingFiles();
}
return arrow::Status::OK();
}
/// Drop a file
arrow::Status WebDB::DropFile(std::string_view file_name) {
file_page_buffer_->TryDropFile(file_name);
pinned_web_files_.erase(file_name);
arrow::Status WebDB::DropFile(std::string_view fileName) {
file_page_buffer_->TryDropFile(fileName);
pinned_web_files_.erase(fileName);
if (auto fs = io::WebFileSystem::Get()) {
if (!fs->TryDropFile(file_name)) {
if (fs->TryDropFile(fileName)) {
fs->DropFile(fileName);
} else {
return arrow::Status::Invalid("file is in use");
}
}
Expand Down
Loading