Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(npm): optimize loading npm resolution snapshot from lockfile by only loading necessary version info #27261

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ deno_bench_util = { version = "0.175.0", path = "./bench_util" }
deno_config = { version = "=0.39.3", features = ["workspace", "sync"] }
deno_lockfile = "=0.23.2"
deno_media_type = { version = "0.2.0", features = ["module_specifier"] }
deno_npm = "=0.26.0"
deno_npm = { version = "=0.26.0", git = "https://github.com/nathanwhit/deno_npm", branch = "version-info-lockfile" }
deno_path_util = "=0.2.1"
deno_permissions = { version = "0.41.0", path = "./runtime/permissions" }
deno_runtime = { version = "0.190.0", path = "./runtime" }
Expand Down
1 change: 1 addition & 0 deletions resolvers/npm_cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ tar.workspace = true
tempfile = "3.4.0"
thiserror.workspace = true
url.workspace = true
memchr = "2.7.4"
189 changes: 189 additions & 0 deletions resolvers/npm_cache/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use std::collections::HashMap;
use std::collections::HashSet;
use std::io::ErrorKind;
use std::io::Read;
use std::io::Seek;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
Expand All @@ -12,6 +15,7 @@ use anyhow::Error as AnyError;
use deno_cache_dir::npm::NpmCacheDir;
use deno_npm::npm_rc::ResolvedNpmRc;
use deno_npm::registry::NpmPackageInfo;
use deno_npm::registry::NpmPackageVersionInfo;
use deno_npm::NpmPackageCacheFolderId;
use deno_semver::package::PackageNv;
use deno_semver::Version;
Expand Down Expand Up @@ -260,16 +264,64 @@ impl<TEnv: NpmCacheEnv> NpmCache<TEnv> {
Ok(serde_json::from_str(&file_text)?)
}

pub fn load_version_info(
&self,
package: &PackageNv,
) -> Result<Option<NpmPackageVersionInfo>, AnyError> {
let file_cache_path =
self.get_registry_package_info_file_cache_path(&package.name);
if !file_cache_path.exists() {
return Ok(None);
}
let version_index_path =
file_cache_path.parent().unwrap().join("version-index");
if !version_index_path.exists() {
return Ok(None);
}

let index_bytes = std::fs::read(version_index_path)?;
// let index = PackageInfoIndex::from_bytes(&index_bytes)
// .with_context(|| format!("loading package {package:?}"))?;
// let Some((version_start, version_end)) =
// index.versions.get(&package.version)
// else {
// return Ok(None);
// };
let Some((version_start, version_end)) =
PackageInfoIndex::only_version(&index_bytes, &package.version)?
else {
eprintln!("miss {package}");
return Ok(None);
};
let mut file = std::fs::File::open(file_cache_path)?;
file.seek(std::io::SeekFrom::Start(version_start as u64))?;
let mut buf = vec![0u8; version_end - version_start];

file
.read_exact(&mut buf)
.with_context(|| format!("on package {package:?}"))?;

let info = serde_json::from_slice(&buf)?;
Ok(Some(info))
}

/// Stores the registry information of the package `name` in the file system
/// cache.
///
/// Two files are written, in this order: the JSON serialization of
/// `package_info` at the package info cache path, and a `version-index`
/// file in the same folder holding the serialized result of
/// `index_package_info` for that JSON text.
///
/// # Errors
///
/// Fails when `package_info` cannot be serialized or when either file
/// cannot be written.
pub fn save_package_info(
  &self,
  name: &str,
  package_info: &NpmPackageInfo,
) -> Result<(), AnyError> {
  let info_path = self.get_registry_package_info_file_cache_path(name);
  let serialized = serde_json::to_string(&package_info)?;
  let serialized_index = index_package_info(&serialized).to_bytes();
  let index_path = info_path.parent().unwrap().join("version-index");

  let env = &self.env;
  env.atomic_write_file_with_retries(&info_path, serialized.as_bytes())?;
  env.atomic_write_file_with_retries(&index_path, &serialized_index)?;
  Ok(())
}

Expand All @@ -278,6 +330,143 @@ impl<TEnv: NpmCacheEnv> NpmCache<TEnv> {
name_folder_path.join("registry.json")
}
}
/// Write-side form of the per-version index that is stored next to a cached
/// package info file. It is produced by `index_package_info` and turned into
/// bytes by `to_bytes`.
struct PackageInfoIndexSer {
/// One `(version, start, end)` entry per indexed version, where `start` and
/// `end` are byte positions in the serialized package info text delimiting
/// that version's JSON object (`end` is exclusive).
versions: Vec<(String, usize, usize)>,
}

/// Read-side form of the per-version index decoded from the bytes of a
/// `version-index` file.
#[derive(Debug)]
struct PackageInfoIndex {
// Only populated by `from_bytes`, which is itself marked as dead code in
// this file; lookups of a single version go through `only_version` instead.
/// Maps each version to the `(start, end)` pair recorded for it in the index.
#[allow(dead_code)]
versions: HashMap<Version, (usize, usize)>,
}

impl PackageInfoIndex {
#[allow(dead_code)]
fn from_bytes(bytes: &[u8]) -> Result<Self, AnyError> {
let mut versions = HashMap::new();
let mut i = 0;
loop {
let version_start = i;
let Some(version_end_offset) = memchr::memchr(b':', &bytes[i..]) else {
break;
};
let version_end = version_start + version_end_offset;
let start_start = version_end + 1;
let start_end = start_start + 8;
let end_start = start_end;
let end_end = end_start + 8;

// eprintln!(
// "{i} {version_start} {version_end} {start_start} {start_end} {end_start} {end_end}"
// );

let version = core::str::from_utf8(&bytes[version_start..version_end])?;
// eprintln!("version {version}");
let version = Version::parse_standard(version)?;
let start =
usize::from_le_bytes(bytes[start_start..start_end].try_into().unwrap());
let end =
usize::from_le_bytes(bytes[end_start..end_end].try_into().unwrap());
// eprintln!("version {version} : {start} {end}");
versions.insert(version, (start, end));
i = end_end;
}

Ok(Self { versions })
}

fn only_version(
bytes: &[u8],
version: &Version,
) -> Result<Option<(usize, usize)>, AnyError> {
let version_string = version.to_string();
let mut i = 0;
loop {
let version_start = i;
let Some(version_end_offset) = memchr::memchr(b':', &bytes[i..]) else {
break;
};
let version_end = version_start + version_end_offset;
let start_start = version_end + 1;
let start_end = start_start + 8;
let end_start = start_end;
let end_end = end_start + 8;

// eprintln!(
// "{i} {version_start} {version_end} {start_start} {start_end} {end_start} {end_end}"
// );

let version = core::str::from_utf8(&bytes[version_start..version_end])?;
// eprintln!("version {version}");
// let version = Version::parse_standard(version)?;
if version == version_string {
let start = usize::from_le_bytes(
bytes[start_start..start_end].try_into().unwrap(),
);
let end =
usize::from_le_bytes(bytes[end_start..end_end].try_into().unwrap());
return Ok(Some((start, end)));
}
// eprintln!("version {version} : {start} {end}");
i = end_end;
}
Ok(None)
}
}

impl PackageInfoIndexSer {
  /// Serializes the index into its on-disk form.
  ///
  /// Each entry is written as the version text, a `:` separator, then the
  /// start and end offsets as 8-byte little-endian integers. Entries are
  /// written back to back in the order of `self.versions`.
  fn to_bytes(&self) -> Vec<u8> {
    // version text + separator + two 8-byte offsets per entry
    let capacity: usize = self
      .versions
      .iter()
      .map(|(version, _, _)| version.len() + 1 + 16)
      .sum();
    let mut bytes = Vec::with_capacity(capacity);
    for (version, start, end) in &self.versions {
      bytes.extend_from_slice(version.as_bytes());
      bytes.push(b':');
      // Widen to u64 explicitly: the reader of this format steps over fixed
      // 8-byte fields, while `usize::to_le_bytes` only yields 8 bytes on
      // 64-bit targets.
      bytes.extend_from_slice(&(*start as u64).to_le_bytes());
      bytes.extend_from_slice(&(*end as u64).to_le_bytes());
    }
    bytes
  }
}
/// Builds the per-version index for the serialized package info `text`.
///
/// Every JSON object in `text` whose first key is `"version"` and whose
/// value for that key is a string that parses as a version is recorded as
/// `(version, start, end)`, where `start` is the position of the object's
/// opening `{` and `end` is one past its closing `}`.
///
/// Occurrences that do not have this shape, or whose object is never
/// closed, are skipped rather than causing a panic.
///
/// NOTE(review): this is a textual scan, not a JSON parse. Any object with
/// that shape is indexed wherever it is nested, for example a dependency
/// map whose first entry is a package named `version`. Confirm this is
/// acceptable for the data stored in the cache.
fn index_package_info(text: &str) -> PackageInfoIndexSer {
  const VERSION_KEY: &str = "\"version\"";
  let bytes = text.as_bytes();
  let mut versions = Vec::new();
  for (key_start, _) in text.match_indices(VERSION_KEY) {
    // The key must be the first thing in its object. Otherwise the range
    // starting just before it would not be a complete JSON object.
    let Some(object_start) = bytes[..key_start]
      .iter()
      .rposition(|byte| !byte.is_ascii_whitespace())
    else {
      continue;
    };
    if bytes[object_start] != b'{' {
      continue;
    }
    let after_key = key_start + VERSION_KEY.len();
    let Some(version) = string_value_after_key(text, after_key) else {
      continue;
    };
    if Version::parse_standard(version).is_err() {
      continue;
    }
    let Some(object_end) = enclosing_object_end(bytes, after_key) else {
      continue;
    };
    versions.push((version.to_string(), object_start, object_end));
  }

  PackageInfoIndexSer { versions }
}

/// Returns the raw text of the JSON string value that follows an object key
/// ending right before `after_key`, or `None` when what follows is not `:`
/// and a string, or when the string contains an escape sequence.
fn string_value_after_key(text: &str, after_key: usize) -> Option<&str> {
  let rest = text.get(after_key..)?.trim_start();
  let rest = rest.strip_prefix(':')?.trim_start();
  let rest = rest.strip_prefix('"')?;
  let stop = rest.find(|c: char| c == '"' || c == '\\')?;
  // a backslash before the closing quote means the raw text is not the
  // literal value; version strings never need escaping, so give up
  if rest.as_bytes()[stop] != b'"' {
    return None;
  }
  Some(&rest[..stop])
}

/// Returns the position one past the `}` that closes the object enclosing
/// `from`, or `None` when the object is not closed within `bytes`.
///
/// `from` must lie outside of any JSON string. Braces inside string
/// literals are ignored.
fn enclosing_object_end(bytes: &[u8], from: usize) -> Option<usize> {
  let mut depth = 1usize;
  let mut in_string = false;
  let mut escaped = false;
  for (offset, &byte) in bytes.get(from..)?.iter().enumerate() {
    if in_string {
      if escaped {
        escaped = false;
      } else if byte == b'\\' {
        escaped = true;
      } else if byte == b'"' {
        in_string = false;
      }
      continue;
    }
    match byte {
      b'"' => in_string = true,
      b'{' => depth += 1,
      b'}' => {
        depth -= 1;
        if depth == 0 {
          return Some(from + offset + 1);
        }
      }
      _ => {}
    }
  }
  None
}

// NOTE(review): the uses of this constant are outside the visible part of
// the file; judging by its name it is the file name of a lock file used
// while syncing an npm package — confirm against the call sites.
const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";

Expand Down
26 changes: 26 additions & 0 deletions resolvers/npm_cache/registry_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ use anyhow::Error as AnyError;
use async_trait::async_trait;
use deno_npm::npm_rc::ResolvedNpmRc;
use deno_npm::registry::NpmPackageInfo;
use deno_npm::registry::NpmPackageVersionInfo;
use deno_npm::registry::NpmRegistryApi;
use deno_npm::registry::NpmRegistryPackageInfoLoadError;
use deno_semver::package::PackageNv;
use deno_unsync::sync::AtomicFlag;
use deno_unsync::sync::MultiRuntimeAsyncValueCreator;
use futures::future::LocalBoxFuture;
Expand Down Expand Up @@ -167,6 +169,10 @@ impl<TEnv: NpmCacheEnv> RegistryInfoProvider<TEnv> {
self: &Arc<Self>,
name: &str,
) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> {
// eprintln!(
// "maybe_package_info({name}): {}",
// std::backtrace::Backtrace::capture()
// );
self.load_package_info_inner(name).await.with_context(|| {
format!(
"Failed loading {} for package \"{}\"",
Expand All @@ -183,8 +189,10 @@ impl<TEnv: NpmCacheEnv> RegistryInfoProvider<TEnv> {
let (cache_item, clear_id) = {
let mut mem_cache = self.memory_cache.lock();
let cache_item = if let Some(cache_item) = mem_cache.get(name) {
// eprintln!("mem cache hit: {name}");
cache_item.clone()
} else {
// eprintln!("mem cache miss: {name}");
let value_creator = MultiRuntimeAsyncValueCreator::new({
let downloader = self.clone();
let name = name.to_string();
Expand All @@ -199,18 +207,22 @@ impl<TEnv: NpmCacheEnv> RegistryInfoProvider<TEnv> {

match cache_item {
MemoryCacheItem::FsCached => {
// eprintln!("cached: fs {name}");
// this struct previously loaded from the registry, so we can load it from the file system cache
self
.load_file_cached_package_info(name)
.await
.map(|info| Some(Arc::new(info)))
}
MemoryCacheItem::MemoryCached(maybe_info) => {
// eprintln!("cached: memory {name}");
maybe_info.clone().map_err(|e| anyhow!("{}", e))
}
MemoryCacheItem::Pending(value_creator) => {
// eprintln!("cached: pending {name}");
match value_creator.get().await {
Ok(FutureResult::SavedFsCache(info)) => {
// eprintln!("pending cached: fs");
// return back the future and mark this package as having
// been saved in the cache for next time it's requested
self.memory_cache.lock().try_insert(
Expand Down Expand Up @@ -368,6 +380,20 @@ impl<TEnv: NpmCacheEnv> NpmRegistryApi for NpmRegistryApiAdapter<TEnv> {
/// Forwards to `mark_force_reload` on the wrapped value (`self.0`) and
/// returns its result unchanged.
fn mark_force_reload(&self) -> bool {
self.0.mark_force_reload()
}

/// Resolves the registry information of a single package version.
///
/// The per-version data in the file system cache is tried first through
/// `load_version_info`. When that yields nothing, the full package info is
/// loaded and the requested version is looked up in it.
///
/// # Errors
///
/// Fails when reading the cached version data fails, when loading the full
/// package info fails, or when the full package info cannot provide the
/// requested version.
async fn version_info(
  &self,
  package: &PackageNv,
) -> Result<NpmPackageVersionInfo, NpmRegistryPackageInfoLoadError> {
  let cached = self.0.cache.load_version_info(package).map_err(Arc::new)?;
  match cached {
    Some(version_info) => Ok(version_info),
    None => {
      let full_info = self.0.package_info(&package.name).await?;
      Ok(full_info.version_info(package)?)
    }
  }
}
}

// todo(#27198): make this private and only use RegistryInfoProvider in the rest of
Expand Down
Loading