Skip to content

Commit

Permalink
fix(package): detect dirty workspace manifest
Browse files Browse the repository at this point in the history
This addresses one of the corner cases of VCS dirtiness in #14967.

> Workspace inheritance — like changing `workspace.package.edition`
> to `2021` would actually make member Cargo.toml dirty,
> but the current dirty status check doesn't capture that.

The solution here is

* Retrieve workspace manifest from Git index.
* Use the ws manifest from index to normalize package manifest.
* Compare the difference between normalized tomls from Git index and
  from Git working directory.

The implementation here is a bit ugly, as it exposes some internals
functions to `pub(crate)`.

The current implementation also has performance issues. When the
workspace contains lots of members and has a dirty workspace manifest:

* It adds one extra manifest parsing and normalization for
  checking each member.
  * Parsing part can be cached for the entire workspace.
    However normalization cannot be skipped.
* It adds two TOML serializations for checking each member.
  * If we derive `Eq` for manifest types, we might be able to skip
    serializations and instead just compare them.
  • Loading branch information
weihanglo committed Jan 22, 2025
1 parent 37fe6a7 commit 1a91bd3
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/cargo/ops/cargo_package/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ fn prepare_archive(
let src_files = src.list_files(pkg)?;

// Check (git) repository state, getting the current commit hash.
let vcs_info = vcs::check_repo_state(pkg, &src_files, gctx, &opts)?;
let vcs_info = vcs::check_repo_state(pkg, &src_files, ws, &opts)?;

build_ar_list(ws, pkg, src_files, vcs_info)
}
Expand Down
161 changes: 154 additions & 7 deletions src/cargo/ops/cargo_package/vcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ use std::path::PathBuf;

use anyhow::Context as _;
use cargo_util::paths;
use cargo_util_schemas::manifest::TomlManifest;
use serde::Serialize;
use tracing::debug;

use crate::core::Package;
use crate::core::Workspace;
use crate::core::WorkspaceRootConfig;
use crate::sources::PathEntry;
use crate::CargoResult;
use crate::GlobalContext;
Expand Down Expand Up @@ -44,9 +47,10 @@ pub struct GitVcsInfo {
pub fn check_repo_state(
p: &Package,
src_files: &[PathEntry],
gctx: &GlobalContext,
ws: &Workspace<'_>,
opts: &PackageOpts<'_>,
) -> CargoResult<Option<VcsInfo>> {
let gctx = ws.gctx();
let Ok(repo) = git2::Repository::discover(p.root()) else {
gctx.shell().verbose(|shell| {
shell.warn(format!("no (git) VCS found for `{}`", p.root().display()))
Expand Down Expand Up @@ -105,7 +109,7 @@ pub fn check_repo_state(
.and_then(|p| p.to_str())
.unwrap_or("")
.replace("\\", "/");
let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
let Some(git) = git(p, ws, src_files, &repo, &opts)? else {
// If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
// then don't generate the corresponding file in order to maintain consistency with past behavior.
return Ok(None);
Expand Down Expand Up @@ -163,11 +167,12 @@ fn warn_symlink_checked_out_as_plain_text_file(
/// The real git status check starts from here.
fn git(
pkg: &Package,
gctx: &GlobalContext,
ws: &Workspace<'_>,
src_files: &[PathEntry],
repo: &git2::Repository,
opts: &PackageOpts<'_>,
) -> CargoResult<Option<GitVcsInfo>> {
let gctx = ws.gctx();
// This is a collection of any dirty or untracked files. This covers:
// - new/modified/deleted/renamed/type change (index or worktree)
// - untracked files (which are "new" worktree files)
Expand All @@ -189,7 +194,7 @@ fn git(
.iter()
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
.map(|p| p.as_ref())
.chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
.chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
.map(|path| {
pathdiff::diff_paths(path, cwd)
.as_ref()
Expand Down Expand Up @@ -233,6 +238,7 @@ fn git(
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_files_outside_pkg_root(
ws: &Workspace<'_>,
pkg: &Package,
repo: &git2::Repository,
src_files: &[PathEntry],
Expand All @@ -247,7 +253,7 @@ fn dirty_files_outside_pkg_root(
.map(|path| paths::normalize_path(&pkg_root.join(path)))
.collect();

let mut dirty_symlinks = HashSet::new();
let mut dirty_files = HashSet::new();
for rel_path in src_files
.iter()
.filter(|p| p.is_symlink_or_under_symlink())
Expand All @@ -259,10 +265,151 @@ fn dirty_files_outside_pkg_root(
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
{
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_symlinks.insert(workdir.join(rel_path));
dirty_files.insert(workdir.join(rel_path));
}
}
Ok(dirty_symlinks)

if let Some(dirty_ws_manifest) = dirty_workspace_manifest(ws, pkg, repo)? {
dirty_files.insert(dirty_ws_manifest);
}
Ok(dirty_files)
}

fn dirty_workspace_manifest(
ws: &Workspace<'_>,
pkg: &Package,
repo: &git2::Repository,
) -> CargoResult<Option<PathBuf>> {
let workdir = repo.workdir().unwrap();
let ws_manifest_path = ws.root_manifest();
if pkg.manifest_path() == ws_manifest_path {
// The workspace manifest is also the primary package manifest.
// Normal file statuc check should have covered it.
return Ok(None);
}
if paths::strip_prefix_canonical(ws_manifest_path, pkg.root()).is_ok() {
// Inside package root. Don't bother checking git status.
return Ok(None);
}
let Ok(rel_path) = paths::strip_prefix_canonical(ws_manifest_path, workdir) else {
// Completely outside this git workdir.
return Ok(None);
};

// Outside package root but under git workdir.
if repo.status_file(&rel_path)? == git2::Status::CURRENT {
return Ok(None);
}

let from_index = ws_manifest_and_root_config_from_index(ws, repo, &rel_path);
// If there is no workable workspace manifest in Git index,
// create a default inheritable fields.
// With it, we can detect any member manifest has inherited fields,
// and then the workspace manifest should be considered dirty.
let inheritable = if let Some(fields) = from_index
.as_ref()
.map(|(_, root_config)| root_config.inheritable())
{
fields
} else {
&Default::default()
};

let empty = Vec::new();
let cargo_features = crate::core::Features::new(
from_index
.as_ref()
.and_then(|(manifest, _)| manifest.cargo_features.as_ref())
.unwrap_or(&empty),
ws.gctx(),
&mut Default::default(),
pkg.package_id().source_id().is_path(),
)
.unwrap_or_default();

let dirty_path = || Ok(Some(workdir.join(&rel_path)));
let dirty = |msg| {
debug!(
"{msg} for `{}` of repo at `{}`",
rel_path.display(),
workdir.display(),
);
dirty_path()
};

let Ok(normalized_toml) = crate::util::toml::normalize_toml(
pkg.manifest().original_toml(),
&cargo_features,
&|| Ok(inheritable),
pkg.manifest_path(),
ws.gctx(),
&mut Default::default(),
&mut Default::default(),
) else {
return dirty("failed to normalize pkg manifest from index");
};

let Ok(from_index) = toml::to_string_pretty(&normalized_toml) else {
return dirty("failed to serialize pkg manifest from index");
};

let Ok(from_working_dir) = toml::to_string_pretty(pkg.manifest().normalized_toml()) else {
return dirty("failed to serialize pkg manifest from working directory");
};

if from_index != from_working_dir {
tracing::trace!("--- from index ---\n{from_index}");
tracing::trace!("--- from working dir ---\n{from_working_dir}");
return dirty("normalized manifests from index and in working directory mismatched");
}

Ok(None)
}

/// Gets workspace manifest and workspace root config from Git index.
///
/// This returns an `Option` because workspace manifest might be broken or not
/// exist at all.
fn ws_manifest_and_root_config_from_index(
ws: &Workspace<'_>,
repo: &git2::Repository,
ws_manifest_rel_path: &Path,
) -> Option<(TomlManifest, WorkspaceRootConfig)> {
let workdir = repo.workdir().unwrap();
let dirty = |msg| {
debug!(
"{msg} for `{}` of repo at `{}`",
ws_manifest_rel_path.display(),
workdir.display(),
);
None
};
let Ok(index) = repo.index() else {
debug!("no index for repo at `{}`", workdir.display());
return None;
};
let Some(entry) = index.get_path(ws_manifest_rel_path, 0) else {
return dirty("workspace manifest not found");
};
let Ok(blob) = repo.find_blob(entry.id) else {
return dirty("failed to find manifest blob");
};
let Ok(contents) = String::from_utf8(blob.content().to_vec()) else {
return dirty("failed parse as UTF-8 encoding");
};
let Ok(document) = crate::util::toml::parse_document(&contents) else {
return dirty("failed to parse file");
};
let Ok(ws_manifest_from_index) = crate::util::toml::deserialize_toml(&document) else {
return dirty("failed to deserialize doc");
};
let Some(toml_workspace) = ws_manifest_from_index.workspace.as_ref() else {
return dirty("not a workspace manifest");
};

let ws_root_config =
crate::util::toml::to_workspace_root_config(toml_workspace, ws.root_manifest());
Some((ws_manifest_from_index, ws_root_config))
}

/// Helper to collect dirty statuses for a single repo.
Expand Down
24 changes: 12 additions & 12 deletions src/cargo/util/toml/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,14 @@ pub fn read_manifest(
.borrow_mut()
.insert(package_root.to_owned(), ws_root_config.clone());
}
let inherit_cell = LazyCell::new();
let inherit = || {
inherit_cell.try_borrow_with(|| load_inheritable_fields(gctx, path, &workspace_config))
};
let normalized_toml = normalize_toml(
&original_toml,
&features,
&workspace_config,
&inherit,
path,
gctx,
&mut warnings,
Expand Down Expand Up @@ -158,12 +162,14 @@ fn read_toml_string(path: &Path, gctx: &GlobalContext) -> CargoResult<String> {
}

#[tracing::instrument(skip_all)]
fn parse_document(contents: &str) -> Result<toml_edit::ImDocument<String>, toml_edit::de::Error> {
pub(crate) fn parse_document(
contents: &str,
) -> Result<toml_edit::ImDocument<String>, toml_edit::de::Error> {
toml_edit::ImDocument::parse(contents.to_owned()).map_err(Into::into)
}

#[tracing::instrument(skip_all)]
fn deserialize_toml(
pub(crate) fn deserialize_toml(
document: &toml_edit::ImDocument<String>,
) -> Result<manifest::TomlManifest, toml_edit::de::Error> {
let mut unused = BTreeSet::new();
Expand Down Expand Up @@ -242,7 +248,7 @@ fn to_workspace_config(
Ok(workspace_config)
}

fn to_workspace_root_config(
pub(crate) fn to_workspace_root_config(
normalized_toml: &manifest::TomlWorkspace,
manifest_file: &Path,
) -> WorkspaceRootConfig {
Expand All @@ -266,22 +272,16 @@ fn to_workspace_root_config(

/// See [`Manifest::normalized_toml`] for more details
#[tracing::instrument(skip_all)]
fn normalize_toml(
pub(crate) fn normalize_toml<'a>(
original_toml: &manifest::TomlManifest,
features: &Features,
workspace_config: &WorkspaceConfig,
inherit: &dyn Fn() -> CargoResult<&'a InheritableFields>,
manifest_file: &Path,
gctx: &GlobalContext,
warnings: &mut Vec<String>,
errors: &mut Vec<String>,
) -> CargoResult<manifest::TomlManifest> {
let package_root = manifest_file.parent().unwrap();

let inherit_cell: LazyCell<InheritableFields> = LazyCell::new();
let inherit = || {
inherit_cell
.try_borrow_with(|| load_inheritable_fields(gctx, manifest_file, &workspace_config))
};
let workspace_root = || inherit().map(|fields| fields.ws_root().as_path());

let mut normalized_toml = manifest::TomlManifest {
Expand Down
21 changes: 18 additions & 3 deletions tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1493,10 +1493,13 @@ version = "1"
);

p.cargo("package --workspace --no-verify --no-metadata")
.with_status(101)
.with_stderr_data(str![[r#"
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[UPDATING] `dummy-registry` index
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[ERROR] 1 files in the working directory contain changes that were not yet committed into git:
Cargo.toml
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
"#]])
.run();
Expand Down Expand Up @@ -1571,6 +1574,18 @@ fn dirty_and_broken_workspace_manifest_with_inherited_fields() {
);

p.cargo("package --workspace --no-verify --no-metadata")
.with_status(101)
.with_stderr_data(str![[r#"
[ERROR] 1 files in the working directory contain changes that were not yet committed into git:
Cargo.toml
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
"#]])
.run();

p.cargo("package --workspace --no-verify --no-metadata --allow-dirty")
.with_stderr_data(str![[r#"
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
Expand Down

0 comments on commit 1a91bd3

Please sign in to comment.