From ae71578be27f4369a4d9a0c7d9b849be14c82162 Mon Sep 17 00:00:00 2001 From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com> Date: Mon, 2 Sep 2024 13:08:08 -0400 Subject: [PATCH] Final changes for v0.3.0 (#741) * Remove cargo-dist usage * Fix silent mode when isq --- .github/workflows/release.yml | 286 -------------------------- Cargo.toml | 33 --- mistralrs-core/src/pipeline/isq.rs | 61 ++++-- mistralrs-core/src/pipeline/normal.rs | 11 +- mistralrs-core/src/pipeline/vision.rs | 11 +- 5 files changed, 61 insertions(+), 341 deletions(-) delete mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 29f888030..000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright 2022-2024, axodotdev -# SPDX-License-Identifier: MIT or Apache-2.0 -# -# CI that: -# -# * checks for a Git Tag that looks like a release -# * builds artifacts with cargo-dist (archives, installers, hashes) -# * uploads those artifacts to temporary workflow zip -# * on success, uploads the artifacts to a GitHub Release -# -# Note that the GitHub Release will be created with a generated -# title/body based on your changelogs. - -name: Release - -permissions: - contents: write - -# This task will run whenever you push a git tag that looks like a version -# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. -# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where -# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION -# must be a Cargo-style SemVer Version (must have at least major.minor.patch). -# -# If PACKAGE_NAME is specified, then the announcement will be for that -# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). -# -# If PACKAGE_NAME isn't specified, then the announcement will be for all -# (cargo-dist-able) packages in the workspace with that version (this mode is -# intended for workspaces with only one dist-able package, or with all dist-able -# packages versioned/released in lockstep). -# -# If you push multiple tags at once, separate instances of this workflow will -# spin up, creating an independent announcement for each one. However, GitHub -# will hard limit this to 3 tags per commit, as it will assume more tags is a -# mistake. -# -# If there's a prerelease-style suffix to the version, then the release(s) -# will be marked as a prerelease. -on: - pull_request: - push: - tags: - - '**[0-9]+.[0-9]+.[0-9]+*' - -jobs: - # Run 'cargo dist plan' (or host) to determine what tasks we need to do - plan: - runs-on: "ubuntu-20.04" - outputs: - val: ${{ steps.plan.outputs.manifest }} - tag: ${{ !github.event.pull_request && github.ref_name || '' }} - tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} - publishing: ${{ !github.event.pull_request }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Install cargo-dist - # we specify bash to get pipefail; it guards against the `curl` command - # failing. 
otherwise `sh` won't catch that `curl` returned non-0 - shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.17.0/cargo-dist-installer.sh | sh" - - name: Cache cargo-dist - uses: actions/upload-artifact@v4 - with: - name: cargo-dist-cache - path: ~/.cargo/bin/cargo-dist - # sure would be cool if github gave us proper conditionals... - # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible - # functionality based on whether this is a pull_request, and whether it's from a fork. - # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* - # but also really annoying to build CI around when it needs secrets to work right.) - - id: plan - run: | - cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json - echo "cargo dist ran successfully" - cat plan-dist-manifest.json - echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 - with: - name: artifacts-plan-dist-manifest - path: plan-dist-manifest.json - - # Build and packages all the platform-specific things - build-local-artifacts: - name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) - # Let the initial task tell us to not run (currently very blunt) - needs: - - plan - if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} - strategy: - fail-fast: false - # Target platforms/runners are computed by cargo-dist in create-release. - # Each member of the matrix has the following arguments: - # - # - runner: the github runner - # - dist-args: cli flags to pass to cargo dist - # - install-dist: expression to run to install cargo-dist on the runner - # - # Typically there will be: - # - 1 "global" task that builds universal installers - # - N "local" tasks that build each platform's binaries and platform-specific installers - matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} - runs-on: ${{ matrix.runner }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json - steps: - - name: enable windows longpaths - run: | - git config --global core.longpaths true - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: swatinem/rust-cache@v2 - with: - key: ${{ join(matrix.targets, '-') }} - cache-provider: ${{ matrix.cache_provider }} - - name: Install cargo-dist - run: ${{ matrix.install_dist }} - # Get the dist-manifest - - name: Fetch local artifacts - uses: actions/download-artifact@v4 - with: - pattern: artifacts-* - path: target/distrib/ - merge-multiple: true - - name: Install dependencies - run: | - ${{ matrix.packages_install }} - - name: Build artifacts - run: | - # Actually do builds and make zips and whatnot - cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json - echo "cargo dist ran successfully" - - id: cargo-dist - name: Post-build - # We force bash here just because github makes it really hard to get values up - # to "real" actions without writing to env-vars, and writing to env-vars has - # inconsistent syntax between shell and powershell. 
- shell: bash - run: | - # Parse out what we just built and upload it to scratch storage - echo "paths<> "$GITHUB_OUTPUT" - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" - echo "EOF" >> "$GITHUB_OUTPUT" - - cp dist-manifest.json "$BUILD_MANIFEST_NAME" - - name: "Upload artifacts" - uses: actions/upload-artifact@v4 - with: - name: artifacts-build-local-${{ join(matrix.targets, '_') }} - path: | - ${{ steps.cargo-dist.outputs.paths }} - ${{ env.BUILD_MANIFEST_NAME }} - - # Build and package all the platform-agnostic(ish) things - build-global-artifacts: - needs: - - plan - - build-local-artifacts - runs-on: "ubuntu-20.04" - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Install cached cargo-dist - uses: actions/download-artifact@v4 - with: - name: cargo-dist-cache - path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist - # Get all the local artifacts for the global tasks to use (for e.g. checksums) - - name: Fetch local artifacts - uses: actions/download-artifact@v4 - with: - pattern: artifacts-* - path: target/distrib/ - merge-multiple: true - - id: cargo-dist - shell: bash - run: | - cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json - echo "cargo dist ran successfully" - - # Parse out what we just built and upload it to scratch storage - echo "paths<> "$GITHUB_OUTPUT" - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" - echo "EOF" >> "$GITHUB_OUTPUT" - - cp dist-manifest.json "$BUILD_MANIFEST_NAME" - - name: "Upload artifacts" - uses: actions/upload-artifact@v4 - with: - name: artifacts-build-global - path: | - ${{ steps.cargo-dist.outputs.paths }} - ${{ env.BUILD_MANIFEST_NAME }} - # Determines if we should publish/announce - host: - needs: - - plan - - build-local-artifacts - - build-global-artifacts - # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) - if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - runs-on: "ubuntu-20.04" - outputs: - val: ${{ steps.host.outputs.manifest }} - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Install cached cargo-dist - uses: actions/download-artifact@v4 - with: - name: cargo-dist-cache - path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist - # Fetch artifacts from scratch-storage - - name: Fetch artifacts - uses: actions/download-artifact@v4 - with: - pattern: artifacts-* - path: target/distrib/ - merge-multiple: true - - id: host - shell: bash - run: | - cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json - echo "artifacts uploaded and released successfully" - cat dist-manifest.json - echo "manifest=$(jq -c "." 
dist-manifest.json)" >> "$GITHUB_OUTPUT"
-      - name: "Upload dist-manifest.json"
-        uses: actions/upload-artifact@v4
-        with:
-          # Overwrite the previous copy
-          name: artifacts-dist-manifest
-          path: dist-manifest.json
-      # Create a GitHub Release while uploading all files to it
-      - name: "Download GitHub Artifacts"
-        uses: actions/download-artifact@v4
-        with:
-          pattern: artifacts-*
-          path: artifacts
-          merge-multiple: true
-      - name: Cleanup
-        run: |
-          # Remove the granular manifests
-          rm -f artifacts/*-dist-manifest.json
-      - name: Create GitHub Release
-        env:
-          PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
-          ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}"
-          ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
-          RELEASE_COMMIT: "${{ github.sha }}"
-        run: |
-          # Write and read notes from a file to avoid quoting breaking things
-          echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
-
-          gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
-
-  announce:
-    needs:
-      - plan
-      - host
-    # use "always() && ..." to allow us to wait for all publish jobs while
-    # still allowing individual publish jobs to skip themselves (for prereleases).
-    # "host" however must run to completion, no skipping allowed!
-    if: ${{ always() && needs.host.result == 'success' }}
-    runs-on: "ubuntu-20.04"
-    env:
-      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
diff --git a/Cargo.toml b/Cargo.toml
index 9bd4a85c0..a73072f13 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,36 +48,3 @@ half = "2.4.0"
 rayon = "1.1.0"
 url = "2.5.2"
 data-url = "0.3.1"
-
-# Config for 'cargo dist'
-[workspace.metadata.dist]
-# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
-cargo-dist-version = "0.17.0"
-# CI backends to support
-ci = "github"
-# The installers to generate for each app
-installers = ["shell"]
-# Target platforms to build apps for (Rust target-triple syntax)
-targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu"]
-# Publish jobs to run in CI
-pr-run-mode = "plan"
-# Whether to auto-include files like READMEs, LICENSE, and CHANGELOGs (default true)
-auto-includes = false
-# Whether to install an updater program
-install-updater = true
-
-[profile.profiling]
-inherits = "release"
-debug = true
-
-[profile.dev]
-opt-level = 3
-
-# The profile that 'cargo dist' will build with
-[profile.dist]
-inherits = "release"
-lto = "thin"
-
-[profile.release]
-codegen-units = 1
-lto = "fat"
diff --git a/mistralrs-core/src/pipeline/isq.rs b/mistralrs-core/src/pipeline/isq.rs
index 1f515e311..fde850405 100644
--- a/mistralrs-core/src/pipeline/isq.rs
+++ b/mistralrs-core/src/pipeline/isq.rs
@@ -89,6 +89,7 @@ pub trait IsqModel {
         dtype: Option<IsqType>,
         device: Device,
         topology: Option<&Topology>,
+        silent: bool,
     ) -> candle_core::Result<()> {
         {
             let (tensors, mapper) = self.get_layers();
@@ -187,8 +188,48 @@ pub trait IsqModel {
                     use rayon::iter::{
                         IndexedParallelIterator, IntoParallelIterator, ParallelIterator,
                     };
+                    if silent {
+                        tensors.into_par_iter().zip(devices_and_dtypes).for_each(
+                            |((tensor, _), (device, dtype))| {
+                                *tensor = tensor
+                                    .clone()
+                                    .apply_isq(dtype, device.clone(), &n_quantized)
+                                    .unwrap();
+                                device.synchronize().unwrap();
+                            },
+                        );
+                    } else {
+                        tensors
+                            .into_par_iter()
+                            .zip(devices_and_dtypes)
+                            .progress_with(bar)
+                            .for_each(|((tensor, _), (device, dtype))| {
+                                *tensor = tensor
+                                    .clone()
+                                    .apply_isq(dtype, device.clone(), &n_quantized)
+                                    .unwrap();
+                                device.synchronize().unwrap();
+                            });
+                    }
+                });
+            }
+
+            #[cfg(feature = "metal")]
+            {
+                use indicatif::ProgressIterator;
+                if silent {
+                    tensors.into_iter().zip(devices_and_dtypes).for_each(
+                        |((tensor, _), (device, dtype))| {
+                            *tensor = tensor
+                                .clone()
+                                .apply_isq(dtype, device.clone(), &n_quantized)
+                                .unwrap();
+                            device.synchronize().unwrap();
+                        },
+                    );
+                } else {
                     tensors
-                        .into_par_iter()
+                        .into_iter()
                         .zip(devices_and_dtypes)
                         .progress_with(bar)
                         .for_each(|((tensor, _), (device, dtype))| {
@@ -198,23 +239,7 @@ pub trait IsqModel {
                                 .unwrap();
                             device.synchronize().unwrap();
                         });
-                });
-            }
-
-            #[cfg(feature = "metal")]
-            {
-                use indicatif::ProgressIterator;
-                tensors
-                    .into_iter()
-                    .zip(devices_and_dtypes)
-                    .progress_with(bar)
-                    .for_each(|((tensor, _), (device, dtype))| {
-                        *tensor = tensor
-                            .clone()
-                            .apply_isq(dtype, device.clone(), &n_quantized)
-                            .unwrap();
-                        device.synchronize().unwrap();
-                    });
+                }
             }
             let delta = Instant::now().duration_since(t_start).as_secs_f32();
             info!("Applied in-situ quantization into {dtype:?} to {n_quantized:?} tensors out of {total_tensors} total tensors. Took {delta:.2}s", );
diff --git a/mistralrs-core/src/pipeline/normal.rs b/mistralrs-core/src/pipeline/normal.rs
index 90de4e322..0febd848e 100644
--- a/mistralrs-core/src/pipeline/normal.rs
+++ b/mistralrs-core/src/pipeline/normal.rs
@@ -57,6 +57,7 @@ pub struct NormalPipeline {
     model_id: String,
     metadata: Arc<GeneralMetadata>,
     topology: Option<Topology>,
+    silent: bool,
 }
 
 /// A loader for a "normal" (non-quantized) model.
@@ -339,7 +340,12 @@ impl Loader for NormalLoader {
         let chat_template = get_chat_template(paths, &self.chat_template, None);
 
         if in_situ_quant.is_some() || self.config.topology.is_some() {
-            model.quantize(in_situ_quant, device.clone(), self.config.topology.as_ref())?;
+            model.quantize(
+                in_situ_quant,
+                device.clone(),
+                self.config.topology.as_ref(),
+                silent,
+            )?;
         }
 
         let paged_attn_config = if matches!(self.kind, ModelKind::Adapter { .. }) {
@@ -396,6 +402,7 @@ impl Loader for NormalLoader {
                 prompt_batchsize: self.config.prompt_batchsize,
             }),
             topology: self.config.topology.clone(),
+            silent,
         })))
     }
 
@@ -424,7 +431,7 @@ impl IsqPipelineMixin for NormalPipeline {
     fn re_isq_model(&mut self, dtype: IsqType) -> Result<()> {
         let device = self.device().clone();
         self.model
-            .quantize(Some(dtype), device, self.topology.as_ref())
+            .quantize(Some(dtype), device, self.topology.as_ref(), self.silent)
             .map_err(anyhow::Error::msg)
     }
 }
diff --git a/mistralrs-core/src/pipeline/vision.rs b/mistralrs-core/src/pipeline/vision.rs
index be30aa572..d965a09e6 100644
--- a/mistralrs-core/src/pipeline/vision.rs
+++ b/mistralrs-core/src/pipeline/vision.rs
@@ -49,6 +49,7 @@ pub struct VisionPipeline {
     processor: Arc<dyn Processor + Send + Sync>,
     preprocessor_config: Arc<PreProcessorConfig>,
     topology: Option<Topology>,
+    silent: bool,
 }
 
 /// A loader for a vision (non-quantized) model.
@@ -261,7 +262,12 @@ impl Loader for VisionLoader {
         let chat_template = get_chat_template(paths, &self.chat_template, None);
 
         if in_situ_quant.is_some() || self.config.topology.is_some() {
-            model.quantize(in_situ_quant, device.clone(), self.config.topology.as_ref())?;
+            model.quantize(
+                in_situ_quant,
+                device.clone(),
+                self.config.topology.as_ref(),
+                silent,
+            )?;
         }
 
         let (cache_config, cache_engine) = if let Some(paged_attn_config) = paged_attn_config {
@@ -310,6 +316,7 @@ impl Loader for VisionLoader {
             processor,
             preprocessor_config: Arc::new(preprocessor_config),
             topology: self.config.topology.clone(),
+            silent,
         })))
     }
 
@@ -338,7 +345,7 @@ impl IsqPipelineMixin for VisionPipeline {
     fn re_isq_model(&mut self, dtype: IsqType) -> Result<()> {
         let device = self.device().clone();
        self.model
-            .quantize(Some(dtype), device, self.topology.as_ref())
+            .quantize(Some(dtype), device, self.topology.as_ref(), self.silent)
            .map_err(anyhow::Error::msg)
     }
 }
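
The functional core of this patch is the new `silent: bool` parameter threaded from the normal and vision loaders into `IsqModel::quantize`: when `silent` is true the same (parallel) quantization work runs without attaching the `indicatif` progress bar. The code below is a minimal, standalone sketch of that dispatch pattern only, not the mistral.rs API; it assumes the `rayon` crate and `indicatif` with its `rayon` feature, and `quantize_one` is a placeholder for the real per-tensor `apply_isq` work.

// Standalone sketch of the silent-vs-progress-bar dispatch used in this patch.
// Assumed dependencies: rayon = "1", indicatif = { version = "0.17", features = ["rayon"] }.
use indicatif::{ParallelProgressIterator, ProgressBar};
use rayon::iter::{IntoParallelIterator, ParallelIterator};

// Placeholder for the real per-tensor ISQ work (`apply_isq` in the patch):
// here we just snap each value onto a coarse grid.
fn quantize_one(x: f32) -> f32 {
    (x * 16.0).round() / 16.0
}

// Mirrors the new dispatch: identical parallel work in both branches, with the
// indicatif progress bar only attached when `silent` is false.
fn quantize_all(tensors: Vec<f32>, silent: bool) -> Vec<f32> {
    let total = tensors.len() as u64;
    if silent {
        // Silent mode: no progress bar, so nothing is printed while quantizing.
        tensors.into_par_iter().map(quantize_one).collect()
    } else {
        // Interactive mode: attach a progress bar to the rayon iterator.
        let bar = ProgressBar::new(total);
        tensors
            .into_par_iter()
            .progress_with(bar)
            .map(quantize_one)
            .collect()
    }
}

fn main() {
    let data: Vec<f32> = (0..10_000).map(|i| i as f32 * 0.01).collect();
    let quiet = quantize_all(data.clone(), true);
    let with_bar = quantize_all(data, false);
    // Both paths produce the same result; only the reporting differs.
    assert_eq!(quiet, with_bar);
}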