From 952d378ca4d69f46ee16d3033a86a0f4b767f00b Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Thu, 21 Mar 2024 02:19:44 +0100 Subject: [PATCH 01/16] feat: Add ability to resume snapshots and write back changes to the backing file continously, add API endpoint to `msync` the files backing snapshots --- .../seccomp/aarch64-unknown-linux-musl.json | 4 ++ .../seccomp/x86_64-unknown-linux-musl.json | 4 ++ .../src/api_server/parsed_request.rs | 7 ++- .../src/api_server/request/snapshot.rs | 25 ++++++++- src/firecracker/swagger/firecracker.yaml | 35 ++++++++++++ src/vmm/src/persist.rs | 29 +++++++++- src/vmm/src/rpc_interface.rs | 53 +++++++++++++++++-- src/vmm/src/vmm_config/snapshot.rs | 8 +++ src/vmm/src/vstate/memory.rs | 19 ++++++- 9 files changed, 173 insertions(+), 11 deletions(-) diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index 4302a87c3a9..dd575c0385a 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index a735997383f..1e4fc14726c 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 4ebb97ee914..c5fc931e83e 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -23,7 +23,9 @@ use super::request::machine_configuration::{ use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; -use super::request::snapshot::{parse_patch_vm_state, parse_put_snapshot}; +use super::request::snapshot::{ + parse_patch_vm_state, parse_put_snapshot, parse_put_snapshot_nomemory, +}; use super::request::version::parse_get_version; use super::request::vsock::parse_put_vsock; use super::ApiServer; @@ -97,6 +99,9 @@ impl TryFrom<&Request> for ParsedRequest { parse_put_net(body, path_tokens.next()) } (Method::Put, "snapshot", Some(body)) => parse_put_snapshot(body, path_tokens.next()), + (Method::Put, "snapshot-nomemory", Some(body)) => { + parse_put_snapshot_nomemory(body, path_tokens.next()) + } (Method::Put, "vsock", Some(body)) => parse_put_vsock(body), (Method::Put, "entropy", Some(body)) => parse_put_entropy(body), (Method::Put, _, None) => method_to_error(Method::Put), diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 7addd8cdc74..5750795e398 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -5,8 +5,8 @@ use serde::de::Error as DeserializeError; use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::snapshot::{ - CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, MemBackendConfig, MemBackendType, - Vm, VmState, + CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, + MemBackendConfig, MemBackendType, Vm, VmState, }; use super::super::parsed_request::{Error, ParsedRequest}; @@ -42,6 +42,27 @@ pub(crate) fn parse_put_snapshot( } } +pub(crate) fn parse_put_snapshot_nomemory( + body: &Body, + request_type_from_path: Option<&str>, +) -> Result { + match request_type_from_path { + Some(request_type) => match request_type { + "create" => Ok(ParsedRequest::new_sync(VmmAction::CreateSnapshotNoMemory( + serde_json::from_slice::(body.raw())?, + ))), + _ => Err(Error::InvalidPathMethod( + format!("/snapshot-nomemory/{}", request_type), + Method::Put, + )), + }, + None => Err(Error::Generic( + StatusCode::BadRequest, + "Missing snapshot operation type.".to_string(), + )), + } +} + pub(crate) fn parse_patch_vm_state(body: &Body) -> Result { let vm = serde_json::from_slice::(body.raw())?; diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index ca754204b54..e561cf6e742 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -591,6 +591,32 @@ paths: schema: $ref: "#/definitions/Error" + /snapshot-nomemory/create: + put: + summary: Creates a full snapshot without memory. Post-boot only. + description: + Creates a snapshot of the microVM state, without memory. The microVM should be + in the `Paused` state. + operationId: createSnapshotNoMemory + parameters: + - name: body + in: body + description: The configuration used for creating a snaphot. + required: true + schema: + $ref: "#/definitions/SnapshotCreateNoMemoryParams" + responses: + 204: + description: Snapshot created + 400: + description: Snapshot cannot be created due to bad input + schema: + $ref: "#/definitions/Error" + default: + description: Internal server error + schema: + $ref: "#/definitions/Error" + /snapshot/load: put: summary: Loads a snapshot. Pre-boot only. @@ -1203,6 +1229,15 @@ definitions: Type of snapshot to create. It is optional and by default, a full snapshot is created. + SnapshotCreateNoMemoryParams: + type: object + required: + - snapshot_path + properties: + snapshot_path: + type: string + description: Path to the file that will contain the microVM state. + SnapshotLoadParams: type: object description: diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index e1290ca8492..db9f5918332 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -34,7 +34,8 @@ use crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigUpdate, VmConfigError}; use crate::vmm_config::snapshot::{ - CreateSnapshotParams, LoadSnapshotParams, MemBackendType, SnapshotType, + CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotParams, MemBackendType, + SnapshotType, }; use crate::vstate::memory::{ GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryState, MemoryError, @@ -148,6 +149,8 @@ pub enum CreateSnapshotError { MicrovmState(MicrovmStateError), /// Cannot serialize the microVM state: {0} SerializeMicrovmState(crate::snapshot::Error), + /// Failed to sync Microvm memory. + MemoryMsync(MemoryError), /// Cannot perform {0} on the snapshot backing file: {1} SnapshotBackingFile(&'static str, io::Error), /// Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}. @@ -174,6 +177,25 @@ pub fn create_snapshot( Ok(()) } +/// Creates a Microvm snapshot without memory. +pub fn create_snapshot_nomemory( + vmm: &mut Vmm, + vm_info: &VmInfo, + params: &CreateSnapshotNoMemoryParams, +) -> std::result::Result<(), CreateSnapshotError> { + let microvm_state = vmm + .save_state(vm_info) + .map_err(CreateSnapshotError::MicrovmState)?; + + snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; + + vmm.guest_memory() + .msync() + .map_err(CreateSnapshotError::MemoryMsync)?; + + Ok(()) +} + fn snapshot_state_to_file( microvm_state: &MicrovmState, snapshot_path: &Path, @@ -488,7 +510,10 @@ fn guest_memory_from_file( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result { - let mem_file = File::open(mem_file_path)?; + let mem_file = OpenOptions::new() + .read(true) + .write(true) + .open(mem_file_path)?; let guest_mem = GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; Ok(guest_mem) diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index b8a3929f42b..37caaee1495 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -8,15 +8,15 @@ use seccompiler::BpfThreadMap; use serde_json::Value; #[cfg(test)] use tests::{ - build_and_boot_microvm, create_snapshot, restore_from_snapshot, MockVmRes as VmResources, - MockVmm as Vmm, + build_and_boot_microvm, create_snapshot, create_snapshot_nomemory, restore_from_snapshot, + MockVmRes as VmResources, MockVmm as Vmm, }; use super::VmmError; #[cfg(not(test))] use super::{ - builder::build_and_boot_microvm, persist::create_snapshot, persist::restore_from_snapshot, - resources::VmResources, Vmm, + builder::build_and_boot_microvm, persist::create_snapshot, persist::create_snapshot_nomemory, + persist::restore_from_snapshot, resources::VmResources, Vmm, }; use crate::builder::StartMicrovmError; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; @@ -38,7 +38,9 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ NetworkInterfaceConfig, NetworkInterfaceError, NetworkInterfaceUpdateConfig, }; -use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; +use crate::vmm_config::snapshot::{ + CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotParams, SnapshotType, +}; use crate::vmm_config::vsock::{VsockConfigError, VsockDeviceConfig}; use crate::vmm_config::{self, RateLimiterUpdate}; use crate::EventManager; @@ -59,6 +61,9 @@ pub enum VmmAction { /// Create a snapshot using as input the `CreateSnapshotParams`. This action can only be called /// after the microVM has booted and only when the microVM is in `Paused` state. CreateSnapshot(CreateSnapshotParams), + /// Create a snapshot without memory using as input the `CreateSnapshotNoMemoryParams`. This action can only be called + /// after the microVM has booted and only when the microVM is in `Paused` state. + CreateSnapshotNoMemory(CreateSnapshotNoMemoryParams), /// Get the balloon device configuration. GetBalloonConfig, /// Get the ballon device latest statistics. @@ -439,6 +444,7 @@ impl<'a> PrebootApiController<'a> { SetEntropyDevice(config) => self.set_entropy_device(config), // Operations not allowed pre-boot. CreateSnapshot(_) + | CreateSnapshotNoMemory(_) | FlushMetrics | Pause | Resume @@ -627,6 +633,9 @@ impl RuntimeApiController { match request { // Supported operations allowed post-boot. CreateSnapshot(snapshot_create_cfg) => self.create_snapshot(&snapshot_create_cfg), + CreateSnapshotNoMemory(snapshot_create_cfg) => { + self.create_snapshot_nomemory(&snapshot_create_cfg) + } FlushMetrics => self.flush_metrics(), GetBalloonConfig => self .vmm @@ -795,6 +804,30 @@ impl RuntimeApiController { Ok(VmmData::Empty) } + fn create_snapshot_nomemory( + &mut self, + create_params: &CreateSnapshotNoMemoryParams, + ) -> Result { + log_dev_preview_warning("Virtual machine snapshots without memory", None); + + let mut locked_vmm = self.vmm.lock().unwrap(); + let vm_info = VmInfo::from(&self.vm_resources); + let create_start_us = utils::time::get_time_us(utils::time::ClockType::Monotonic); + + create_snapshot_nomemory(&mut locked_vmm, &vm_info, create_params)?; + + let elapsed_time_us = update_metric_with_elapsed_time( + &METRICS.latencies_us.vmm_full_create_snapshot, + create_start_us, + ); + info!( + "'create full snapshot without memory' VMM action took {} us.", + elapsed_time_us + ); + + Ok(VmmData::Empty) + } + /// Updates block device properties: /// - path of the host file backing the emulated block device, update the disk image on the /// device and its virtio configuration @@ -1228,6 +1261,16 @@ mod tests { Ok(()) } + // Need to redefine this since the non-test one uses real Vmm + // instead of our mocks. + pub fn create_snapshot_nomemory( + _: &mut Vmm, + _: &VmInfo, + _: &CreateSnapshotNoMemoryParams, + ) -> Result<(), CreateSnapshotError> { + Ok(()) + } + // Need to redefine this since the non-test one uses real Vmm // instead of our mocks. pub fn restore_from_snapshot( diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index e1850b74939..aff29567856 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -47,6 +47,14 @@ pub struct CreateSnapshotParams { pub mem_file_path: PathBuf, } +/// Stores the configuration that will be used for creating a snapshot without memory. +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct CreateSnapshotNoMemoryParams { + /// Path to the file that will contain the microVM state. + pub snapshot_path: PathBuf, +} + /// Stores the configuration that will be used for loading a snapshot. #[derive(Debug, PartialEq, Eq)] pub struct LoadSnapshotParams { diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 1dd89f9b104..73f6bc5222d 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -89,6 +89,9 @@ where huge_pages: HugePageConfig, ) -> Result; + /// Flushes memory contents to disk. + fn msync(&self) -> std::result::Result<(), MemoryError>; + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -243,7 +246,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; - Self::from_raw_regions_file(regions, track_dirty_pages, false) + Self::from_raw_regions_file(regions, track_dirty_pages, true) } None => { let regions = state @@ -256,6 +259,20 @@ impl GuestMemoryExtension for GuestMemoryMmap { } } + /// Flushes memory contents to disk. + fn msync(&self) -> std::result::Result<(), MemoryError> { + Ok(self.iter().for_each(|region| { + // TODO: Describe safety aspects of this + unsafe { + libc::msync( + region.as_ptr() as *mut libc::c_void, + region.size(), + libc::MS_SYNC, + ); + } + })) + } + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); From e266c12485be3220325d22e40fb6f0534b36a47c Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Thu, 21 Mar 2024 02:41:37 +0100 Subject: [PATCH 02/16] feat: Add support for suspending and resuming to/from snapshots under PVM --- src/vmm/src/arch/x86_64/msr.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/arch/x86_64/msr.rs b/src/vmm/src/arch/x86_64/msr.rs index 543369db471..af0fed2cadf 100644 --- a/src/vmm/src/arch/x86_64/msr.rs +++ b/src/vmm/src/arch/x86_64/msr.rs @@ -50,7 +50,7 @@ const APIC_BASE_MSR: u32 = 0x800; /// Number of APIC MSR indexes const APIC_MSR_INDEXES: u32 = 0x400; -/// Custom MSRs fall in the range 0x4b564d00-0x4b564dff +/// Custom KVM MSRs fall in the range 0x4b564d00-0x4b564def (0x4b564df0-0x4b564dff is reserved for PVM) const MSR_KVM_WALL_CLOCK_NEW: u32 = 0x4b56_4d00; const MSR_KVM_SYSTEM_TIME_NEW: u32 = 0x4b56_4d01; const MSR_KVM_ASYNC_PF_EN: u32 = 0x4b56_4d02; @@ -59,6 +59,16 @@ const MSR_KVM_PV_EOI_EN: u32 = 0x4b56_4d04; const MSR_KVM_POLL_CONTROL: u32 = 0x4b56_4d05; const MSR_KVM_ASYNC_PF_INT: u32 = 0x4b56_4d06; +// Custom PVM MSRs fall in the range 0x4b564df0-0x4b564dff +const MSR_PVM_LINEAR_ADDRESS_RANGE: u32 = 0x4b56_4df0; +const MSR_PVM_VCPU_STRUCT: u32 = 0x4b56_4df1; +const MSR_PVM_SUPERVISOR_RSP: u32 = 0x4b56_4df2; +const MSR_PVM_SUPERVISOR_REDZONE: u32 = 0x4b56_4df3; +const MSR_PVM_EVENT_ENTRY: u32 = 0x4b56_4df4; +const MSR_PVM_RETU_RIP: u32 = 0x4b56_4df5; +const MSR_PVM_RETS_RIP: u32 = 0x4b56_4df6; +const MSR_PVM_SWITCH_CR3: u32 = 0x4b56_4df7; + /// Taken from arch/x86/include/asm/msr-index.h /// Spectre mitigations control MSR pub const MSR_IA32_SPEC_CTRL: u32 = 0x0000_0048; @@ -236,6 +246,14 @@ static SERIALIZABLE_MSR_RANGES: &[MsrRange] = &[ MSR_RANGE!(MSR_K7_HWCR), MSR_RANGE!(MSR_KVM_POLL_CONTROL), MSR_RANGE!(MSR_KVM_ASYNC_PF_INT), + MSR_RANGE!(MSR_PVM_LINEAR_ADDRESS_RANGE), + MSR_RANGE!(MSR_PVM_VCPU_STRUCT), + MSR_RANGE!(MSR_PVM_SUPERVISOR_RSP), + MSR_RANGE!(MSR_PVM_SUPERVISOR_REDZONE), + MSR_RANGE!(MSR_PVM_EVENT_ENTRY), + MSR_RANGE!(MSR_PVM_RETU_RIP), + MSR_RANGE!(MSR_PVM_RETS_RIP), + MSR_RANGE!(MSR_PVM_SWITCH_CR3), ]; /// Specifies whether a particular MSR should be included in vcpu serialization. From 0a32eb3559e7a3274619107c105841d9243c2d65 Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Thu, 21 Mar 2024 18:50:54 +0100 Subject: [PATCH 03/16] feat: Encode snapshot size information in snapshot file to allow suspending to and resuming from block devices --- src/vmm/src/persist.rs | 7 ++++--- src/vmm/src/snapshot/mod.rs | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index db9f5918332..c7762d26399 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -476,7 +476,7 @@ pub enum SnapshotStateFromFileError { /// Failed to open snapshot file: {0} Open(std::io::Error), /// Failed to read snapshot file metadata: {0} - Meta(std::io::Error), + Meta(crate::snapshot::Error), /// Failed to load snapshot state from file: {0} Load(#[from] crate::snapshot::Error), } @@ -487,8 +487,9 @@ fn snapshot_state_from_file( let snapshot = Snapshot::new(SNAPSHOT_VERSION); let mut snapshot_reader = File::open(snapshot_path).map_err(SnapshotStateFromFileError::Open)?; - let metadata = std::fs::metadata(snapshot_path).map_err(SnapshotStateFromFileError::Meta)?; - let snapshot_len = u64_to_usize(metadata.len()); + let raw_snapshot_len: u64 = + Snapshot::deserialize(&mut snapshot_reader).map_err(SnapshotStateFromFileError::Meta)?; + let snapshot_len = u64_to_usize(raw_snapshot_len); let state: MicrovmState = snapshot .load_with_version_check(&mut snapshot_reader, snapshot_len) .map_err(SnapshotStateFromFileError::Load)?; diff --git a/src/vmm/src/snapshot/mod.rs b/src/vmm/src/snapshot/mod.rs index b45d58bbcab..1e31e55a470 100644 --- a/src/vmm/src/snapshot/mod.rs +++ b/src/vmm/src/snapshot/mod.rs @@ -62,6 +62,10 @@ pub enum Error { Io(i32), /// An error occured with serialization/deserialization: {0} Serde(String), + /// Failed to flush snapshot CRC to snapshot buffer + Flush, + /// Failed to write snapshot buffer to snapshot file + Write, } /// Firecracker snapshot header @@ -204,12 +208,21 @@ impl Snapshot { T: Write + Debug, O: Serialize + Debug, { - let mut crc_writer = CRC64Writer::new(writer); + let mut snapshot_buf = Vec::new(); + + let mut crc_writer = CRC64Writer::new(&mut snapshot_buf); self.save_without_crc(&mut crc_writer, object)?; // Now write CRC value let checksum = crc_writer.checksum(); - Self::serialize(&mut crc_writer, &checksum) + Self::serialize(&mut crc_writer, &checksum)?; + + crc_writer.flush().map_err(|_| Error::Flush)?; + + let snapshot_len = snapshot_buf.len() as u64; + Self::serialize(writer, &snapshot_len)?; + + writer.write_all(&snapshot_buf).map_err(|_| Error::Write) } /// Save a snapshot with no CRC64 checksum included. From a3b0f09d6d566fad8a5e2988fa0fd970f2f40cbb Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Fri, 5 Apr 2024 04:09:19 +0200 Subject: [PATCH 04/16] Revert "feat: Add ability to resume snapshots and write back changes to the backing file continously, add API endpoint to `msync` the files backing snapshots" This reverts commit 952d378ca4d69f46ee16d3033a86a0f4b767f00b. --- .../seccomp/aarch64-unknown-linux-musl.json | 4 -- .../seccomp/x86_64-unknown-linux-musl.json | 4 -- .../src/api_server/parsed_request.rs | 7 +-- .../src/api_server/request/snapshot.rs | 25 +-------- src/firecracker/swagger/firecracker.yaml | 35 ------------ src/vmm/src/persist.rs | 29 +--------- src/vmm/src/rpc_interface.rs | 53 ++----------------- src/vmm/src/vmm_config/snapshot.rs | 8 --- src/vmm/src/vstate/memory.rs | 19 +------ 9 files changed, 11 insertions(+), 173 deletions(-) diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index dd575c0385a..4302a87c3a9 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -35,10 +35,6 @@ { "syscall": "fsync" }, - { - "syscall": "msync", - "comment": "Used for live migration to sync dirty pages" - }, { "syscall": "close" }, diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index 1e4fc14726c..a735997383f 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -35,10 +35,6 @@ { "syscall": "fsync" }, - { - "syscall": "msync", - "comment": "Used for live migration to sync dirty pages" - }, { "syscall": "close" }, diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index c5fc931e83e..4ebb97ee914 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -23,9 +23,7 @@ use super::request::machine_configuration::{ use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; -use super::request::snapshot::{ - parse_patch_vm_state, parse_put_snapshot, parse_put_snapshot_nomemory, -}; +use super::request::snapshot::{parse_patch_vm_state, parse_put_snapshot}; use super::request::version::parse_get_version; use super::request::vsock::parse_put_vsock; use super::ApiServer; @@ -99,9 +97,6 @@ impl TryFrom<&Request> for ParsedRequest { parse_put_net(body, path_tokens.next()) } (Method::Put, "snapshot", Some(body)) => parse_put_snapshot(body, path_tokens.next()), - (Method::Put, "snapshot-nomemory", Some(body)) => { - parse_put_snapshot_nomemory(body, path_tokens.next()) - } (Method::Put, "vsock", Some(body)) => parse_put_vsock(body), (Method::Put, "entropy", Some(body)) => parse_put_entropy(body), (Method::Put, _, None) => method_to_error(Method::Put), diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 5750795e398..7addd8cdc74 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -5,8 +5,8 @@ use serde::de::Error as DeserializeError; use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::snapshot::{ - CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, - MemBackendConfig, MemBackendType, Vm, VmState, + CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, MemBackendConfig, MemBackendType, + Vm, VmState, }; use super::super::parsed_request::{Error, ParsedRequest}; @@ -42,27 +42,6 @@ pub(crate) fn parse_put_snapshot( } } -pub(crate) fn parse_put_snapshot_nomemory( - body: &Body, - request_type_from_path: Option<&str>, -) -> Result { - match request_type_from_path { - Some(request_type) => match request_type { - "create" => Ok(ParsedRequest::new_sync(VmmAction::CreateSnapshotNoMemory( - serde_json::from_slice::(body.raw())?, - ))), - _ => Err(Error::InvalidPathMethod( - format!("/snapshot-nomemory/{}", request_type), - Method::Put, - )), - }, - None => Err(Error::Generic( - StatusCode::BadRequest, - "Missing snapshot operation type.".to_string(), - )), - } -} - pub(crate) fn parse_patch_vm_state(body: &Body) -> Result { let vm = serde_json::from_slice::(body.raw())?; diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index e561cf6e742..ca754204b54 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -591,32 +591,6 @@ paths: schema: $ref: "#/definitions/Error" - /snapshot-nomemory/create: - put: - summary: Creates a full snapshot without memory. Post-boot only. - description: - Creates a snapshot of the microVM state, without memory. The microVM should be - in the `Paused` state. - operationId: createSnapshotNoMemory - parameters: - - name: body - in: body - description: The configuration used for creating a snaphot. - required: true - schema: - $ref: "#/definitions/SnapshotCreateNoMemoryParams" - responses: - 204: - description: Snapshot created - 400: - description: Snapshot cannot be created due to bad input - schema: - $ref: "#/definitions/Error" - default: - description: Internal server error - schema: - $ref: "#/definitions/Error" - /snapshot/load: put: summary: Loads a snapshot. Pre-boot only. @@ -1229,15 +1203,6 @@ definitions: Type of snapshot to create. It is optional and by default, a full snapshot is created. - SnapshotCreateNoMemoryParams: - type: object - required: - - snapshot_path - properties: - snapshot_path: - type: string - description: Path to the file that will contain the microVM state. - SnapshotLoadParams: type: object description: diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index c7762d26399..de4ef2e5f7e 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -34,8 +34,7 @@ use crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigUpdate, VmConfigError}; use crate::vmm_config::snapshot::{ - CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotParams, MemBackendType, - SnapshotType, + CreateSnapshotParams, LoadSnapshotParams, MemBackendType, SnapshotType, }; use crate::vstate::memory::{ GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryState, MemoryError, @@ -149,8 +148,6 @@ pub enum CreateSnapshotError { MicrovmState(MicrovmStateError), /// Cannot serialize the microVM state: {0} SerializeMicrovmState(crate::snapshot::Error), - /// Failed to sync Microvm memory. - MemoryMsync(MemoryError), /// Cannot perform {0} on the snapshot backing file: {1} SnapshotBackingFile(&'static str, io::Error), /// Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}. @@ -177,25 +174,6 @@ pub fn create_snapshot( Ok(()) } -/// Creates a Microvm snapshot without memory. -pub fn create_snapshot_nomemory( - vmm: &mut Vmm, - vm_info: &VmInfo, - params: &CreateSnapshotNoMemoryParams, -) -> std::result::Result<(), CreateSnapshotError> { - let microvm_state = vmm - .save_state(vm_info) - .map_err(CreateSnapshotError::MicrovmState)?; - - snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; - - vmm.guest_memory() - .msync() - .map_err(CreateSnapshotError::MemoryMsync)?; - - Ok(()) -} - fn snapshot_state_to_file( microvm_state: &MicrovmState, snapshot_path: &Path, @@ -511,10 +489,7 @@ fn guest_memory_from_file( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result { - let mem_file = OpenOptions::new() - .read(true) - .write(true) - .open(mem_file_path)?; + let mem_file = File::open(mem_file_path)?; let guest_mem = GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; Ok(guest_mem) diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 37caaee1495..b8a3929f42b 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -8,15 +8,15 @@ use seccompiler::BpfThreadMap; use serde_json::Value; #[cfg(test)] use tests::{ - build_and_boot_microvm, create_snapshot, create_snapshot_nomemory, restore_from_snapshot, - MockVmRes as VmResources, MockVmm as Vmm, + build_and_boot_microvm, create_snapshot, restore_from_snapshot, MockVmRes as VmResources, + MockVmm as Vmm, }; use super::VmmError; #[cfg(not(test))] use super::{ - builder::build_and_boot_microvm, persist::create_snapshot, persist::create_snapshot_nomemory, - persist::restore_from_snapshot, resources::VmResources, Vmm, + builder::build_and_boot_microvm, persist::create_snapshot, persist::restore_from_snapshot, + resources::VmResources, Vmm, }; use crate::builder::StartMicrovmError; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; @@ -38,9 +38,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ NetworkInterfaceConfig, NetworkInterfaceError, NetworkInterfaceUpdateConfig, }; -use crate::vmm_config::snapshot::{ - CreateSnapshotNoMemoryParams, CreateSnapshotParams, LoadSnapshotParams, SnapshotType, -}; +use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; use crate::vmm_config::vsock::{VsockConfigError, VsockDeviceConfig}; use crate::vmm_config::{self, RateLimiterUpdate}; use crate::EventManager; @@ -61,9 +59,6 @@ pub enum VmmAction { /// Create a snapshot using as input the `CreateSnapshotParams`. This action can only be called /// after the microVM has booted and only when the microVM is in `Paused` state. CreateSnapshot(CreateSnapshotParams), - /// Create a snapshot without memory using as input the `CreateSnapshotNoMemoryParams`. This action can only be called - /// after the microVM has booted and only when the microVM is in `Paused` state. - CreateSnapshotNoMemory(CreateSnapshotNoMemoryParams), /// Get the balloon device configuration. GetBalloonConfig, /// Get the ballon device latest statistics. @@ -444,7 +439,6 @@ impl<'a> PrebootApiController<'a> { SetEntropyDevice(config) => self.set_entropy_device(config), // Operations not allowed pre-boot. CreateSnapshot(_) - | CreateSnapshotNoMemory(_) | FlushMetrics | Pause | Resume @@ -633,9 +627,6 @@ impl RuntimeApiController { match request { // Supported operations allowed post-boot. CreateSnapshot(snapshot_create_cfg) => self.create_snapshot(&snapshot_create_cfg), - CreateSnapshotNoMemory(snapshot_create_cfg) => { - self.create_snapshot_nomemory(&snapshot_create_cfg) - } FlushMetrics => self.flush_metrics(), GetBalloonConfig => self .vmm @@ -804,30 +795,6 @@ impl RuntimeApiController { Ok(VmmData::Empty) } - fn create_snapshot_nomemory( - &mut self, - create_params: &CreateSnapshotNoMemoryParams, - ) -> Result { - log_dev_preview_warning("Virtual machine snapshots without memory", None); - - let mut locked_vmm = self.vmm.lock().unwrap(); - let vm_info = VmInfo::from(&self.vm_resources); - let create_start_us = utils::time::get_time_us(utils::time::ClockType::Monotonic); - - create_snapshot_nomemory(&mut locked_vmm, &vm_info, create_params)?; - - let elapsed_time_us = update_metric_with_elapsed_time( - &METRICS.latencies_us.vmm_full_create_snapshot, - create_start_us, - ); - info!( - "'create full snapshot without memory' VMM action took {} us.", - elapsed_time_us - ); - - Ok(VmmData::Empty) - } - /// Updates block device properties: /// - path of the host file backing the emulated block device, update the disk image on the /// device and its virtio configuration @@ -1261,16 +1228,6 @@ mod tests { Ok(()) } - // Need to redefine this since the non-test one uses real Vmm - // instead of our mocks. - pub fn create_snapshot_nomemory( - _: &mut Vmm, - _: &VmInfo, - _: &CreateSnapshotNoMemoryParams, - ) -> Result<(), CreateSnapshotError> { - Ok(()) - } - // Need to redefine this since the non-test one uses real Vmm // instead of our mocks. pub fn restore_from_snapshot( diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index aff29567856..e1850b74939 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -47,14 +47,6 @@ pub struct CreateSnapshotParams { pub mem_file_path: PathBuf, } -/// Stores the configuration that will be used for creating a snapshot without memory. -#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] -pub struct CreateSnapshotNoMemoryParams { - /// Path to the file that will contain the microVM state. - pub snapshot_path: PathBuf, -} - /// Stores the configuration that will be used for loading a snapshot. #[derive(Debug, PartialEq, Eq)] pub struct LoadSnapshotParams { diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 73f6bc5222d..1dd89f9b104 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -89,9 +89,6 @@ where huge_pages: HugePageConfig, ) -> Result; - /// Flushes memory contents to disk. - fn msync(&self) -> std::result::Result<(), MemoryError>; - /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -246,7 +243,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; - Self::from_raw_regions_file(regions, track_dirty_pages, true) + Self::from_raw_regions_file(regions, track_dirty_pages, false) } None => { let regions = state @@ -259,20 +256,6 @@ impl GuestMemoryExtension for GuestMemoryMmap { } } - /// Flushes memory contents to disk. - fn msync(&self) -> std::result::Result<(), MemoryError> { - Ok(self.iter().for_each(|region| { - // TODO: Describe safety aspects of this - unsafe { - libc::msync( - region.as_ptr() as *mut libc::c_void, - region.size(), - libc::MS_SYNC, - ); - } - })) - } - /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); From c76df56550f48b79777ba8706d6e247e93783047 Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Fri, 5 Apr 2024 06:29:34 +0200 Subject: [PATCH 05/16] feat: Add ability to resume snapshots and write back changes to the backing file continously, add API endpoint to `msync` the files backing snapshots and to optionally serialize the VM state --- .../seccomp/aarch64-unknown-linux-musl.json | 4 + .../seccomp/x86_64-unknown-linux-musl.json | 4 + src/firecracker/src/api_server/mod.rs | 8 + .../src/api_server/request/snapshot.rs | 5 + src/firecracker/swagger/firecracker.yaml | 7 + src/vmm/src/logger/metrics.rs | 6 + src/vmm/src/persist.rs | 139 +++++++++++------- src/vmm/src/rpc_interface.rs | 24 +++ src/vmm/src/vmm_config/snapshot.rs | 11 ++ src/vmm/src/vstate/memory.rs | 44 +++++- src/vmm/tests/integration_tests.rs | 1 + 11 files changed, 192 insertions(+), 61 deletions(-) diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index 4302a87c3a9..dd575c0385a 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index a735997383f..1e4fc14726c 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 80f5510abe1..2c54c50f0d9 100644 --- a/src/firecracker/src/api_server/mod.rs +++ b/src/firecracker/src/api_server/mod.rs @@ -158,6 +158,14 @@ impl ApiServer { &METRICS.latencies_us.diff_create_snapshot, "create diff snapshot", )), + SnapshotType::Msync => Some(( + &METRICS.latencies_us.diff_create_snapshot, + "memory synchronization snapshot", + )), + SnapshotType::MsyncAndState => Some(( + &METRICS.latencies_us.diff_create_snapshot, + "memory synchronization and state snapshot", + )), }, VmmAction::LoadSnapshot(_) => { Some((&METRICS.latencies_us.load_snapshot, "load snapshot")) diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 7addd8cdc74..30318a2a9eb 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -99,6 +99,7 @@ fn parse_put_snapshot_load(body: &Body) -> Result { mem_backend, enable_diff_snapshots: snapshot_config.enable_diff_snapshots, resume_vm: snapshot_config.resume_vm, + shared: snapshot_config.shared, }; // Construct the `ParsedRequest` object. @@ -175,6 +176,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -202,6 +204,7 @@ mod tests { }, enable_diff_snapshots: true, resume_vm: false, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -229,6 +232,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -253,6 +257,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }; let parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert_eq!( diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index ca754204b54..87e7dbaf804 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -1199,6 +1199,8 @@ definitions: enum: - Full - Diff + - Msync + - MsyncAndState description: Type of snapshot to create. It is optional and by default, a full snapshot is created. @@ -1234,6 +1236,11 @@ definitions: type: boolean description: When set to true, the vm is also resumed if the snapshot load is successful. + shared: + type: boolean + description: When set to true and the guest memory backend is a file, + changes to the memory are asynchronously written back to the + backend as the VM is running. TokenBucket: type: object diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 2d39a050ed5..ace728e7a74 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -616,6 +616,10 @@ pub struct PerformanceMetrics { pub vmm_full_create_snapshot: SharedStoreMetric, /// Measures the snapshot diff create time, at the VMM level, in microseconds. pub vmm_diff_create_snapshot: SharedStoreMetric, + /// Measures the snapshot memory synchronization time, at the VMM level, in microseconds. + pub vmm_msync_create_snapshot: SharedStoreMetric, + /// Measures the snapshot memory synchronization and state time, at the VMM level, in microseconds. + pub vmm_msync_and_state_create_snapshot: SharedStoreMetric, /// Measures the snapshot load time, at the VMM level, in microseconds. pub vmm_load_snapshot: SharedStoreMetric, /// Measures the microVM pausing duration, at the VMM level, in microseconds. @@ -634,6 +638,8 @@ impl PerformanceMetrics { resume_vm: SharedStoreMetric::new(), vmm_full_create_snapshot: SharedStoreMetric::new(), vmm_diff_create_snapshot: SharedStoreMetric::new(), + vmm_msync_create_snapshot: SharedStoreMetric::new(), + vmm_msync_and_state_create_snapshot: SharedStoreMetric::new(), vmm_load_snapshot: SharedStoreMetric::new(), vmm_pause_vm: SharedStoreMetric::new(), vmm_resume_vm: SharedStoreMetric::new(), diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index de4ef2e5f7e..70919a01a8e 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -142,6 +142,8 @@ pub enum CreateSnapshotError { UnsupportedVersion, /// Cannot write memory file: {0} Memory(MemoryError), + /// Cannot msync memory file: {0} + MemoryMsync(MemoryError), /// Cannot perform {0} on the memory backing file: {1} MemoryBackingFile(&'static str, io::Error), /// Cannot save the microVM state: {0} @@ -163,11 +165,16 @@ pub fn create_snapshot( vm_info: &VmInfo, params: &CreateSnapshotParams, ) -> Result<(), CreateSnapshotError> { - let microvm_state = vmm - .save_state(vm_info) - .map_err(CreateSnapshotError::MicrovmState)?; + match params.snapshot_type { + SnapshotType::Diff | SnapshotType::Full | SnapshotType::MsyncAndState => { + let microvm_state = vmm + .save_state(vm_info) + .map_err(CreateSnapshotError::MicrovmState)?; - snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; + snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; + } + SnapshotType::Msync => (), + } snapshot_memory_to_file(vmm, ¶ms.mem_file_path, params.snapshot_type)?; @@ -211,55 +218,63 @@ fn snapshot_memory_to_file( ) -> Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; - // Need to check this here, as we create the file in the line below - let file_existed = mem_file_path.exists(); + match snapshot_type { + SnapshotType::Diff | SnapshotType::Full => { + // Need to check this here, as we create the file in the line below + let file_existed = mem_file_path.exists(); + + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(mem_file_path) + .map_err(|err| MemoryBackingFile("open", err))?; + + // Determine what size our total memory area is. + let mem_size_mib = mem_size_mib(vmm.guest_memory()); + let expected_size = mem_size_mib * 1024 * 1024; + + if file_existed { + let file_size = file + .metadata() + .map_err(|e| MemoryBackingFile("get_metadata", e))? + .len(); + + // Here we only truncate the file if the size mismatches. + // - For full snapshots, the entire file's contents will be overwritten anyway. We have to + // avoid truncating here to deal with the edge case where it represents the snapshot file + // from which this very microVM was loaded (as modifying the memory file would be + // reflected in the mmap of the file, meaning a truncate operation would zero out guest + // memory, and thus corrupt the VM). + // - For diff snapshots, we want to merge the diff layer directly into the file. + if file_size != expected_size { + file.set_len(0) + .map_err(|err| MemoryBackingFile("truncate", err))?; + } + } - let mut file = OpenOptions::new() - .write(true) - .create(true) - .open(mem_file_path) - .map_err(|err| MemoryBackingFile("open", err))?; - - // Determine what size our total memory area is. - let mem_size_mib = mem_size_mib(vmm.guest_memory()); - let expected_size = mem_size_mib * 1024 * 1024; - - if file_existed { - let file_size = file - .metadata() - .map_err(|e| MemoryBackingFile("get_metadata", e))? - .len(); - - // Here we only truncate the file if the size mismatches. - // - For full snapshots, the entire file's contents will be overwritten anyway. We have to - // avoid truncating here to deal with the edge case where it represents the snapshot file - // from which this very microVM was loaded (as modifying the memory file would be - // reflected in the mmap of the file, meaning a truncate operation would zero out guest - // memory, and thus corrupt the VM). - // - For diff snapshots, we want to merge the diff layer directly into the file. - if file_size != expected_size { - file.set_len(0) - .map_err(|err| MemoryBackingFile("truncate", err))?; + // Set the length of the file to the full size of the memory area. + file.set_len(expected_size) + .map_err(|e| MemoryBackingFile("set_length", e))?; + + match snapshot_type { + SnapshotType::Diff => { + let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; + vmm.guest_memory() + .dump_dirty(&mut file, &dirty_bitmap) + .map_err(Memory) + } + SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory), + _ => Ok(()), + }?; + file.flush() + .map_err(|err| MemoryBackingFile("flush", err))?; + file.sync_all() + .map_err(|err| MemoryBackingFile("sync_all", err)) } - } - - // Set the length of the file to the full size of the memory area. - file.set_len(expected_size) - .map_err(|e| MemoryBackingFile("set_length", e))?; - - match snapshot_type { - SnapshotType::Diff => { - let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; - vmm.guest_memory() - .dump_dirty(&mut file, &dirty_bitmap) - .map_err(Memory) + SnapshotType::Msync | SnapshotType::MsyncAndState => { + vmm.guest_memory().msync().map_err(MemoryMsync) } - SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory), - }?; - file.flush() - .map_err(|err| MemoryBackingFile("flush", err))?; - file.sync_all() - .map_err(|err| MemoryBackingFile("sync_all", err)) + } } /// Validates that snapshot CPU vendor matches the host CPU vendor. @@ -421,6 +436,7 @@ pub fn restore_from_snapshot( mem_state, track_dirty_pages, vm_resources.vm_config.huge_pages, + params.shared, ) .map_err(RestoreFromSnapshotGuestMemoryError::File)?, None, @@ -488,10 +504,24 @@ fn guest_memory_from_file( mem_state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result { - let mem_file = File::open(mem_file_path)?; - let guest_mem = - GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; + let mem_file = if shared { + OpenOptions::new() + .read(true) + .write(true) + .open(mem_file_path)? + } else { + File::open(mem_file_path)? + }; + + let guest_mem = GuestMemoryMmap::from_state( + Some(&mem_file), + mem_state, + track_dirty_pages, + huge_pages, + shared, + )?; Ok(guest_mem) } @@ -550,7 +580,8 @@ fn create_guest_memory( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { - let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; + let guest_memory = + GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages, false)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) { backend_mappings.push(GuestRegionUffdMapping { diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index b8a3929f42b..d0f5d478c26 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -791,6 +791,26 @@ impl RuntimeApiController { elapsed_time_us ); } + SnapshotType::Msync => { + let elapsed_time_us = update_metric_with_elapsed_time( + &METRICS.latencies_us.vmm_msync_create_snapshot, + create_start_us, + ); + info!( + "'create memory synchronization snapshot' VMM action took {} us.", + elapsed_time_us + ); + } + SnapshotType::MsyncAndState => { + let elapsed_time_us = update_metric_with_elapsed_time( + &METRICS.latencies_us.vmm_msync_and_state_create_snapshot, + create_start_us, + ); + info!( + "'create memory synchronization and state snapshot' VMM action took {} us.", + elapsed_time_us + ); + } } Ok(VmmData::Empty) } @@ -1733,6 +1753,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }); // Request should succeed. preboot.handle_preboot_request(req).unwrap(); @@ -1749,6 +1770,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }); // Request should succeed. preboot.handle_preboot_request(req).unwrap(); @@ -2130,6 +2152,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }), VmmActionError::OperationNotSupportedPostBoot, ); @@ -2156,6 +2179,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }); let err = preboot.handle_preboot_request(req); assert_eq!( diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index e1850b74939..43b03e563af 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -18,6 +18,10 @@ pub enum SnapshotType { /// Full snapshot. #[default] Full, + /// Memory synchronization snapshot. + Msync, + /// Memory synchronization and state snapshot. + MsyncAndState, } /// Specifies the method through which guest memory will get populated when @@ -60,6 +64,10 @@ pub struct LoadSnapshotParams { /// When set to true, the vm is also resumed if the snapshot load /// is successful. pub resume_vm: bool, + /// When set to true and the guest memory backend is a file, + /// changes to the memory are asynchronously written back to the + /// backend as the VM is running. + pub shared: bool, } /// Stores the configuration for loading a snapshot that is provided by the user. @@ -82,6 +90,9 @@ pub struct LoadSnapshotConfig { /// Whether or not to resume the vm post snapshot load. #[serde(default)] pub resume_vm: bool, + /// Whether or not to asynchronously write back memory changes to the backing file. + #[serde(default)] + pub shared: bool, } /// Stores the configuration used for managing snapshot memory. diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 1dd89f9b104..7e92991d85a 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -87,8 +87,12 @@ where state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result; + /// Flushes memory contents to disk. + fn msync(&self) -> std::result::Result<(), MemoryError>; + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -224,6 +228,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result { match file { Some(f) => { @@ -243,7 +248,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; - Self::from_raw_regions_file(regions, track_dirty_pages, false) + Self::from_raw_regions_file(regions, track_dirty_pages, shared) } None => { let regions = state @@ -256,6 +261,20 @@ impl GuestMemoryExtension for GuestMemoryMmap { } } + /// Flushes memory contents to disk. + fn msync(&self) -> std::result::Result<(), MemoryError> { + Ok(self.iter().for_each(|region| { + // TODO: Describe safety aspects of this + unsafe { + libc::msync( + region.as_ptr() as *mut libc::c_void, + region.size(), + libc::MS_SYNC, + ); + } + })) + } + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); @@ -505,9 +524,14 @@ mod tests { let file = TempFile::new().unwrap().into_file(); // No mapping of snapshots that were taken with hugetlbfs enabled - let err = - GuestMemoryMmap::from_state(Some(&file), &state, false, HugePageConfig::Hugetlbfs2M) - .unwrap_err(); + let err = GuestMemoryMmap::from_state( + Some(&file), + &state, + false, + HugePageConfig::Hugetlbfs2M, + false, + ) + .unwrap_err(); assert!(matches!(err, MemoryError::HugetlbfsSnapshot), "{:?}", err); } @@ -696,6 +720,7 @@ mod tests { &memory_state, false, HugePageConfig::None, + false, ) .unwrap(); @@ -754,9 +779,14 @@ mod tests { guest_memory.dump_dirty(&mut file, &dirty_bitmap).unwrap(); // We can restore from this because this is the first dirty dump. - let restored_guest_memory = - GuestMemoryMmap::from_state(Some(&file), &memory_state, false, HugePageConfig::None) - .unwrap(); + let restored_guest_memory = GuestMemoryMmap::from_state( + Some(&file), + &memory_state, + false, + HugePageConfig::None, + false, + ) + .unwrap(); // Check that the region contents are the same. let mut restored_region = vec![0u8; region_size]; diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 8fb7395ab00..7b9e18199ad 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -238,6 +238,7 @@ fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) { µvm_state.memory_state, false, HugePageConfig::None, + false, ) .unwrap(); From c6fc50d18005c36d73aa034fc4bcd2144ec706dd Mon Sep 17 00:00:00 2001 From: Shivansh Vij Date: Thu, 18 Apr 2024 16:13:25 -0400 Subject: [PATCH 06/16] Backporting https://github.com/firecracker-microvm/firecracker/pull/4491 --- docs/cpu_templates/cpu-template-helper.md | 3 ++- src/vmm/src/arch/x86_64/msr.rs | 3 +++ .../functional/test_cpu_template_helper.py | 18 ++++++++++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/cpu_templates/cpu-template-helper.md b/docs/cpu_templates/cpu-template-helper.md index 156798ce73e..b867b5a98af 100644 --- a/docs/cpu_templates/cpu-template-helper.md +++ b/docs/cpu_templates/cpu-template-helper.md @@ -180,7 +180,7 @@ CPU features to a heterogeneous fleet consisting of multiple CPU models. ### MSRs excluded from guest CPU configuration dump | Register name | Index | -| --------------------------------------- | ----------------------- | +|-----------------------------------------|-------------------------| | MSR_IA32_TSC | 0x00000010 | | MSR_ARCH_PERFMON_PERFCTRn | 0x000000c1 - 0x000000d2 | | MSR_ARCH_PERFMON_EVENTSELn | 0x00000186 - 0x00000197 | @@ -230,6 +230,7 @@ CPU features to a heterogeneous fleet consisting of multiple CPU models. | HV_X64_MSR_SYNDBG_RECV_BUFFER | 0x400000f4 | | HV_X64_MSR_SYNDBG_PENDING_BUFFER | 0x400000f5 | | HV_X64_MSR_SYNDBG_OPTIONS | 0x400000ff | +| HV_X64_MSR_TSC_INVARIANT_CONTROL | 0x40000118 | ### ARM registers excluded from guest CPU configuration dump diff --git a/src/vmm/src/arch/x86_64/msr.rs b/src/vmm/src/arch/x86_64/msr.rs index af0fed2cadf..ec8b70d32bf 100644 --- a/src/vmm/src/arch/x86_64/msr.rs +++ b/src/vmm/src/arch/x86_64/msr.rs @@ -246,6 +246,7 @@ static SERIALIZABLE_MSR_RANGES: &[MsrRange] = &[ MSR_RANGE!(MSR_K7_HWCR), MSR_RANGE!(MSR_KVM_POLL_CONTROL), MSR_RANGE!(MSR_KVM_ASYNC_PF_INT), + MSR_RANGE!(MSR_IA32_TSX_CTRL), MSR_RANGE!(MSR_PVM_LINEAR_ADDRESS_RANGE), MSR_RANGE!(MSR_PVM_VCPU_STRUCT), MSR_RANGE!(MSR_PVM_SUPERVISOR_RSP), @@ -405,6 +406,8 @@ static UNDUMPABLE_MSR_RANGES: &[MsrRange] = &[ MSR_RANGE!(HV_X64_MSR_SYNDBG_CONTROL, 5), // HV_X64_MSR_SYNDBG_OPTIONS MSR_RANGE!(HV_X64_MSR_SYNDBG_OPTIONS), + // HV_X64_MSR_TSC_INVARIANT_CONTROL + MSR_RANGE!(HV_X64_MSR_TSC_INVARIANT_CONTROL), ]; /// Specifies whether a particular MSR should be dumped. diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index 36d429f00e4..492185a66e4 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -438,6 +438,9 @@ def test_consecutive_cpu_config_consistency(uvm_plain, cpu_template_helper, tmp_ # Strip common entries. cpu_template_helper.template_strip([cpu_config_1, cpu_config_2]) + config_1 = json.loads(cpu_config_1.read_text(encoding="utf-8")) + config_2 = json.loads(cpu_config_2.read_text(encoding="utf-8")) + # Check the stripped result is empty. if PLATFORM == "x86_64": empty_cpu_config = { @@ -446,10 +449,21 @@ def test_consecutive_cpu_config_consistency(uvm_plain, cpu_template_helper, tmp_ "msr_modifiers": [], } elif PLATFORM == "aarch64": + # 0x603000000013df01 -> CNTPCT_EL0 + ignore_registers = ["0x603000000013df01"] + + config_1["reg_modifiers"] = [ + rm for rm in config_1["reg_modifiers"] if rm["addr"] not in ignore_registers + ] + config_2["reg_modifiers"] = [ + rm for rm in config_2["reg_modifiers"] if rm["addr"] not in ignore_registers + ] + empty_cpu_config = { "kvm_capabilities": [], "reg_modifiers": [], "vcpu_features": [], } - assert json.loads(cpu_config_1.read_text(encoding="utf-8")) == empty_cpu_config - assert json.loads(cpu_config_2.read_text(encoding="utf-8")) == empty_cpu_config + + assert config_1 == empty_cpu_config + assert config_2 == empty_cpu_config From 81a50677b1ff2f3ba4ba7498582084b8a932e389 Mon Sep 17 00:00:00 2001 From: Shivansh Vij Date: Thu, 18 Apr 2024 16:17:06 -0400 Subject: [PATCH 07/16] Backporting https://github.com/firecracker-microvm/firecracker/pull/4539 --- .../functional/test_cpu_template_helper.py | 3 + .../functional/test_feat_parity.py | 80 ------------------- .../security/test_vulnerabilities.py | 15 ++-- 3 files changed, 10 insertions(+), 88 deletions(-) diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index 492185a66e4..25b996e76af 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -208,6 +208,9 @@ def build_cpu_config_dict(cpu_config_path): 0x48, # MSR_IA32_SMBASE is not accessible outside of System Management Mode. 0x9E, + # MSR_IA32_TSX_CTRL is R/W MSR to disable Intel TSX feature as a mitigation + # against TAA vulnerability. + 0x122, # MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP and MSR_IA32_SYSENTER_EIP are # R/W MSRs that will be set up by OS to call fast system calls with # SYSENTER. diff --git a/tests/integration_tests/functional/test_feat_parity.py b/tests/integration_tests/functional/test_feat_parity.py index ae1aac3b429..1eadbc6d29c 100644 --- a/tests/integration_tests/functional/test_feat_parity.py +++ b/tests/integration_tests/functional/test_feat_parity.py @@ -141,83 +141,3 @@ def test_feat_parity_cpuid_inst_set(vm): must_be_set, must_be_unset, ) - - -def test_feat_parity_cpuid_sec(vm): - """ - Verify that security-related CPUID feature flags are properly set - for T2CL and T2A CPU templates. - """ - - # fmt: off - must_be_set_common = [ - (0x7, 0x0, "edx", - (1 << 26) | # IBRS/IBPB - (1 << 27) | # STIBP - (1 << 31) # SSBD - ) - # Security feature bits in 0x80000008 EBX are set differently by - # 4.14 and 5.10 KVMs. - # 4.14 populates them from host's AMD flags (0x80000008 EBX), while - # 5.10 takes them from host's common flags (0x7 EDX). - # There is no great value in checking that this actually happens, as - # we cannot really control it. - # When we drop 4.14 support, we may consider enabling this check. - # (0x80000008, 0x0, "ebx", - # (1 << 12) | # IBPB - # (1 << 14) | # IBRS - # (1 << 15) | # STIBP - # (1 << 24) # SSBD - # ) - ] - - must_be_set_intel_only = [ - (0x7, 0x0, "edx", - (1 << 10) | # MD_CLEAR - (1 << 29) # IA32_ARCH_CAPABILITIES - ) - ] - - must_be_set_amd_only = [ - (0x80000008, 0x0, "ebx", - (1 << 18) | # IbrsPreferred - (1 << 19) # IbrsProvidesSameModeProtection - ) - ] - - must_be_unset_common = [ - (0x7, 0x0, "edx", - (1 << 28) # L1D_FLUSH - ) - ] - - must_be_unset_intel_only = [ - (0x80000008, 0x0, "ebx", - (1 << 18) | # IbrsPreferred - (1 << 19) # IbrsProvidesSameModeProtection - ) - ] - - must_be_unset_amd_only = [ - (0x7, 0x0, "edx", - (1 << 10) | # MD_CLEAR - (1 << 29) # IA32_ARCH_CAPABILITIES - ) - ] - # fmt: on - - vendor = cpuid_utils.get_cpu_vendor() - if vendor == cpuid_utils.CpuVendor.INTEL: - must_be_set = must_be_set_common + must_be_set_intel_only - must_be_unset = must_be_unset_common + must_be_unset_intel_only - elif vendor == cpuid_utils.CpuVendor.AMD: - must_be_set = must_be_set_common + must_be_set_amd_only - must_be_unset = must_be_unset_common + must_be_unset_amd_only - else: - raise Exception("Unsupported CPU vendor.") - - cpuid_utils.check_cpuid_feat_flags( - vm, - must_be_set, - must_be_unset, - ) diff --git a/tests/integration_tests/security/test_vulnerabilities.py b/tests/integration_tests/security/test_vulnerabilities.py index 2b7d301d56d..b22a76a2ab6 100644 --- a/tests/integration_tests/security/test_vulnerabilities.py +++ b/tests/integration_tests/security/test_vulnerabilities.py @@ -387,11 +387,12 @@ def get_vuln_files_exception_dict(template): # https://github.com/torvalds/linux/commit/da3db168fb671f15e393b227f5c312c698ecb6ea # Thus, since the FLUSH_L1D bit is masked off prior to kernel v6.4, guests with # IA32_ARCH_CAPABILITIES.FB_CLEAR (bit 17) = 0 (like guests on Intel Skylake and guests with - # T2S template) fall onto the second hand of the condition and fail the test. The expected value - # "Vulnerable: Clear CPU buffers attempted, no microcode" means that the kernel is using the - # best effort mode which invokes the mitigation instructions (VERW in this case) without a - # guarantee that they clear the CPU buffers. If the host has the microcode update applied - # correctly, the mitigation works and it is safe to ignore the "Vulnerable" message. + # T2S template) fall onto the second hand of the condition and fail the test. The value is + # "Vulnerable: Clear CPU buffers attempted, no microcode" on guests on Intel Skylake and guests + # with T2S template but "Mitigation: Clear CPU buffers; SMT Host state unknown" on kernel v6.4 + # or later. In any case, the kernel attempts to clear CPU buffers using VERW instruction and it + # is safe to ingore the "Vulnerable" message if the host has the microcode update applied + # correctly. Here we expect the common string "Clear CPU buffers" to cover both cases. # # Guest on Intel Skylake with C3 template # --------------------------------------- @@ -409,9 +410,7 @@ def get_vuln_files_exception_dict(template): if global_props.cpu_codename == "INTEL_SKYLAKE" and template == "C3": exception_dict["mmio_stale_data"] = "Unknown: No mitigations" elif global_props.cpu_codename == "INTEL_SKYLAKE" or template == "T2S": - exception_dict[ - "mmio_stale_data" - ] = "Vulnerable: Clear CPU buffers attempted, no microcode" + exception_dict["mmio_stale_data"] = "Clear CPU buffers" return exception_dict From 789b390bdcf62d4351a7e7f6bf01f0be249da392 Mon Sep 17 00:00:00 2001 From: Shivansh Vij Date: Thu, 18 Apr 2024 17:25:35 -0400 Subject: [PATCH 08/16] Backporting https://github.com/codesandbox/firecracker/commit/41643714f571f281ac9777ff72254591856ea0da --- .../src/cpu_config/x86_64/cpuid/amd/normalize.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 5a682441e8d..533bc8304c3 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -24,6 +24,8 @@ pub enum NormalizeCpuidError { MissingLeaf0x80000000, /// Missing leaf 0x80000001. MissingLeaf0x80000001, + /// Missing leaf 0x40000001. + MissingLeaf0x40000001, /// Failed to set feature entry leaf: {0} FeatureEntry(#[from] FeatureEntryError), /// Failed to set extended cache topology leaf: {0} @@ -110,6 +112,7 @@ impl super::AmdCpuid { self.update_extended_cache_topology_entry(cpu_count, cpus_per_core)?; self.update_extended_apic_id_entry(cpu_index, cpus_per_core)?; self.update_brand_string_entry()?; + self.disable_kvm_feature_async_pf()?; Ok(()) } @@ -407,6 +410,17 @@ impl super::AmdCpuid { .map_err(NormalizeCpuidError::BrandString)?; Ok(()) } + + fn disable_kvm_feature_async_pf(&mut self) -> Result<(), NormalizeCpuidError> { + let leaf_40000001 = self + .get_mut(&CpuidKey::leaf(0x40000001)) + .ok_or(NormalizeCpuidError::MissingLeaf0x40000001)?; + + // Disable KVM_FEATURE_ASYNC_PF_INT_BIT + leaf_40000001.result.eax.write_bit(Self::KVM_FEATURE_ASYNC_PF_INT_BIT, false); + set_bit(&mut leaf_40000001.result.eax, 14, false); + Ok(()) + } } #[cfg(test)] From bee01a3b6de80c54f64b6d344b31ff8c541ce345 Mon Sep 17 00:00:00 2001 From: Shivansh Vij Date: Thu, 18 Apr 2024 17:29:54 -0400 Subject: [PATCH 09/16] Fixing typo --- src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 533bc8304c3..0b80f795e3a 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -416,8 +416,7 @@ impl super::AmdCpuid { .get_mut(&CpuidKey::leaf(0x40000001)) .ok_or(NormalizeCpuidError::MissingLeaf0x40000001)?; - // Disable KVM_FEATURE_ASYNC_PF_INT_BIT - leaf_40000001.result.eax.write_bit(Self::KVM_FEATURE_ASYNC_PF_INT_BIT, false); + // Disable KVM_FEATURE_ASYNC_PF set_bit(&mut leaf_40000001.result.eax, 14, false); Ok(()) } From e7734f3ebf96ec324aee6addde2d156ce3935e8e Mon Sep 17 00:00:00 2001 From: Shivansh Vij Date: Fri, 19 Apr 2024 14:25:11 -0700 Subject: [PATCH 10/16] Revert 8d0fa80 and ed7116d --- .../src/cpu_config/x86_64/cpuid/amd/normalize.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 0b80f795e3a..5a682441e8d 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -24,8 +24,6 @@ pub enum NormalizeCpuidError { MissingLeaf0x80000000, /// Missing leaf 0x80000001. MissingLeaf0x80000001, - /// Missing leaf 0x40000001. - MissingLeaf0x40000001, /// Failed to set feature entry leaf: {0} FeatureEntry(#[from] FeatureEntryError), /// Failed to set extended cache topology leaf: {0} @@ -112,7 +110,6 @@ impl super::AmdCpuid { self.update_extended_cache_topology_entry(cpu_count, cpus_per_core)?; self.update_extended_apic_id_entry(cpu_index, cpus_per_core)?; self.update_brand_string_entry()?; - self.disable_kvm_feature_async_pf()?; Ok(()) } @@ -410,16 +407,6 @@ impl super::AmdCpuid { .map_err(NormalizeCpuidError::BrandString)?; Ok(()) } - - fn disable_kvm_feature_async_pf(&mut self) -> Result<(), NormalizeCpuidError> { - let leaf_40000001 = self - .get_mut(&CpuidKey::leaf(0x40000001)) - .ok_or(NormalizeCpuidError::MissingLeaf0x40000001)?; - - // Disable KVM_FEATURE_ASYNC_PF - set_bit(&mut leaf_40000001.result.eax, 14, false); - Ok(()) - } } #[cfg(test)] From 02d9154a6e8510c2556db882a8ef50c9e864221a Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Thu, 20 Jun 2024 16:32:14 -0700 Subject: [PATCH 11/16] build: Add CI/CD configuration based on Alpine --- .github/workflows/hydrun.yaml | 104 ++++++++++++++++++++++++++++++++++ Hydrunfile | 29 ++++++++++ 2 files changed, 133 insertions(+) create mode 100644 .github/workflows/hydrun.yaml create mode 100755 Hydrunfile diff --git a/.github/workflows/hydrun.yaml b/.github/workflows/hydrun.yaml new file mode 100644 index 00000000000..16ffed01e1f --- /dev/null +++ b/.github/workflows/hydrun.yaml @@ -0,0 +1,104 @@ +name: hydrun CI + +on: + push: + pull_request: + schedule: + - cron: "0 0 * * 0" + +jobs: + build-linux: + runs-on: ${{ matrix.target.runner }} + permissions: + contents: read + strategy: + matrix: + target: + # Binaries + - id: rust.x86_64 + src: . + os: alpine:edge + flags: "" + cmd: ./Hydrunfile rust x86_64 + dst: out/* + runner: depot-ubuntu-22.04-32 + - id: rust.aarch64 + src: . + os: alpine:edge + flags: "" + cmd: ./Hydrunfile rust aarch64 + dst: out/* + runner: depot-ubuntu-22.04-arm-32 + + steps: + - name: Maximize build space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + - name: Checkout + uses: actions/checkout@v4 + - name: Restore ccache + uses: actions/cache/restore@v4 + with: + path: | + /tmp/ccache + key: cache-ccache-${{ matrix.target.id }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Set up hydrun + run: | + curl -L -o /tmp/hydrun "https://github.com/pojntfx/hydrun/releases/latest/download/hydrun.linux-$(uname -m)" + sudo install /tmp/hydrun /usr/local/bin + - name: Build with hydrun + working-directory: ${{ matrix.target.src }} + run: hydrun -o ${{ matrix.target.os }} ${{ matrix.target.flags }} "${{ matrix.target.cmd }}" + - name: Fix permissions for output + run: sudo chown -R $USER . + - name: Save ccache + uses: actions/cache/save@v4 + with: + path: | + /tmp/ccache + key: cache-ccache-${{ matrix.target.id }} + - name: Upload output + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.target.id }} + path: ${{ matrix.target.dst }} + + publish-linux: + runs-on: ubuntu-latest + permissions: + contents: write + needs: build-linux + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download output + uses: actions/download-artifact@v4 + with: + path: /tmp/out + - name: Extract branch name + id: extract_branch + run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" + - name: Publish pre-release to GitHub releases + if: ${{ github.ref == 'refs/heads/firecracker-v1.6-live-migration' || github.ref == 'refs/heads/firecracker-v1.6-live-migration-and-pvm' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-and-msync' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-pvm-and-msync' || github.ref == 'refs/heads/main-live-migration-and-msync' }} + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + automatic_release_tag: release-${{ steps.extract_branch.outputs.branch }} + prerelease: true + files: | + /tmp/out/*/* + - name: Publish release to GitHub releases + if: startsWith(github.ref, 'refs/tags/v') + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + prerelease: false + files: | + /tmp/out/*/* diff --git a/Hydrunfile b/Hydrunfile new file mode 100755 index 00000000000..6184657168c --- /dev/null +++ b/Hydrunfile @@ -0,0 +1,29 @@ +#!/bin/sh + +set -e + +# Rust +if [ "$1" = "rust" ]; then + # Install native dependencies + apk add rust cargo clang-dev cmake linux-headers make git + + # Configure Git + git config --global --add safe.directory '*' + + # Build + cp "resources/seccomp/$2-unknown-linux-musl.json" "resources/seccomp/$2-alpine-linux-musl.json" + export RUSTFLAGS='-C target-feature=+crt-static' + cargo build --target "$2-alpine-linux-musl" --all-features --release + + # Stage binaries + mkdir -p out + + dir="./build/cargo_target/$2-alpine-linux-musl/release" + for file in $(ls "$dir"); do + if [[ -x "$dir/$file" && ! -d "$dir/$file" ]]; then + cp "$dir/$file" "./out/${file}.linux-$2" + fi + done + + exit 0 +fi From 3f29981bbc179f8511576756460bad5f6a3925fc Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Fri, 21 Jun 2024 15:54:56 -0700 Subject: [PATCH 12/16] build: Bump build system conventions --- .github/workflows/hydrun.yaml | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/.github/workflows/hydrun.yaml b/.github/workflows/hydrun.yaml index 16ffed01e1f..929a7efe1a9 100644 --- a/.github/workflows/hydrun.yaml +++ b/.github/workflows/hydrun.yaml @@ -31,11 +31,6 @@ jobs: runner: depot-ubuntu-22.04-arm-32 steps: - - name: Maximize build space - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf /usr/local/lib/android - sudo rm -rf /opt/ghc - name: Checkout uses: actions/checkout@v4 - name: Restore ccache @@ -86,19 +81,17 @@ jobs: id: extract_branch run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - name: Publish pre-release to GitHub releases - if: ${{ github.ref == 'refs/heads/firecracker-v1.6-live-migration' || github.ref == 'refs/heads/firecracker-v1.6-live-migration-and-pvm' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-and-msync' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-pvm-and-msync' || github.ref == 'refs/heads/main-live-migration-and-msync' }} - uses: marvinpinto/action-automatic-releases@latest + if: ${{ github.ref == 'refs/heads/master' }} + uses: softprops/action-gh-release@v2 with: - repo_token: "${{ secrets.GITHUB_TOKEN }}" - automatic_release_tag: release-${{ steps.extract_branch.outputs.branch }} + tag_name: release-${{ steps.extract_branch.outputs.branch }} prerelease: true files: | /tmp/out/*/* - name: Publish release to GitHub releases if: startsWith(github.ref, 'refs/tags/v') - uses: marvinpinto/action-automatic-releases@latest + uses: softprops/action-gh-release@v2 with: - repo_token: "${{ secrets.GITHUB_TOKEN }}" prerelease: false files: | /tmp/out/*/* From 237eafffdb64e243add44bf43eb3451603be3642 Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Fri, 21 Jun 2024 16:00:54 -0700 Subject: [PATCH 13/16] build: Use new release branch allowlist --- .github/workflows/hydrun.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hydrun.yaml b/.github/workflows/hydrun.yaml index 929a7efe1a9..9fcb3c28b4b 100644 --- a/.github/workflows/hydrun.yaml +++ b/.github/workflows/hydrun.yaml @@ -81,7 +81,7 @@ jobs: id: extract_branch run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - name: Publish pre-release to GitHub releases - if: ${{ github.ref == 'refs/heads/master' }} + if: ${{ github.ref == github.ref == 'refs/heads/firecracker-v1.7-live-migration-and-msync' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-pvm-and-msync' || github.ref == 'refs/heads/main-live-migration-and-msync' }} uses: softprops/action-gh-release@v2 with: tag_name: release-${{ steps.extract_branch.outputs.branch }} From 77e9cd4415f0d7bfa560ac0cb6d3ef07f060e89d Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Sat, 22 Jun 2024 00:18:12 -0700 Subject: [PATCH 14/16] build: List all packages to build manually --- Hydrunfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hydrunfile b/Hydrunfile index 6184657168c..5a4e13e0b94 100755 --- a/Hydrunfile +++ b/Hydrunfile @@ -13,7 +13,7 @@ if [ "$1" = "rust" ]; then # Build cp "resources/seccomp/$2-unknown-linux-musl.json" "resources/seccomp/$2-alpine-linux-musl.json" export RUSTFLAGS='-C target-feature=+crt-static' - cargo build --target "$2-alpine-linux-musl" --all-features --release + cargo build --package firecracker --package jailer --package seccompiler-bin --package rebase-snap --package cpu-template-helper --target "$2-alpine-linux-musl" --all-features --release # Stage binaries mkdir -p out From b7899bebb9fd130449883d131e43817f2c3843e3 Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Sat, 22 Jun 2024 00:20:16 -0700 Subject: [PATCH 15/16] fix: Use correct package name for `seccompiler` --- Hydrunfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hydrunfile b/Hydrunfile index 5a4e13e0b94..cb2fe830c90 100755 --- a/Hydrunfile +++ b/Hydrunfile @@ -13,7 +13,7 @@ if [ "$1" = "rust" ]; then # Build cp "resources/seccomp/$2-unknown-linux-musl.json" "resources/seccomp/$2-alpine-linux-musl.json" export RUSTFLAGS='-C target-feature=+crt-static' - cargo build --package firecracker --package jailer --package seccompiler-bin --package rebase-snap --package cpu-template-helper --target "$2-alpine-linux-musl" --all-features --release + cargo build --package firecracker --package jailer --package seccompiler --package rebase-snap --package cpu-template-helper --target "$2-alpine-linux-musl" --all-features --release # Stage binaries mkdir -p out From 5c3ce7957eb97b612d1c7e71fdee1f445957782d Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Sat, 22 Jun 2024 00:40:13 -0700 Subject: [PATCH 16/16] fix: Correct ref expression in workflow --- .github/workflows/hydrun.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hydrun.yaml b/.github/workflows/hydrun.yaml index 9fcb3c28b4b..12b4e099e98 100644 --- a/.github/workflows/hydrun.yaml +++ b/.github/workflows/hydrun.yaml @@ -81,7 +81,7 @@ jobs: id: extract_branch run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - name: Publish pre-release to GitHub releases - if: ${{ github.ref == github.ref == 'refs/heads/firecracker-v1.7-live-migration-and-msync' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-pvm-and-msync' || github.ref == 'refs/heads/main-live-migration-and-msync' }} + if: ${{ github.ref == 'refs/heads/firecracker-v1.7-live-migration-and-msync' || github.ref == 'refs/heads/firecracker-v1.7-live-migration-pvm-and-msync' || github.ref == 'refs/heads/main-live-migration-and-msync' }} uses: softprops/action-gh-release@v2 with: tag_name: release-${{ steps.extract_branch.outputs.branch }}