Skip to content

Commit

Permalink
Support shared mmap for running VMs
Browse files Browse the repository at this point in the history
This allows us to run VMs while streaming memory changes to disk

support mmap shared

update log

add configuration for memory backing file

Progress

tweaks
  • Loading branch information
CompuIves committed Jul 23, 2022
1 parent bed8dae commit d1b90e4
Show file tree
Hide file tree
Showing 16 changed files with 296 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,19 @@
}
]
},
{
"syscall": "msync",
"comment": "Used to sync memory from mmap to disk",
"args": [
{
"index": 2,
"type": "dword",
"op": "eq",
"val": 4,
"comment": "MS_SYNC"
}
]
},
{
"syscall": "rt_sigaction",
"comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT",
Expand Down
13 changes: 13 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,19 @@
}
]
},
{
"syscall": "msync",
"comment": "Used to sync memory from mmap to disk",
"args": [
{
"index": 2,
"type": "dword",
"op": "eq",
"val": 4,
"comment": "MS_SYNC"
}
]
},
{
"syscall": "rt_sigaction",
"comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT",
Expand Down
2 changes: 2 additions & 0 deletions src/api_server/src/parsed_request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::request::logger::parse_put_logger;
use crate::request::machine_configuration::{
parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config,
};
use crate::request::memory_backing_file::parse_put_memory_backing_file;
use crate::request::metrics::parse_put_metrics;
use crate::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds};
use crate::request::net::{parse_patch_net, parse_put_net};
Expand Down Expand Up @@ -114,6 +115,7 @@ impl ParsedRequest {
(Method::Put, "network-interfaces", Some(body)) => {
parse_put_net(body, path_tokens.get(1))
}
(Method::Put, "memory-backing-file", Some(body)) => parse_put_memory_backing_file(body),
(Method::Put, "shutdown-internal", None) => {
Ok(ParsedRequest::new(RequestAction::ShutdownInternal))
}
Expand Down
42 changes: 42 additions & 0 deletions src/api_server/src/request/memory_backing_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use super::super::VmmAction;
use crate::parsed_request::{Error, ParsedRequest};
use crate::request::Body;
use logger::{IncMetric, METRICS};
use vmm::vmm_config::memory_backing_file::MemoryBackingFileConfig;

pub(crate) fn parse_put_memory_backing_file(body: &Body) -> Result<ParsedRequest, Error> {
METRICS.put_api_requests.memory_backing_file_cfg_count.inc();
Ok(ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile(
serde_json::from_slice::<MemoryBackingFileConfig>(body.raw()).map_err(|e| {
METRICS.put_api_requests.memory_backing_file_cfg_fails.inc();
Error::SerdeJson(e)
})?,
)))
}

#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Checks that a malformed body is rejected and that a well-formed body
    /// yields a synchronous `SetMemoryBackingFile` action carrying the path
    /// given in the payload.
    #[test]
    fn test_parse_memory_backing_file() {
        // A body that is not JSON at all must be reported as an error.
        let malformed = Body::new("invalid_payload");
        assert!(parse_put_memory_backing_file(&malformed).is_err());

        // A well-formed body and the configuration it is expected to decode to.
        let payload = r#"{
"path": "./memory.snap"
}"#;
        let expected_config = MemoryBackingFileConfig {
            path: PathBuf::from("./memory.snap"),
        };

        let outcome = parse_put_memory_backing_file(&Body::new(payload));
        assert!(outcome.is_ok());
        let actual_request = match outcome {
            Ok(request) => request,
            Err(_) => panic!("Failed test."),
        };

        let expected_request =
            ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile(expected_config));
        assert!(actual_request == expected_request);
    }
}
1 change: 1 addition & 0 deletions src/api_server/src/request/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod drive;
pub mod instance_info;
pub mod logger;
pub mod machine_configuration;
pub mod memory_backing_file;
pub mod metrics;
pub mod mmds;
pub mod net;
Expand Down
31 changes: 31 additions & 0 deletions src/api_server/swagger/firecracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,29 @@ paths:
description: Internal server error
schema:
$ref: "#/definitions/Error"

/memory-backing-file:
put:
summary: Configures a memory backing file to sync the memory changes to during the runtime of the vm
operationId: putMemoryBackingFile
parameters:
- name: body
in: body
description: Path to memory backing file
required: true
schema:
$ref: "#/definitions/MemoryBackingFile"
responses:
204:
description: Memory backing file configured
400:
description: Memory backing file failed
schema:
$ref: "#/definitions/Error"
default:
description: Internal server error.
schema:
$ref: "#/definitions/Error"

/metrics:
put:
Expand Down Expand Up @@ -1047,6 +1070,14 @@ definitions:
tx_rate_limiter:
$ref: "#/definitions/RateLimiter"

MemoryBackingFile:
type: object
required:
- path
properties:
path:
type: string

PartialDrive:
type: object
required:
Expand Down
4 changes: 4 additions & 0 deletions src/logger/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,10 @@ pub struct PutRequestsMetrics {
pub machine_cfg_count: SharedIncMetric,
/// Number of failures in configuring the machine.
pub machine_cfg_fails: SharedIncMetric,
/// Number of PUTs for setting memory backing file.
pub memory_backing_file_cfg_count: SharedIncMetric,
/// Number of failures in setting memory backing file.
pub memory_backing_file_cfg_fails: SharedIncMetric,
/// Number of PUTs for initializing the metrics system.
pub metrics_count: SharedIncMetric,
/// Number of failures in initializing the metrics system.
Expand Down
2 changes: 1 addition & 1 deletion src/vm-memory/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ pub fn create_guest_memory(
for region in regions {
let flags = match region.0 {
None => libc::MAP_NORESERVE | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
Some(_) => libc::MAP_NORESERVE | libc::MAP_PRIVATE,
Some(_) => libc::MAP_NORESERVE | libc::MAP_SHARED,
};

let mmap_region =
Expand Down
59 changes: 45 additions & 14 deletions src/vmm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ use libc::EFD_NONBLOCK;
use logger::METRICS;
use std::convert::TryFrom;
use std::fmt::{Display, Formatter};
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::{Arc, Mutex};
use vm_memory::FileOffset;
use vm_superio::Serial;

#[cfg(target_arch = "aarch64")]
Expand All @@ -23,6 +25,7 @@ use crate::construct_kvm_mpidrs;
use crate::device_manager::legacy::PortIODeviceManager;
use crate::device_manager::mmio::MMIODeviceManager;
use crate::device_manager::persist::MMIODevManagerConstructorArgs;
use crate::persist::MemoryDescriptor;

#[cfg(target_arch = "x86_64")]
use linux_loader::loader::elf::Elf as Loader;
Expand Down Expand Up @@ -53,7 +56,6 @@ use linux_loader::loader::KernelLoader;
use logger::{error, warn};
use seccompiler::BpfThreadMap;
use snapshot::Persist;
use userfaultfd::Uffd;
use utils::eventfd::EventFd;
use utils::terminal::Terminal;
use utils::time::TimestampUs;
Expand All @@ -66,6 +68,8 @@ use vm_superio::Rtc;
pub enum StartMicrovmError {
/// Unable to attach block device to Vmm.
AttachBlockDevice(io::Error),
/// Unable to create the memory backing file.
BackingMemoryFile(io::Error),
/// This error is thrown by the minimal boot loader implementation.
ConfigureSystem(arch::Error),
/// Internal errors are due to resource exhaustion.
Expand Down Expand Up @@ -119,6 +123,9 @@ impl Display for StartMicrovmError {
AttachBlockDevice(err) => {
write!(f, "Unable to attach block device to Vmm: {}", err)
}
BackingMemoryFile(err) => {
write!(f, "Unable to create the memory backing file: {}", err)
}
ConfigureSystem(e) => write!(f, "System configuration error: {:?}", e),
CreateRateLimiter(err) => write!(f, "Cannot create RateLimiter: {}", err),
CreateNetDevice(err) => {
Expand Down Expand Up @@ -238,7 +245,7 @@ fn create_vmm_and_vcpus(
instance_info: &InstanceInfo,
event_manager: &mut EventManager,
guest_memory: GuestMemoryMmap,
uffd: Option<Uffd>,
memory_descriptor: Option<MemoryDescriptor>,
track_dirty_pages: bool,
vcpu_count: u8,
) -> std::result::Result<(Vmm, Vec<Vcpu>), StartMicrovmError> {
Expand Down Expand Up @@ -304,7 +311,7 @@ fn create_vmm_and_vcpus(
shutdown_exit_code: None,
vm,
guest_memory,
uffd,
memory_descriptor,
vcpus_handles: Vec::new(),
vcpus_exit_evt,
mmio_device_manager,
Expand Down Expand Up @@ -336,8 +343,23 @@ pub fn build_microvm_for_boot(
let boot_config = vm_resources.boot_source().ok_or(MissingKernelConfig)?;

let track_dirty_pages = vm_resources.track_dirty_pages();
let guest_memory =
create_guest_memory(vm_resources.vm_config().mem_size_mib, track_dirty_pages)?;

let backing_memory_file = if let Some(ref file) = vm_resources.backing_memory_file {
file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64)
.map_err(|e| {
error!("Failed to set backing memory file size: {}", e);
StartMicrovmError::BackingMemoryFile(e)
})?;

Some(file.clone())
} else {
None
};
let guest_memory = create_guest_memory(
vm_resources.vm_config().mem_size_mib,
backing_memory_file.clone(),
track_dirty_pages,
)?;
let vcpu_config = vm_resources.vcpu_config();
let entry_addr = load_kernel(boot_config, &guest_memory)?;
let initrd = load_initrd_from_config(boot_config, &guest_memory)?;
Expand Down Expand Up @@ -369,7 +391,7 @@ pub fn build_microvm_for_boot(
instance_info,
event_manager,
guest_memory,
None,
backing_memory_file.map(MemoryDescriptor::File),
track_dirty_pages,
vcpu_config.vcpu_count,
)?;
Expand Down Expand Up @@ -458,7 +480,7 @@ pub fn build_microvm_from_snapshot(
event_manager: &mut EventManager,
microvm_state: MicrovmState,
guest_memory: GuestMemoryMmap,
uffd: Option<Uffd>,
memory_descriptor: Option<MemoryDescriptor>,
track_dirty_pages: bool,
seccomp_filters: &BpfThreadMap,
vm_resources: &mut VmResources,
Expand All @@ -473,7 +495,7 @@ pub fn build_microvm_from_snapshot(
instance_info,
event_manager,
guest_memory.clone(),
uffd,
memory_descriptor,
track_dirty_pages,
vcpu_count,
)?;
Expand Down Expand Up @@ -590,15 +612,24 @@ pub fn build_microvm_from_snapshot(
/// Creates GuestMemory of `mem_size_mib` MiB in size.
pub fn create_guest_memory(
mem_size_mib: usize,
backing_memory_file: Option<Arc<File>>,
track_dirty_pages: bool,
) -> std::result::Result<GuestMemoryMmap, StartMicrovmError> {
let mem_size = mem_size_mib << 20;
let arch_mem_regions = arch::arch_memory_regions(mem_size);

let mut offset = 0_u64;
vm_memory::create_guest_memory(
&arch_mem_regions
.iter()
.map(|(addr, size)| (None, *addr, *size))
.map(|(addr, size)| {
let file_offset = backing_memory_file
.clone()
.map(|file| FileOffset::from_arc(file, offset));
offset += *size as u64;

(file_offset, *addr, *size)
})
.collect::<Vec<_>>()[..],
track_dirty_pages,
)
Expand Down Expand Up @@ -1076,7 +1107,7 @@ pub mod tests {
}

pub(crate) fn default_vmm() -> Vmm {
let guest_memory = create_guest_memory(128, false).unwrap();
let guest_memory = create_guest_memory(128, None, false).unwrap();

let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK)
.map_err(Error::EventFd)
Expand Down Expand Up @@ -1104,12 +1135,12 @@ pub mod tests {
shutdown_exit_code: None,
vm,
guest_memory,
uffd: None,
vcpus_handles: Vec::new(),
vcpus_exit_evt,
mmio_device_manager,
#[cfg(target_arch = "x86_64")]
pio_device_manager,
memory_descriptor: None,
}
}

Expand Down Expand Up @@ -1291,21 +1322,21 @@ pub mod tests {

// Case 1: create guest memory without dirty page tracking
{
let guest_memory = create_guest_memory(mem_size, false).unwrap();
let guest_memory = create_guest_memory(mem_size, None, false).unwrap();
assert!(!is_dirty_tracking_enabled(&guest_memory));
}

// Case 2: create guest memory with dirty page tracking
{
let guest_memory = create_guest_memory(mem_size, true).unwrap();
let guest_memory = create_guest_memory(mem_size, None, true).unwrap();
assert!(is_dirty_tracking_enabled(&guest_memory));
}
}

#[test]
fn test_create_vcpus() {
let vcpu_count = 2;
let guest_memory = create_guest_memory(128, false).unwrap();
let guest_memory = create_guest_memory(128, None, false).unwrap();

#[allow(unused_mut)]
let mut vm = setup_kvm_vm(&guest_memory, false).unwrap();
Expand Down
Loading

0 comments on commit d1b90e4

Please sign in to comment.