Commit 7de43ec

Optimize shared memory operations

Signed-off-by: Ludvig Liljenberg <[email protected]>

Change u64 to u128

Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent 30bd3d4 commit 7de43ec

File tree

2 files changed: +323 -9 lines changed

src/hyperlight_host/benches/benchmarks.rs

Lines changed: 1 addition & 0 deletions

@@ -524,6 +524,7 @@ fn shared_memory_benchmark(c: &mut Criterion) {
             let mut dst = vec![0u8; size];
             b.iter(|| {
                 hshm.copy_to_slice(&mut dst, 0).unwrap();
+                std::hint::black_box(&dst);
             });
         },
     );
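
The added `std::hint::black_box(&dst)` keeps the optimizer from noticing that `dst` is never otherwise observed and deleting the copy being measured. A minimal standalone sketch of the same pattern (not part of the commit; the buffer size and fill are illustrative):

use std::hint::black_box;

fn main() {
    let mut dst = vec![0u8; 4096];
    // Stand-in for the operation under measurement.
    dst.fill(0xAA);
    // black_box forces the compiler to assume the value is used,
    // so the work above cannot be optimized away.
    black_box(&dst);
}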

src/hyperlight_host/src/mem/shared_mem.rs

Lines changed: 322 additions & 9 deletions

@@ -17,6 +17,7 @@ limitations under the License.
 use std::any::type_name;
 use std::ffi::c_void;
 use std::io::Error;
+use std::mem::{align_of, size_of};
 #[cfg(target_os = "linux")]
 use std::ptr::null_mut;
 use std::sync::{Arc, RwLock};
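
A note on the new import: `size_of::<u128>()` is always 16, but `align_of::<u128>()` is target- and toolchain-dependent (16 on current x86_64 Rust, 8 on some older toolchains), which is presumably why the code queries it rather than hardcoding 16. A quick check:

use std::mem::{align_of, size_of};

fn main() {
    // Prints the chunk size and alignment the copy loops below rely on.
    println!("size_of::<u128>()  = {}", size_of::<u128>());
    println!("align_of::<u128>() = {}", align_of::<u128>());
}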
@@ -783,12 +784,39 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for (i, b) in slice.iter_mut().enumerate() {
+
+        const CHUNK: usize = size_of::<u128>();
+        let len = slice.len();
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                slice[i] = base.add(i).read_volatile();
+            }
+            i += 1;
+        }
+
+        // Read aligned u128 chunks
+        while i + CHUNK <= len {
+            let value = unsafe { (base.add(i) as *const u128).read_volatile() };
+            slice[i..i + CHUNK].copy_from_slice(&value.to_ne_bytes());
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
             unsafe {
-                *b = base.wrapping_add(i).read_volatile();
+                slice[i] = base.add(i).read_volatile();
             }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
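
The loop structure above splits the copy into three phases: a byte-wise head up to the first u128-aligned address, a body of aligned 16-byte volatile reads, and a byte-wise tail. A minimal sketch of the same split on an ordinary buffer (the `split_points` helper is illustrative, not from the patch):

use std::mem::{align_of, size_of};

// Returns (head_end, body_end): bytes [0, head_end) are copied one at a
// time, [head_end, body_end) in aligned u128 chunks, and [body_end, len)
// one at a time again.
fn split_points(base: *const u8, len: usize) -> (usize, usize) {
    const CHUNK: usize = size_of::<u128>();
    // align_offset returns how far to advance `base` to reach u128
    // alignment; usize::MAX (alignment impossible) is clamped to len,
    // degrading gracefully to an all-byte copy.
    let head_end = base.align_offset(align_of::<u128>()).min(len);
    // The body is the largest multiple of CHUNK that fits after the head.
    let body_end = head_end + (len - head_end) / CHUNK * CHUNK;
    (head_end, body_end)
}

fn main() {
    let buf = vec![0u8; 100];
    let (head_end, body_end) = split_points(buf.as_ptr(), buf.len());
    println!("head: 0..{head_end}, chunks: {head_end}..{body_end}, tail: {body_end}..100");
}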
@@ -802,12 +830,51 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for (i, b) in slice.iter().enumerate() {
+
+        const CHUNK: usize = size_of::<u128>();
+        let len = slice.len();
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                base.add(i).write_volatile(slice[i]);
+            }
+            i += 1;
+        }
+
+        // Write aligned u128 chunks
+        while i + CHUNK <= len {
+            let chunk: [u8; CHUNK] = slice[i..i + CHUNK].try_into().map_err(|_| {
+                new_error!(
+                    "Failed to convert slice to fixed-size array for u128 chunk: \
+                     expected length {}, got {} (total slice len {}, offset {})",
+                    CHUNK,
+                    slice[i..i + CHUNK].len(),
+                    len,
+                    i,
+                )
+            })?;
+            let value = u128::from_ne_bytes(chunk);
+            unsafe {
+                (base.add(i) as *mut u128).write_volatile(value);
+            }
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
             unsafe {
-                base.wrapping_add(i).write_volatile(*b);
+                base.add(i).write_volatile(slice[i]);
             }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
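
The write path round-trips bytes through `u128::from_ne_bytes`, so the in-memory byte order is preserved regardless of endianness, and the `try_into` can only fail if the subslice is not exactly 16 bytes, which the loop bound `i + CHUNK <= len` rules out. A small standalone demonstration of both properties (not from the commit):

fn main() {
    let bytes: [u8; 16] = *b"0123456789abcdef";

    // Native-endian round trip: from_ne_bytes and to_ne_bytes are inverses,
    // so writing the resulting u128 stores exactly these bytes in this order.
    assert_eq!(u128::from_ne_bytes(bytes).to_ne_bytes(), bytes);

    // try_into from &[u8] to [u8; 16] succeeds iff the slice length is 16.
    let arr: [u8; 16] = (&bytes[..]).try_into().unwrap();
    assert_eq!(arr, bytes);
    let short: Result<[u8; 16], _> = (&bytes[..3]).try_into();
    assert!(short.is_err());
}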
@@ -821,10 +888,40 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for i in 0..len {
-            unsafe { base.wrapping_add(i).write_volatile(value) };
+
+        const CHUNK: usize = size_of::<u128>();
+        let value_u128 = u128::from_ne_bytes([value; CHUNK]);
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                base.add(i).write_volatile(value);
+            }
+            i += 1;
+        }
+
+        // Write aligned u128 chunks
+        while i + CHUNK <= len {
+            unsafe {
+                (base.add(i) as *mut u128).write_volatile(value_u128);
+            }
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
+            unsafe {
+                base.add(i).write_volatile(value);
+            }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
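
For `fill`, the single fill byte is broadcast into a u128 up front, so each aligned store writes 16 copies of it at once. The broadcast is plain byte repetition, as this standalone check shows:

fn main() {
    let value: u8 = 0xAB;
    // [value; 16] is sixteen copies of the byte; reinterpreting that array
    // as a u128 yields a value whose every byte is 0xAB, so one 16-byte
    // volatile store is equivalent to sixteen 1-byte stores.
    let wide = u128::from_ne_bytes([value; 16]);
    assert_eq!(wide.to_ne_bytes(), [0xAB; 16]);
}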
@@ -1137,6 +1234,222 @@ mod tests {
         assert_eq!(data, ret_vec);
     }
 
+    /// Tests for the optimized aligned memory operations.
+    /// These tests verify that the u128 chunk optimization works correctly
+    /// for various alignment scenarios and buffer sizes.
+    mod alignment_tests {
+        use super::*;
+
+        const CHUNK_SIZE: usize = 16; // size_of::<u128>()
+
+        /// Test copy operations with all possible starting alignment offsets (0-15)
+        #[test]
+        fn copy_with_various_alignments() {
+            // Use a buffer large enough to test all alignment cases
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Test all 16 possible alignment offsets (0 through 15)
+            for start_offset in 0..CHUNK_SIZE {
+                let test_len = 64; // Enough to cover head, aligned chunks, and tail
+                let test_data: Vec<u8> = (0..test_len).map(|i| (i + start_offset) as u8).collect();
+
+                // Write data at the given offset
+                hshm.copy_from_slice(&test_data, start_offset).unwrap();
+
+                // Read it back
+                let mut read_buf = vec![0u8; test_len];
+                hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+                assert_eq!(
+                    test_data, read_buf,
+                    "Mismatch at alignment offset {}",
+                    start_offset
+                );
+            }
+        }
+
+        /// Test copy operations with lengths smaller than chunk size (< 16 bytes)
+        #[test]
+        fn copy_small_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            for len in 0..CHUNK_SIZE {
+                let test_data: Vec<u8> = (0..len).map(|i| i as u8).collect();
+
+                hshm.copy_from_slice(&test_data, 0).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert_eq!(test_data, read_buf, "Mismatch for length {}", len);
+            }
+        }
+
+        /// Test copy operations with lengths that don't align to chunk boundaries
+        #[test]
+        fn copy_non_aligned_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Test lengths like 17, 31, 33, 47, 63, 65, etc.
+            let test_lengths = [17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257];
+
+            for &len in &test_lengths {
+                let test_data: Vec<u8> = (0..len).map(|i| (i % 256) as u8).collect();
+
+                hshm.copy_from_slice(&test_data, 0).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert_eq!(test_data, read_buf, "Mismatch for length {}", len);
+            }
+        }
+
+        /// Test copy with exactly one chunk (16 bytes)
+        #[test]
+        fn copy_exact_chunk_size() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            let test_data: Vec<u8> = (0..CHUNK_SIZE).map(|i| i as u8).collect();
+
+            hshm.copy_from_slice(&test_data, 0).unwrap();
+
+            let mut read_buf = vec![0u8; CHUNK_SIZE];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+            assert_eq!(test_data, read_buf);
+        }
+
+        /// Test fill with various alignment offsets
+        #[test]
+        fn fill_with_various_alignments() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            for start_offset in 0..CHUNK_SIZE {
+                let fill_len = 64;
+                let fill_value = (start_offset % 256) as u8;
+
+                // Clear memory first
+                hshm.fill(0, 0, mem_size).unwrap();
+
+                // Fill at the given offset
+                hshm.fill(fill_value, start_offset, fill_len).unwrap();
+
+                // Read it back and verify
+                let mut read_buf = vec![0u8; fill_len];
+                hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch at alignment offset {}",
+                    start_offset
+                );
+            }
+        }
+
+        /// Test fill with lengths smaller than chunk size
+        #[test]
+        fn fill_small_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            for len in 0..CHUNK_SIZE {
+                let fill_value = 0xAB;
+
+                hshm.fill(0, 0, mem_size).unwrap(); // Clear
+                hshm.fill(fill_value, 0, len).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch for length {}",
+                    len
+                );
+            }
+        }
+
+        /// Test fill with non-aligned lengths
+        #[test]
+        fn fill_non_aligned_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            let test_lengths = [17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257];
+
+            for &len in &test_lengths {
+                let fill_value = 0xCD;
+
+                hshm.fill(0, 0, mem_size).unwrap(); // Clear
+                hshm.fill(fill_value, 0, len).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch for length {}",
+                    len
+                );
+            }
+        }
+
+        /// Test edge cases: length 0 and length 1
+        #[test]
+        fn copy_edge_cases() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Length 0
+            let empty: Vec<u8> = vec![];
+            hshm.copy_from_slice(&empty, 0).unwrap();
+            let mut read_buf: Vec<u8> = vec![];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+            assert!(read_buf.is_empty());
+
+            // Length 1
+            let single = vec![0x42u8];
+            hshm.copy_from_slice(&single, 0).unwrap();
+            let mut read_buf = vec![0u8; 1];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+            assert_eq!(single, read_buf);
+        }
+
+        /// Test combined: unaligned start + non-aligned length
+        #[test]
+        fn copy_unaligned_start_and_length() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Start at offset 7 (unaligned), length 37 (not a multiple of 16)
+            let start_offset = 7;
+            let len = 37;
+            let test_data: Vec<u8> = (0..len).map(|i| (i * 3) as u8).collect();
+
+            hshm.copy_from_slice(&test_data, start_offset).unwrap();
+
+            let mut read_buf = vec![0u8; len];
+            hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+            assert_eq!(test_data, read_buf);
+        }
+    }
+
     /// A test to ensure that, if a `SharedMem` instance is cloned
     /// and _all_ clones are dropped, the memory region will no longer
     /// be valid.