@@ -17,6 +17,7 @@ limitations under the License.
1717use std:: any:: type_name;
1818use std:: ffi:: c_void;
1919use std:: io:: Error ;
20+ use std:: mem:: { align_of, size_of} ;
2021#[ cfg( target_os = "linux" ) ]
2122use std:: ptr:: null_mut;
2223use std:: sync:: { Arc , RwLock } ;
@@ -783,12 +784,39 @@ impl HostSharedMemory {
783784 . lock
784785 . try_read ( )
785786 . map_err ( |e| new_error ! ( "Error locking at {}:{}: {}" , file!( ) , line!( ) , e) ) ?;
786- // todo: replace with something a bit more optimized + correct
787- for ( i, b) in slice. iter_mut ( ) . enumerate ( ) {
787+
788+ const CHUNK : usize = size_of :: < u128 > ( ) ;
789+ let len = slice. len ( ) ;
790+ let mut i = 0 ;
791+
792+ // Handle unaligned head bytes until we reach u128 alignment.
793+ // Note: align_offset can return usize::MAX if alignment is impossible.
794+ // In that case, head_len = len via .min(), so we fall back to byte-by-byte
795+ // operations for the entire slice.
796+ let align_offset = base. align_offset ( align_of :: < u128 > ( ) ) ;
797+ let head_len = align_offset. min ( len) ;
798+ while i < head_len {
799+ unsafe {
800+ slice[ i] = base. add ( i) . read_volatile ( ) ;
801+ }
802+ i += 1 ;
803+ }
804+
805+ // Read aligned u128 chunks
806+ while i + CHUNK <= len {
807+ let value = unsafe { ( base. add ( i) as * const u128 ) . read_volatile ( ) } ;
808+ slice[ i..i + CHUNK ] . copy_from_slice ( & value. to_ne_bytes ( ) ) ;
809+ i += CHUNK ;
810+ }
811+
812+ // Handle remaining tail bytes
813+ while i < len {
788814 unsafe {
789- * b = base. wrapping_add ( i) . read_volatile ( ) ;
815+ slice [ i ] = base. add ( i) . read_volatile ( ) ;
790816 }
817+ i += 1 ;
791818 }
819+
792820 drop ( guard) ;
793821 Ok ( ( ) )
794822 }
@@ -802,12 +830,51 @@ impl HostSharedMemory {
802830 . lock
803831 . try_read ( )
804832 . map_err ( |e| new_error ! ( "Error locking at {}:{}: {}" , file!( ) , line!( ) , e) ) ?;
805- // todo: replace with something a bit more optimized + correct
806- for ( i, b) in slice. iter ( ) . enumerate ( ) {
833+
834+ const CHUNK : usize = size_of :: < u128 > ( ) ;
835+ let len = slice. len ( ) ;
836+ let mut i = 0 ;
837+
838+ // Handle unaligned head bytes until we reach u128 alignment.
839+ // Note: align_offset can return usize::MAX if alignment is impossible.
840+ // In that case, head_len = len via .min(), so we fall back to byte-by-byte
841+ // operations for the entire slice.
842+ let align_offset = base. align_offset ( align_of :: < u128 > ( ) ) ;
843+ let head_len = align_offset. min ( len) ;
844+ while i < head_len {
845+ unsafe {
846+ base. add ( i) . write_volatile ( slice[ i] ) ;
847+ }
848+ i += 1 ;
849+ }
850+
851+ // Write aligned u128 chunks
852+ while i + CHUNK <= len {
853+ let chunk: [ u8 ; CHUNK ] = slice[ i..i + CHUNK ] . try_into ( ) . map_err ( |_| {
854+ new_error ! (
855+ "Failed to convert slice to fixed-size array for u128 chunk: \
856+ expected length {}, got {} (total slice len {}, offset {})",
857+ CHUNK ,
858+ slice[ i..i + CHUNK ] . len( ) ,
859+ len,
860+ i,
861+ )
862+ } ) ?;
863+ let value = u128:: from_ne_bytes ( chunk) ;
864+ unsafe {
865+ ( base. add ( i) as * mut u128 ) . write_volatile ( value) ;
866+ }
867+ i += CHUNK ;
868+ }
869+
870+ // Handle remaining tail bytes
871+ while i < len {
807872 unsafe {
808- base. wrapping_add ( i) . write_volatile ( * b ) ;
873+ base. add ( i) . write_volatile ( slice [ i ] ) ;
809874 }
875+ i += 1 ;
810876 }
877+
811878 drop ( guard) ;
812879 Ok ( ( ) )
813880 }
@@ -821,10 +888,40 @@ impl HostSharedMemory {
821888 . lock
822889 . try_read ( )
823890 . map_err ( |e| new_error ! ( "Error locking at {}:{}: {}" , file!( ) , line!( ) , e) ) ?;
824- // todo: replace with something a bit more optimized + correct
825- for i in 0 ..len {
826- unsafe { base. wrapping_add ( i) . write_volatile ( value) } ;
891+
892+ const CHUNK : usize = size_of :: < u128 > ( ) ;
893+ let value_u128 = u128:: from_ne_bytes ( [ value; CHUNK ] ) ;
894+ let mut i = 0 ;
895+
896+ // Handle unaligned head bytes until we reach u128 alignment.
897+ // Note: align_offset can return usize::MAX if alignment is impossible.
898+ // In that case, head_len = len via .min(), so we fall back to byte-by-byte
899+ // operations for the entire slice.
900+ let align_offset = base. align_offset ( align_of :: < u128 > ( ) ) ;
901+ let head_len = align_offset. min ( len) ;
902+ while i < head_len {
903+ unsafe {
904+ base. add ( i) . write_volatile ( value) ;
905+ }
906+ i += 1 ;
907+ }
908+
909+ // Write aligned u128 chunks
910+ while i + CHUNK <= len {
911+ unsafe {
912+ ( base. add ( i) as * mut u128 ) . write_volatile ( value_u128) ;
913+ }
914+ i += CHUNK ;
915+ }
916+
917+ // Handle remaining tail bytes
918+ while i < len {
919+ unsafe {
920+ base. add ( i) . write_volatile ( value) ;
921+ }
922+ i += 1 ;
827923 }
924+
828925 drop ( guard) ;
829926 Ok ( ( ) )
830927 }
@@ -1137,6 +1234,222 @@ mod tests {
11371234 assert_eq ! ( data, ret_vec) ;
11381235 }
11391236
/// Tests for the optimized aligned memory operations.
/// These tests verify that the u128 chunk optimization works correctly
/// for various alignment scenarios and buffer sizes.
mod alignment_tests {
    use super::*;

    /// Width of one optimized chunk in bytes (size_of::<u128>()).
    const CHUNK_SIZE: usize = 16;

    /// Backing region size shared by every test in this module; large
    /// enough to hold any offset/length combination exercised below.
    const MEM_SIZE: usize = 4096;

    /// Test copy operations with all possible starting alignment offsets (0-15)
    #[test]
    fn copy_with_various_alignments() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        // Walk every offset relative to a 16-byte boundary.
        for offset in 0..CHUNK_SIZE {
            // 64 bytes is enough to exercise head, aligned middle, and tail.
            let expected: Vec<u8> = (0..64).map(|i| (i + offset) as u8).collect();

            hshm.copy_from_slice(&expected, offset).unwrap();

            let mut actual = vec![0u8; expected.len()];
            hshm.copy_to_slice(&mut actual, offset).unwrap();

            assert_eq!(
                expected, actual,
                "Mismatch at alignment offset {}",
                offset
            );
        }
    }

    /// Test copy operations with lengths smaller than chunk size (< 16 bytes)
    #[test]
    fn copy_small_lengths() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        for len in 0..CHUNK_SIZE {
            let expected: Vec<u8> = (0..len).map(|i| i as u8).collect();

            hshm.copy_from_slice(&expected, 0).unwrap();

            let mut actual = vec![0u8; len];
            hshm.copy_to_slice(&mut actual, 0).unwrap();

            assert_eq!(expected, actual, "Mismatch for length {}", len);
        }
    }

    /// Test copy operations with lengths that don't align to chunk boundaries
    #[test]
    fn copy_non_aligned_lengths() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        // Lengths chosen to straddle chunk boundaries in both directions.
        for &len in &[17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257] {
            let expected: Vec<u8> = (0..len).map(|i| (i % 256) as u8).collect();

            hshm.copy_from_slice(&expected, 0).unwrap();

            let mut actual = vec![0u8; len];
            hshm.copy_to_slice(&mut actual, 0).unwrap();

            assert_eq!(expected, actual, "Mismatch for length {}", len);
        }
    }

    /// Test copy with exactly one chunk (16 bytes)
    #[test]
    fn copy_exact_chunk_size() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        let expected: Vec<u8> = (0..CHUNK_SIZE).map(|i| i as u8).collect();
        hshm.copy_from_slice(&expected, 0).unwrap();

        let mut actual = vec![0u8; CHUNK_SIZE];
        hshm.copy_to_slice(&mut actual, 0).unwrap();

        assert_eq!(expected, actual);
    }

    /// Test fill with various alignment offsets
    #[test]
    fn fill_with_various_alignments() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (mut hshm, _) = eshm.build();

        for offset in 0..CHUNK_SIZE {
            let value = (offset % 256) as u8;

            // Zero the whole region first so stale bytes can't mask a failure.
            hshm.fill(0, 0, MEM_SIZE).unwrap();
            hshm.fill(value, offset, 64).unwrap();

            let mut actual = vec![0u8; 64];
            hshm.copy_to_slice(&mut actual, offset).unwrap();

            assert!(
                actual.iter().all(|&b| b == value),
                "Fill mismatch at alignment offset {}",
                offset
            );
        }
    }

    /// Test fill with lengths smaller than chunk size
    #[test]
    fn fill_small_lengths() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (mut hshm, _) = eshm.build();

        for len in 0..CHUNK_SIZE {
            let value = 0xAB;

            hshm.fill(0, 0, MEM_SIZE).unwrap(); // Clear
            hshm.fill(value, 0, len).unwrap();

            let mut actual = vec![0u8; len];
            hshm.copy_to_slice(&mut actual, 0).unwrap();

            assert!(
                actual.iter().all(|&b| b == value),
                "Fill mismatch for length {}",
                len
            );
        }
    }

    /// Test fill with non-aligned lengths
    #[test]
    fn fill_non_aligned_lengths() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (mut hshm, _) = eshm.build();

        for &len in &[17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257] {
            let value = 0xCD;

            hshm.fill(0, 0, MEM_SIZE).unwrap(); // Clear
            hshm.fill(value, 0, len).unwrap();

            let mut actual = vec![0u8; len];
            hshm.copy_to_slice(&mut actual, 0).unwrap();

            assert!(
                actual.iter().all(|&b| b == value),
                "Fill mismatch for length {}",
                len
            );
        }
    }

    /// Test edge cases: length 0 and length 1
    #[test]
    fn copy_edge_cases() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        // Length 0: a zero-byte round trip must be a no-op that succeeds.
        let empty: Vec<u8> = vec![];
        hshm.copy_from_slice(&empty, 0).unwrap();
        let mut actual: Vec<u8> = vec![];
        hshm.copy_to_slice(&mut actual, 0).unwrap();
        assert!(actual.is_empty());

        // Length 1: the smallest non-trivial transfer.
        let single = vec![0x42u8];
        hshm.copy_from_slice(&single, 0).unwrap();
        let mut actual = vec![0u8; 1];
        hshm.copy_to_slice(&mut actual, 0).unwrap();
        assert_eq!(single, actual);
    }

    /// Test combined: unaligned start + non-aligned length
    #[test]
    fn copy_unaligned_start_and_length() {
        let eshm = ExclusiveSharedMemory::new(MEM_SIZE).unwrap();
        let (hshm, _) = eshm.build();

        // Offset 7 is unaligned; 37 is not a multiple of the 16-byte chunk.
        let offset = 7;
        let len = 37;
        let expected: Vec<u8> = (0..len).map(|i| (i * 3) as u8).collect();

        hshm.copy_from_slice(&expected, offset).unwrap();

        let mut actual = vec![0u8; len];
        hshm.copy_to_slice(&mut actual, offset).unwrap();

        assert_eq!(expected, actual);
    }
}
1452+
11401453 /// A test to ensure that, if a `SharedMem` instance is cloned
11411454 /// and _all_ clones are dropped, the memory region will no longer
11421455 /// be valid.
0 commit comments