Commit d7f698e

compression hint
1 parent 42d1c75 commit d7f698e

4 files changed: +262 -2 lines changed
crates/starknet-os/src/hints/compression.rs

+255

@@ -0,0 +1,255 @@
use std::collections::HashMap;

use cairo_vm::hint_processor::builtin_hint_processor::hint_utils::get_ptr_from_var_name;
use cairo_vm::hint_processor::hint_processor_definition::HintReference;
use cairo_vm::hint_processor::hint_processor_utils::felt_to_usize;
use cairo_vm::serde::deserialize_program::ApTracking;
use cairo_vm::types::exec_scope::ExecutionScopes;
use cairo_vm::types::relocatable::MaybeRelocatable;
use cairo_vm::vm::errors::hint_errors::HintError;
use cairo_vm::vm::vm_core::VirtualMachine;
use cairo_vm::Felt252;
use indoc::indoc;

use crate::hints::vars;
use crate::utils::get_constant;

const COMPRESSION_VERSION: u8 = 0;
const MAX_N_BITS: usize = 251;
const N_UNIQUE_VALUE_BUCKETS: usize = 6;
const TOTAL_N_BUCKETS: usize = N_UNIQUE_VALUE_BUCKETS + 1;

/// A bucket that stores each distinct value at most once, in insertion order.
#[derive(Debug, Clone)]
struct UniqueValueBucket {
    n_bits: Felt252,
    value_to_index: HashMap<Felt252, usize>,
}

impl UniqueValueBucket {
    fn new(n_bits: Felt252) -> Self {
        Self { n_bits, value_to_index: HashMap::new() }
    }

    fn add(&mut self, value: &Felt252) {
        if !self.value_to_index.contains_key(value) {
            let next_index = self.value_to_index.len();
            self.value_to_index.insert(*value, next_index);
        }
    }

    fn get_index(&self, value: &Felt252) -> Option<usize> {
        self.value_to_index.get(value).copied()
    }

    /// Returns the unique values of the bucket in insertion order.
    fn pack_in_felts(&self) -> Vec<&Felt252> {
        let mut values: Vec<&Felt252> = self.value_to_index.keys().collect();
        values.sort_by_key(|&v| self.value_to_index[v]);
        values
    }
}

struct CompressionSet {
    buckets: Vec<UniqueValueBucket>,
    /// The buckets paired with their original index, sorted by bucket size (ascending `n_bits`).
    /// `update` inserts values into these buckets.
    sorted_buckets: Vec<(usize, UniqueValueBucket)>,
    repeating_value_locations: Vec<(usize, usize)>,
    bucket_index_per_elm: Vec<usize>,
    finalized: bool,
}

impl CompressionSet {
    fn new(n_bits_per_bucket: Vec<Felt252>) -> Self {
        let buckets: Vec<UniqueValueBucket> =
            n_bits_per_bucket.iter().map(|&n_bits| UniqueValueBucket::new(n_bits)).collect();

        let mut indexed_buckets: Vec<(usize, UniqueValueBucket)> = Vec::new();
        for (index, bucket) in buckets.iter().enumerate() {
            indexed_buckets.push((index, bucket.clone()));
        }
        indexed_buckets.sort_by(|a, b| a.1.n_bits.cmp(&b.1.n_bits));

        CompressionSet {
            buckets,
            sorted_buckets: indexed_buckets,
            repeating_value_locations: Vec::new(),
            bucket_index_per_elm: Vec::new(),
            finalized: false,
        }
    }

    fn update(&mut self, values: Vec<Felt252>) {
        assert!(!self.finalized, "Cannot add values after finalizing.");
        let buckets_len = self.buckets.len();
        for value in values.iter() {
            for (bucket_index, bucket) in self.sorted_buckets.iter_mut() {
                if Felt252::from(value.bits()) <= bucket.n_bits {
                    if bucket.value_to_index.contains_key(value) {
                        // Repeated value; add the location of the first added copy.
                        if let Some(index) = bucket.get_index(value) {
                            self.repeating_value_locations.push((*bucket_index, index));
                            self.bucket_index_per_elm.push(buckets_len);
                        }
                    } else {
                        // First appearance of this value.
                        bucket.add(value);
                        self.bucket_index_per_elm.push(*bucket_index);
                    }
                    // Each value is assigned to the smallest bucket that can hold it.
                    break;
                }
            }
        }
    }

    fn finalize(&mut self) {
        self.finalized = true;
    }

    pub fn get_bucket_index_per_elm(&self) -> Vec<usize> {
        assert!(self.finalized, "Cannot get bucket_index_per_elm before finalizing.");
        self.bucket_index_per_elm.clone()
    }

    /// Returns the number of unique values in each bucket, in the original bucket order.
    /// Note that `update` fills the buckets held in `sorted_buckets`, not those in `buckets`.
    pub fn get_unique_value_bucket_lengths(&self) -> Vec<usize> {
        let mut lengths = vec![0; self.sorted_buckets.len()];
        for (bucket_index, bucket) in self.sorted_buckets.iter() {
            lengths[*bucket_index] = bucket.value_to_index.len();
        }
        lengths
    }

    pub fn get_repeating_value_bucket_length(&self) -> usize {
        self.repeating_value_locations.len()
    }

    /// Chains the unique values of all buckets, in the original bucket order, into a single vector.
    pub fn pack_unique_values(&self) -> Vec<Felt252> {
        assert!(self.finalized, "Cannot pack before finalizing.");
        let mut ordered: Vec<&(usize, UniqueValueBucket)> = self.sorted_buckets.iter().collect();
        ordered.sort_by_key(|(bucket_index, _)| *bucket_index);
        ordered.into_iter().flat_map(|(_, bucket)| bucket.pack_in_felts()).cloned().collect()
    }

    /// Returns a list of pointers corresponding to the repeating values.
    /// The pointers point to the chained unique value buckets.
    pub fn get_repeating_value_pointers(&self) -> Vec<usize> {
        assert!(self.finalized, "Cannot get pointers before finalizing.");

        let unique_value_bucket_lengths = self.get_unique_value_bucket_lengths();
        let bucket_offsets = get_bucket_offsets(unique_value_bucket_lengths);

        let mut pointers = Vec::new();
        for (bucket_index, index_in_bucket) in self.repeating_value_locations.iter() {
            pointers.push(bucket_offsets[*bucket_index] + index_in_bucket);
        }

        pointers
    }
}
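
// Illustrative example (hypothetical lengths): with unique-value bucket lengths
// [2, 0, 3, 1, 0, 4], `get_bucket_offsets` returns [0, 2, 2, 5, 6, 6], so a value
// that repeats the element stored at index 1 of bucket 2 is encoded by
// `get_repeating_value_pointers` as pointer 2 + 1 = 3 into the chained unique-value list.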

/// Packs `elms` into a single felt, interpreting them as little-endian digits in base `elm_bound`.
fn pack_in_felt(elms: Vec<usize>, elm_bound: usize) -> Felt252 {
    // Guard against overflowing the felt: all digits together must fit in MAX_N_BITS bits.
    assert!(elms.len() * log2_ceil(elm_bound) <= MAX_N_BITS, "Out of bound packing.");
    // Horner evaluation in felt arithmetic; computing `elm_bound.pow(i)` in usize would overflow.
    let elm_bound = Felt252::from(elm_bound);
    elms.iter().rev().fold(Felt252::ZERO, |acc, &elm| acc * elm_bound + Felt252::from(elm))
}

fn pack_in_felts(elms: Vec<usize>, elm_bound: usize) -> Vec<Felt252> {
    assert!(elms.iter().all(|&elm| elm < elm_bound), "Element out of bound.");

    elms.chunks(get_n_elms_per_felt(elm_bound)).map(|chunk| pack_in_felt(chunk.to_vec(), elm_bound)).collect()
}

/// Returns the starting offset of each bucket in the chained unique-value list.
fn get_bucket_offsets(bucket_lengths: Vec<usize>) -> Vec<usize> {
    let mut offsets = Vec::new();
    let mut sum = 0;
    for length in bucket_lengths {
        offsets.push(sum);
        sum += length;
    }
    offsets
}

/// Number of bits needed to represent any value in `[0, x)`; equivalent to `(x - 1).bit_length()`.
fn log2_ceil(x: usize) -> usize {
    assert!(x > 0);
    (usize::BITS - (x - 1).leading_zeros()) as usize
}

/// Number of `elm_bound`-bounded elements that fit into a single felt.
fn get_n_elms_per_felt(elm_bound: usize) -> usize {
    if elm_bound <= 1 {
        return MAX_N_BITS;
    }
    // Equivalent to `elm_bound > 2.pow(MAX_N_BITS)` without overflowing usize
    // (it cannot trigger for a 64-bit `elm_bound`).
    if log2_ceil(elm_bound) > MAX_N_BITS {
        return 1;
    }

    MAX_N_BITS / log2_ceil(elm_bound)
}
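
// Illustrative numbers: the per-element bucket indices are bounded by TOTAL_N_BUCKETS = 7,
// so log2_ceil(7) = 3 and 251 / 3 = 83 indices are packed into each felt; assuming a
// hypothetical HEADER_ELM_N_BITS of 20, the 9 header elements occupy 9 * 20 = 180 bits
// of a single felt.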

/// Compresses `data` with the stateless compression scheme used by the Starknet OS for
/// data availability, returning the compressed felts.
fn compression(
    data: Vec<Felt252>,
    data_size: usize,
    constants: &HashMap<String, Felt252>,
) -> Result<Vec<Felt252>, HintError> {
    let n_bits_per_bucket = vec![
        Felt252::from(252),
        Felt252::from(125),
        Felt252::from(83),
        Felt252::from(62),
        Felt252::from(31),
        Felt252::from(15),
    ];
    let header_elm_n_bits = felt_to_usize(get_constant(vars::constants::HEADER_ELM_N_BITS, constants)?)?;
    let header_elm_bound = 1usize << header_elm_n_bits;

    assert!(data_size < header_elm_bound, "Data length exceeds the header element bound");

    let mut compression_set = CompressionSet::new(n_bits_per_bucket);
    compression_set.update(data);
    compression_set.finalize();

    let bucket_index_per_elm = compression_set.get_bucket_index_per_elm();

    let unique_value_bucket_lengths = compression_set.get_unique_value_bucket_lengths();
    let n_unique_values = unique_value_bucket_lengths.iter().sum::<usize>();

    // Header: version, data length, the six unique-value bucket lengths and the number of
    // repeating values, packed into a single felt.
    let mut header = vec![COMPRESSION_VERSION as usize, data_size];
    header.extend(unique_value_bucket_lengths.iter().cloned());
    header.push(compression_set.get_repeating_value_bucket_length());

    let packed_header = vec![pack_in_felt(header, header_elm_bound)];

    let packed_repeating_value_pointers =
        pack_in_felts(compression_set.get_repeating_value_pointers(), n_unique_values);

    let packed_bucket_index_per_elm = pack_in_felts(bucket_index_per_elm, TOTAL_N_BUCKETS);

    // Layout of the compressed blob: header, unique values, repeating-value pointers,
    // per-element bucket indices.
    let compressed_data = packed_header
        .into_iter()
        .chain(compression_set.pack_unique_values().into_iter())
        .chain(packed_repeating_value_pointers.into_iter())
        .chain(packed_bucket_index_per_elm.into_iter())
        .collect::<Vec<Felt252>>();

    Ok(compressed_data)
}

pub const COMPRESS: &str = indoc! {r#"from starkware.starknet.core.os.data_availability.compression import compress
data = memory.get_range_as_ints(addr=ids.data_start, size=ids.data_end - ids.data_start)
segments.write_arg(ids.compressed_dst, compress(data))"#};

/// Implements the `COMPRESS` hint: reads `[data_start, data_end)` from memory, compresses the
/// values and writes the result to `compressed_dst`.
pub fn compress(
    vm: &mut VirtualMachine,
    _exec_scopes: &mut ExecutionScopes,
    ids_data: &HashMap<String, HintReference>,
    ap_tracking: &ApTracking,
    constants: &HashMap<String, Felt252>,
) -> Result<(), HintError> {
    let data_start = get_ptr_from_var_name(vars::ids::DATA_START, vm, ids_data, ap_tracking)?;
    let data_end = get_ptr_from_var_name(vars::ids::DATA_END, vm, ids_data, ap_tracking)?;
    let data_size = (data_end - data_start)?;

    let compressed_dst = get_ptr_from_var_name(vars::ids::COMPRESSED_DST, vm, ids_data, ap_tracking)?;

    let data: Vec<Felt252> = vm.get_integer_range(data_start, data_size)?.into_iter().map(|s| *s).collect();
    let compress_result = compression(data, data_size, constants)?
        .into_iter()
        .map(MaybeRelocatable::Int)
        .collect::<Vec<MaybeRelocatable>>();

    vm.write_arg(compressed_dst, &compress_result)?;

    Ok(())
}
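
The helpers above pack runs of small integers as digits of a single felt. Below is a minimal standalone sketch of that base-`elm_bound` packing, using `u128` instead of `Felt252` so it runs without the cairo-vm dependencies; the index values are made up for illustration.

// Packs `elms` as little-endian digits in base `elm_bound`, mirroring `pack_in_felt` above.
fn pack(elms: &[u128], elm_bound: u128) -> u128 {
    elms.iter().rev().fold(0, |acc, &elm| acc * elm_bound + elm)
}

fn main() {
    // Six per-element bucket indices, each below TOTAL_N_BUCKETS = 7.
    let indices = [3, 0, 5, 6, 1, 2];
    let packed = pack(&indices, 7);
    // A decompressor recovers the digits with repeated division and modulo by the bound.
    assert_eq!(packed % 7, 3);
    assert_eq!((packed / 7) % 7, 0);
    println!("packed bucket indices = {packed}");
}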

crates/starknet-os/src/hints/mod.rs

+2

@@ -35,6 +35,7 @@ mod bls_field;
 mod bls_utils;
 pub mod builtins;
 mod compiled_class;
+mod compression;
 mod deprecated_compiled_class;
 mod execute_transactions;
 pub mod execution;
@@ -254,6 +255,7 @@ fn hints<PCS>() -> HashMap<String, HintImpl> where
     hints.insert(compiled_class::SET_AP_TO_SEGMENT_HASH.into(), compiled_class::set_ap_to_segment_hash);
     hints.insert(secp::READ_EC_POINT_ADDRESS.into(), secp::read_ec_point_from_address);
     hints.insert(execute_transactions::SHA2_FINALIZE.into(), execute_transactions::sha2_finalize);
+    hints.insert(compression::COMPRESS.into(), compression::compress);
     hints
 }


crates/starknet-os/src/hints/output.rs

+1 -2

@@ -163,8 +163,7 @@ pub fn set_state_updates_start(
     Ok(())
 }

-pub const SET_COMPRESSED_START: &str = indoc! {r#"use_kzg_da = ids.use_kzg_da
-if use_kzg_da:
+pub const SET_COMPRESSED_START: &str = indoc! {r#"if use_kzg_da:
     ids.compressed_start = segments.add()
 else:
     # Assign a temporary segment, to be relocated into the output segment.

crates/starknet-os/src/hints/vars.rs

+4

@@ -163,6 +163,9 @@ pub mod ids {
     pub const N_UPDATES_SMALL_PACKING_BOUND: &str =
         "starkware.starknet.core.os.state.output.N_UPDATES_SMALL_PACKING_BOUND";
     pub const FULL_OUTPUT: &str = "full_output";
+    pub const COMPRESSED_DST: &str = "compressed_dst";
+    pub const DATA_START: &str = "data_start";
+    pub const DATA_END: &str = "data_end";
 }

 pub mod constants {
@@ -171,4 +174,5 @@ pub mod constants {
     pub const MERKLE_HEIGHT: &str = "starkware.starknet.core.os.state.commitment.MERKLE_HEIGHT";
     pub const STORED_BLOCK_HASH_BUFFER: &str = "starkware.starknet.core.os.constants.STORED_BLOCK_HASH_BUFFER";
     pub const VALIDATED: &str = "starkware.starknet.core.os.constants.VALIDATED";
+    pub const HEADER_ELM_N_BITS: &str = "starkware.starknet.core.os.data_availability.compression.HEADER_ELM_N_BITS";
 }
