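//! Implementation of the OS hint that compresses the data availability
//! segment. The compressed blob is laid out as: a packed header (version,
//! data length, unique-value bucket lengths, number of repeating values),
//! followed by the chained unique values, the packed pointers of repeating
//! values into that chain, and the packed bucket index of every element.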
use std::collections::HashMap;

use cairo_vm::hint_processor::builtin_hint_processor::hint_utils::get_ptr_from_var_name;
use cairo_vm::hint_processor::hint_processor_definition::HintReference;
use cairo_vm::hint_processor::hint_processor_utils::felt_to_usize;
use cairo_vm::serde::deserialize_program::ApTracking;
use cairo_vm::types::exec_scope::ExecutionScopes;
use cairo_vm::types::relocatable::MaybeRelocatable;
use cairo_vm::vm::errors::hint_errors::HintError;
use cairo_vm::vm::vm_core::VirtualMachine;
use cairo_vm::Felt252;
use indoc::indoc;

use crate::hints::vars;
use crate::utils::get_constant;

const COMPRESSION_VERSION: u8 = 0;
/// A felt can hold up to 251 bits when packing multiple bounded values together.
const MAX_N_BITS: usize = 251;
const N_UNIQUE_VALUE_BUCKETS: usize = 6;
/// The unique value buckets plus one extra index reserved for repeating values.
const TOTAL_N_BUCKETS: usize = N_UNIQUE_VALUE_BUCKETS + 1;

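/// A bucket of unique felts that all fit within `n_bits`, each mapped to the
/// order in which it was first added.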
#[derive(Debug, Clone)]
struct UniqueValueBucket {
    n_bits: Felt252,
    value_to_index: HashMap<Felt252, usize>,
}

impl UniqueValueBucket {
    fn new(n_bits: Felt252) -> Self {
        Self { n_bits, value_to_index: HashMap::new() }
    }

    fn add(&mut self, value: &Felt252) {
        if !self.value_to_index.contains_key(value) {
            let next_index = self.value_to_index.len();
            self.value_to_index.insert(*value, next_index);
        }
    }

    fn get_index(&self, value: &Felt252) -> Option<usize> {
        self.value_to_index.get(value).copied()
    }

    /// Returns the bucket's values ordered by first insertion; the values are
    /// felts already, so no further packing is needed.
    fn pack_in_felts(&self) -> Vec<&Felt252> {
        let mut values: Vec<&Felt252> = self.value_to_index.keys().collect();
        values.sort_by_key(|&v| self.value_to_index[v]);
        values
    }
}

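/// Distributes felts into buckets of bounded bit lengths, deduplicating within
/// each bucket: the first occurrence of a value is stored in the smallest
/// bucket it fits in, and later copies are recorded as pointers to that first
/// occurrence.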
struct CompressionSet {
    buckets: Vec<UniqueValueBucket>,
    /// (bucket_index, index_in_bucket) of the first copy of each repeated value.
    repeating_value_locations: Vec<(usize, usize)>,
    /// For each input element, the bucket it landed in, or `buckets.len()` for
    /// a repeated value.
    bucket_index_per_elm: Vec<usize>,
    finalized: bool,
}

impl CompressionSet {
    fn new(n_bits_per_bucket: Vec<Felt252>) -> Self {
        let buckets: Vec<UniqueValueBucket> =
            n_bits_per_bucket.iter().map(|&n_bits| UniqueValueBucket::new(n_bits)).collect();

        CompressionSet {
            buckets,
            repeating_value_locations: Vec::new(),
            bucket_index_per_elm: Vec::new(),
            finalized: false,
        }
    }

    fn update(&mut self, values: Vec<Felt252>) {
        assert!(!self.finalized, "Cannot add values after finalizing.");
        let buckets_len = self.buckets.len();
        for value in values.iter() {
            // Place the value in the smallest bucket whose bit bound fits it.
            let value_bits = Felt252::from(value.bits());
            let bucket_index = self
                .buckets
                .iter()
                .enumerate()
                .filter(|(_, bucket)| value_bits <= bucket.n_bits)
                .min_by_key(|(_, bucket)| bucket.n_bits)
                .map(|(index, _)| index)
                .expect("Value does not fit in any bucket.");
            let bucket = &mut self.buckets[bucket_index];
            if let Some(index) = bucket.get_index(value) {
                // Repeated value; record the location of the first added copy and
                // mark the element with the extra "repeating values" bucket index.
                self.repeating_value_locations.push((bucket_index, index));
                self.bucket_index_per_elm.push(buckets_len);
            } else {
                // First appearance of this value.
                bucket.add(value);
                self.bucket_index_per_elm.push(bucket_index);
            }
        }
    }

    fn finalize(&mut self) {
        self.finalized = true;
    }

    pub fn get_bucket_index_per_elm(&self) -> Vec<usize> {
        assert!(self.finalized, "Cannot get bucket_index_per_elm before finalizing.");
        self.bucket_index_per_elm.clone()
    }

    pub fn get_unique_value_bucket_lengths(&self) -> Vec<usize> {
        self.buckets.iter().map(|bucket| bucket.value_to_index.len()).collect()
    }

    pub fn get_repeating_value_bucket_length(&self) -> usize {
        self.repeating_value_locations.len()
    }

    pub fn pack_unique_values(&self) -> Vec<Felt252> {
        assert!(self.finalized, "Cannot pack before finalizing.");
        // Chain the packed felts from each bucket into a single vector.
        self.buckets.iter().flat_map(|bucket| bucket.pack_in_felts()).cloned().collect()
    }

    /// Returns a list of pointers corresponding to the repeating values.
    /// The pointers point into the chained unique value buckets.
    pub fn get_repeating_value_pointers(&self) -> Vec<usize> {
        assert!(self.finalized, "Cannot get pointers before finalizing.");

        let unique_value_bucket_lengths = self.get_unique_value_bucket_lengths();
        let bucket_offsets = get_bucket_offsets(unique_value_bucket_lengths);

        self.repeating_value_locations
            .iter()
            .map(|(bucket_index, index_in_bucket)| bucket_offsets[*bucket_index] + index_in_bucket)
            .collect()
    }
}

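/// Packs `elms` into a single felt, treating them as little-endian digits in
/// base `elm_bound`.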
fn pack_in_felt(elms: Vec<usize>, elm_bound: usize) -> Felt252 {
    let mut res = Felt252::ZERO;
    let mut factor = Felt252::ONE;
    for &elm in elms.iter() {
        // Accumulate in the field to avoid overflowing `usize` when raising
        // `elm_bound` to large powers.
        res += Felt252::from(elm) * factor;
        factor = factor * Felt252::from(elm_bound);
    }
    // Chunks are capped at `get_n_elms_per_felt(elm_bound)` elements, so the
    // packed value always fits in a single felt.
    res
}

fn pack_in_felts(elms: Vec<usize>, elm_bound: usize) -> Vec<Felt252> {
    assert!(elms.iter().all(|&elm| elm < elm_bound), "Element out of bound.");

    elms.chunks(get_n_elms_per_felt(elm_bound))
        .map(|chunk| pack_in_felt(chunk.to_vec(), elm_bound))
        .collect()
}

/// Returns the starting offset of each bucket in the chained unique-value
/// list, i.e. the exclusive prefix sums of the bucket lengths.
fn get_bucket_offsets(bucket_lengths: Vec<usize>) -> Vec<usize> {
    let mut offsets = Vec::new();
    let mut sum = 0;
    for length in bucket_lengths {
        offsets.push(sum);
        sum += length;
    }
    offsets
}

/// Returns the ceiling of log2(x) for x > 0, i.e. the bit length of x - 1.
fn log2_ceil(x: usize) -> usize {
    assert!(x > 0);
    (usize::BITS - (x - 1).leading_zeros()) as usize
}

/// Returns how many elements bounded by `elm_bound` fit into a single felt.
fn get_n_elms_per_felt(elm_bound: usize) -> usize {
    if elm_bound <= 1 {
        return MAX_N_BITS;
    }
    let n_bits_per_elm = log2_ceil(elm_bound);
    if n_bits_per_elm > MAX_N_BITS {
        return 1;
    }
    // E.g. a bound of 7 needs 3 bits per element, so 251 / 3 = 83 elements fit.
    MAX_N_BITS / n_bits_per_elm
}

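/// Compresses `data` into a sequence of felts: a packed header, the chained
/// unique values of every bucket, packed pointers for the repeating values,
/// and the packed bucket index of each input element.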
fn compression(
    data: Vec<Felt252>,
    data_size: usize,
    constants: &HashMap<String, Felt252>,
) -> Result<Vec<Felt252>, HintError> {
    let n_bits_per_bucket = vec![
        Felt252::from(252),
        Felt252::from(125),
        Felt252::from(83),
        Felt252::from(62),
        Felt252::from(31),
        Felt252::from(15),
    ];
    let header_elm_n_bits =
        felt_to_usize(get_constant(vars::constants::HEADER_ELM_N_BITS, constants)?)?;
    let header_elm_bound = 1usize << header_elm_n_bits;

    assert!(data_size < header_elm_bound, "Data length exceeds the header element bound.");

    let mut compression_set = CompressionSet::new(n_bits_per_bucket);
    compression_set.update(data);
    compression_set.finalize();

    let bucket_index_per_elm = compression_set.get_bucket_index_per_elm();

    let unique_value_bucket_lengths = compression_set.get_unique_value_bucket_lengths();
    let n_unique_values = unique_value_bucket_lengths.iter().sum::<usize>();

    let mut header = vec![COMPRESSION_VERSION as usize, data_size];
    header.extend(unique_value_bucket_lengths.iter().cloned());
    header.push(compression_set.get_repeating_value_bucket_length());

    let packed_header = vec![pack_in_felt(header, header_elm_bound)];

    let packed_repeating_value_pointers =
        pack_in_felts(compression_set.get_repeating_value_pointers(), n_unique_values);

    let packed_bucket_index_per_elm = pack_in_felts(bucket_index_per_elm, TOTAL_N_BUCKETS);

    let compressed_data = packed_header
        .into_iter()
        .chain(compression_set.pack_unique_values())
        .chain(packed_repeating_value_pointers)
        .chain(packed_bucket_index_per_elm)
        .collect::<Vec<Felt252>>();

    Ok(compressed_data)
}

pub const COMPRESS: &str = indoc! {r#"from starkware.starknet.core.os.data_availability.compression import compress
    data = memory.get_range_as_ints(addr=ids.data_start, size=ids.data_end - ids.data_start)
    segments.write_arg(ids.compressed_dst, compress(data))"#};

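/// Rust implementation of the Python hint above: reads the
/// `[data_start, data_end)` range from the VM, compresses it, and writes the
/// result to `compressed_dst`.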
pub fn compress(
    vm: &mut VirtualMachine,
    _exec_scopes: &mut ExecutionScopes,
    ids_data: &HashMap<String, HintReference>,
    ap_tracking: &ApTracking,
    constants: &HashMap<String, Felt252>,
) -> Result<(), HintError> {
    let data_start = get_ptr_from_var_name(vars::ids::DATA_START, vm, ids_data, ap_tracking)?;
    let data_end = get_ptr_from_var_name(vars::ids::DATA_END, vm, ids_data, ap_tracking)?;
    let data_size = (data_end - data_start)?;

    let compressed_dst = get_ptr_from_var_name(vars::ids::COMPRESSED_DST, vm, ids_data, ap_tracking)?;

    let data: Vec<Felt252> = vm.get_integer_range(data_start, data_size)?.into_iter().map(|s| *s).collect();
    let compress_result = compression(data, data_size, constants)?
        .into_iter()
        .map(MaybeRelocatable::Int)
        .collect::<Vec<MaybeRelocatable>>();

    vm.write_arg(compressed_dst, &compress_result)?;

    Ok(())
}
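
// Minimal sanity checks for the packing helpers; the expected values below are
// hand-computed from the definitions above rather than taken from a reference
// implementation.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn log2_ceil_matches_bit_length() {
        assert_eq!(log2_ceil(1), 0);
        assert_eq!(log2_ceil(2), 1);
        assert_eq!(log2_ceil(7), 3);
        assert_eq!(log2_ceil(8), 3);
        assert_eq!(log2_ceil(9), 4);
    }

    #[test]
    fn n_elms_per_felt_uses_the_felt_capacity() {
        // 251 bits per felt / 3 bits per element = 83 elements.
        assert_eq!(get_n_elms_per_felt(7), 83);
        assert_eq!(get_n_elms_per_felt(1), MAX_N_BITS);
    }

    #[test]
    fn bucket_offsets_are_prefix_sums() {
        assert_eq!(get_bucket_offsets(vec![2, 0, 3]), vec![0, 2, 2]);
    }

    #[test]
    fn pack_in_felt_is_little_endian_in_base_elm_bound() {
        // 1 + 2 * 10 + 3 * 100 = 321.
        assert_eq!(pack_in_felt(vec![1, 2, 3], 10), Felt252::from(321));
    }
}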