From 49ac21f510224f4c5540cbcdcabf5ac8bd8498fd Mon Sep 17 00:00:00 2001 From: LaihoE <80683769+LaihoE@users.noreply.github.com> Date: Wed, 20 Sep 2023 21:03:52 +0300 Subject: [PATCH] Add inventory prop (#27) * add inv * fix props not being parsed * fix instancebaseline * rm comment --- src/parser/src/collect_data.rs | 48 +++++ src/parser/src/entities.rs | 9 +- src/parser/src/main.rs | 7 +- src/parser/src/maps.rs | 2 + src/parser/src/parser.rs | 3 +- src/parser/src/parser_thread_settings.rs | 2 + src/parser/src/prop_controller.rs | 21 ++- src/parser/src/sendtables.rs | 41 +++- src/parser/src/stringtables.rs | 13 +- src/parser/src/variants.rs | 46 ++++- src/python/src/lib.rs | 226 ++++++++++++++++------- 11 files changed, 325 insertions(+), 93 deletions(-) diff --git a/src/parser/src/collect_data.rs b/src/parser/src/collect_data.rs index 98e25a30..e7a3877d 100644 --- a/src/parser/src/collect_data.rs +++ b/src/parser/src/collect_data.rs @@ -5,6 +5,7 @@ use crate::maps::PAINTKITS; use crate::maps::WEAPINDICIES; use crate::parser_thread_settings::ParserThread; use crate::prop_controller::PropInfo; +use crate::prop_controller::MY_WEAPONS_OFFSET; use crate::prop_controller::WEAPON_SKIN_ID; use crate::variants::PropColumn; use std::fmt; @@ -87,6 +88,7 @@ pub enum PropCollectionError { // DONT KNOW IF THESE ARE CORRECT. SEEMS TO GIVE CORRECT VALUES const CELL_BITS: i32 = 9; const MAX_COORD: f32 = (1 << 14) as f32; +const MAX_INVENTORY_IDX: u32 = 16; impl std::error::Error for PropCollectionError {} impl fmt::Display for PropCollectionError { @@ -444,9 +446,55 @@ impl ParserThread { "weapon_name" => self.find_weapon_name(entity_id), "weapon_skin" => self.find_weapon_skin(entity_id), "active_weapon_original_owner" => self.find_weapon_original_owner(entity_id), + "inventory" => self.find_my_inventory(entity_id), _ => Err(PropCollectionError::UnknownCustomPropName), } } + pub fn find_my_inventory(&self, entity_id: &i32) -> Result { + let mut names = vec![]; + let mut unique_eids = vec![]; + + for i in 0..MAX_INVENTORY_IDX { + let prop_id = MY_WEAPONS_OFFSET + i; + match self.get_prop_from_ent(&(prop_id as u32), entity_id) { + Err(_e) => {} + Ok(Variant::U32(x)) => { + let eid = (x & ((1 << 14) - 1)) as i32; + // Sometimes multiple references to same eid? + if unique_eids.contains(&eid) { + continue; + } + unique_eids.push(eid); + let res = match self.get_prop_from_ent(&self.prop_controller.special_ids.item_def.unwrap(), &eid) { + Err(_e) => continue, + Ok(def) => def, + }; + self.insert_equipment_name(&mut names, res, entity_id); + } + _ => {} + } + } + Ok(Variant::StringVec(names)) + } + fn insert_equipment_name(&self, names: &mut Vec, res: Variant, player_entid: &i32) { + if let Variant::U32(def_idx) = res { + match WEAPINDICIES.get(&def_idx) { + None => return, + Some(weap_name) => { + match weap_name { + // Check how many flashbangs player has (only prop that works like this) + &"flashbang" => { + if let Ok(Variant::U32(2)) = self.get_prop_from_ent(&987654, player_entid) { + names.push(weap_name.to_string()); + } + } + _ => {} + } + names.push(weap_name.to_string()); + } + }; + } + } pub fn find_weapon_original_owner(&self, entity_id: &i32) -> Result { let low_id = match self.prop_controller.special_ids.orig_own_low { diff --git a/src/parser/src/entities.rs b/src/parser/src/entities.rs index ee902952..59274214 100644 --- a/src/parser/src/entities.rs +++ b/src/parser/src/entities.rs @@ -104,11 +104,10 @@ impl ParserThread { if self.is_debug_mode { for (field_info, debug) in self.field_infos[..n_updates].iter().zip(&self.debug_fields) { let result = bitreader.decode(&field_info.decoder, &self.qf_mapper)?; - // self.game_events_counter.insert(debug.field.full_name.clone()); - if debug.field.full_name.contains("Freeze") { + if debug.field.full_name.contains("CWeaponMAC10") { println!( - "{:?} {:?} {:?} {:?} {:?}", - debug.path, debug.field.full_name, result, self.tick, self.net_tick + "{:?} {:?} {:?} {:?} {:?} {:?}", + debug.path, debug.field.full_name, result, self.tick, self.net_tick, field_info.prop_id ); } } @@ -226,7 +225,7 @@ impl ParserThread { } // We reuse one big vector for holding paths. Purely for performance. // Alternatively we could create a new vector in this function and return it. - self.field_infos[idx] = class.serializer.find_decoder(&fp, 0); + self.field_infos[idx] = class.serializer.find_decoder(&fp, 0, self.parse_inventory); idx += 1; } Ok(idx) diff --git a/src/parser/src/main.rs b/src/parser/src/main.rs index fc71969e..cf1553c8 100644 --- a/src/parser/src/main.rs +++ b/src/parser/src/main.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use std::time::Instant; fn main() { - let wanted_props = vec!["active_weapon_original_owner".to_string()]; + let wanted_props = vec!["X".to_string()]; let before = Instant::now(); let dir = fs::read_dir("/home/laiho/Documents/demos/cs2/test3/").unwrap(); let mut c = 0; @@ -21,7 +21,7 @@ fn main() { let before = Instant::now(); - if c > 100 { + if c > 1000 { break; } @@ -58,9 +58,6 @@ fn main() { let mut ds = Parser::new(settings); let d = ds.parse_demo().unwrap(); println!("TOTAL {:?}", before.elapsed()); - for x in d.game_events_counter { - println!("{:?}", x); - } } println!("TOTAL {:?}", before.elapsed()); } diff --git a/src/parser/src/maps.rs b/src/parser/src/maps.rs index 4f2db4b0..5a1cfeab 100644 --- a/src/parser/src/maps.rs +++ b/src/parser/src/maps.rs @@ -1562,6 +1562,7 @@ pub static TYPEHM: phf::Map<&'static str, PropType> = phf_map! { "weapon_name" => PropType::Custom, "active_weapon_original_owner" => PropType::Custom, "game_time" => PropType::GameTime, + "inventory" => PropType::Custom, // Weapon "m_flAnimTime" => PropType::Weapon, "m_flSimulationTime"=> PropType::Weapon, @@ -1767,6 +1768,7 @@ pub static FRIENDLY_NAMES_MAPPING: phf::Map<&'static str, &'static str> = phf_ma "yaw" => "yaw", "game_time" => "game_time", + "inventory" => "inventory", "rank" => "CCSPlayerController.m_iCompetitiveRanking", "rank_if_win" => "CCSPlayerController.m_iCompetitiveRankingPredicted_Win", diff --git a/src/parser/src/parser.rs b/src/parser/src/parser.rs index eb8bbb54..a3568e22 100644 --- a/src/parser/src/parser.rs +++ b/src/parser/src/parser.rs @@ -97,6 +97,7 @@ impl Parser { DEM_SignonPacket => self.parse_packet(&bytes), DEM_Stop => break, DEM_FullPacket => { + self.parse_full_packet(&bytes).unwrap(); self.fullpacket_offsets.push(frame_starts_at); Ok(()) } @@ -104,7 +105,6 @@ impl Parser { }; ok?; } - let outputs: Vec> = self .fullpacket_offsets .par_iter() @@ -126,6 +126,7 @@ impl Parser { } Ok(self.combine_thread_outputs(&mut ok)) } + // fn parse_stringtables_cmd(bytes: &[u8]) -> Result<(), DemoParserError> {} pub fn create_parser_thread_input(&self, offset: usize, parse_all: bool) -> ParserThreadInput { let cls_by_id = match &self.cls_by_id { Some(cls_by_id) => cls_by_id.clone(), diff --git a/src/parser/src/parser_thread_settings.rs b/src/parser/src/parser_thread_settings.rs index 751cd136..0186d272 100644 --- a/src/parser/src/parser_thread_settings.rs +++ b/src/parser/src/parser_thread_settings.rs @@ -38,6 +38,7 @@ pub struct ParserThread { pub cls_by_id: Arc>, pub stringtable_players: AHashMap, pub net_tick: u32, + pub parse_inventory: bool, pub ptr: usize, pub bytes: Arc, @@ -165,6 +166,7 @@ impl ParserThread { false => 0, }; Ok(ParserThread { + parse_inventory: input.prop_controller.wanted_player_props.contains(&"inventory".to_string()), net_tick: 0, debug_fields: vec![ DebugFieldAndPath { diff --git a/src/parser/src/prop_controller.rs b/src/parser/src/prop_controller.rs index 65d4b62b..52c02df1 100644 --- a/src/parser/src/prop_controller.rs +++ b/src/parser/src/prop_controller.rs @@ -20,6 +20,8 @@ const NORMAL_PROP_BASEID: u32 = 1000; pub const WEAPON_SKIN_ID: u32 = 420420420; pub const WEAPON_ORIGINGAL_OWNER_ID: u32 = 6942000; +pub const MY_WEAPONS_OFFSET: u32 = 500000; +pub const GRENADE_AMMO_ID: u32 = 1111111; #[derive(Clone, Debug)] pub struct PropController { @@ -96,6 +98,15 @@ impl PropController { is_player_prop: true, }); } + if self.wanted_player_props.contains(&("inventory".to_string())) { + self.prop_infos.push(PropInfo { + id: 555555575, + prop_type: PropType::Custom, + prop_name: "inventory".to_string(), + prop_friendly_name: "inventory".to_string(), + is_player_prop: true, + }); + } if self.wanted_player_props.contains(&("game_time".to_string())) { self.prop_infos.push(PropInfo { id: 123456879, @@ -264,10 +275,12 @@ impl PropController { let is_weapon_prop = (split_at_dot[0].contains("Weapon") || split_at_dot[0].contains("AK")) && !split_at_dot[0].contains("Player") || split_at_dot[0].contains("Knife") - || split_at_dot[0].contains("CDEagle"); + || split_at_dot[0].contains("CDEagle") + || split_at_dot[0].contains("C4"); - let is_projectile_prop = (split_at_dot[0].contains("Projectile") || split_at_dot[0].contains("Grenade")) - && !split_at_dot[0].contains("Player"); + let is_projectile_prop = + (split_at_dot[0].contains("Projectile") || split_at_dot[0].contains("Grenade") || split_at_dot[0].contains("Flash")) + && !split_at_dot[0].contains("Player"); let is_grenade_or_weapon = is_weapon_prop || is_projectile_prop; // Strip first part of name from grenades and weapons. @@ -329,7 +342,9 @@ impl PropController { || name.contains("CCSPlayerController.m_hPlayerPawn") || name.contains("CCSPlayerController.m_bPawnIsAlive") || name.contains("m_hActiveWeapon") + || name.contains("Weapons") || name.contains("OriginalOwnerXuid") + || name.contains("Flash") { return true; } diff --git a/src/parser/src/sendtables.rs b/src/parser/src/sendtables.rs index 55a4822c..bedec393 100644 --- a/src/parser/src/sendtables.rs +++ b/src/parser/src/sendtables.rs @@ -169,7 +169,7 @@ pub static BASETYPE_DECODERS: phf::Map<&'static str, Decoder> = phf_map! { const WEAPON_SKIN_PATH: [i32; 7] = [87, 0, 1, 0, 0, 0, 0]; impl Field { - pub fn decoder_from_path(&self, path: &FieldPath, pos: usize) -> FieldInfo { + pub fn decoder_from_path(&self, path: &FieldPath, pos: usize, parse_inventory: bool) -> FieldInfo { match self.model { FieldModelSimple => { // EHHH IDK WILL HAVE TO DO FOR NOW @@ -218,7 +218,7 @@ impl Field { } else { match &self.serializer { Some(ser) => { - return ser.find_decoder(path, pos); + return ser.find_decoder(path, pos, parse_inventory); } None => panic!("no serializer for path"), } @@ -245,7 +245,7 @@ impl Field { if path.last >= pos + 1 { match &self.serializer { Some(ser) => { - return ser.find_decoder(path, pos + 1); + return ser.find_decoder(path, pos + 1, parse_inventory); } None => panic!("no serializer for path"), } @@ -498,16 +498,44 @@ pub struct Serializer { pub name: String, pub fields: Vec, } +const FLASH_AMMO_PATH: [i32; 7] = [86, 2, 14, 0, 0, 0, 0]; +use crate::prop_controller::GRENADE_AMMO_ID; +use crate::prop_controller::MY_WEAPONS_OFFSET; impl Serializer { - pub fn find_decoder(&self, path: &FieldPath, pos: usize) -> FieldInfo { - self.fields[path.path[pos] as usize].decoder_from_path(path, pos + 1) + pub fn find_decoder(&self, path: &FieldPath, pos: usize, parse_inventory: bool) -> FieldInfo { + // Edge case for now... + if parse_inventory { + if let Some(info) = self.find_inventory_info(path) { + return info; + } + } + self.fields[path.path[pos] as usize].decoder_from_path(path, pos + 1, parse_inventory) } pub fn debug_find_decoder(&self, path: &FieldPath, pos: usize, prop_name: String) -> DebugField { let idx = path.path[pos]; let f = &self.fields[idx as usize]; f.debug_decoder_from_path(path, pos + 1, prop_name) } + fn find_inventory_info(&self, path: &FieldPath) -> Option { + if path.path == FLASH_AMMO_PATH && self.name == "CCSPlayerPawn" { + return Some(FieldInfo { + controller_prop: None, + decoder: UnsignedDecoder, + should_parse: true, + prop_id: GRENADE_AMMO_ID, + }); + } + if path.path[0] == 86 && path.path[1] == 0 && self.name == "CCSPlayerPawn" && path.last == 2 { + return Some(FieldInfo { + controller_prop: None, + decoder: UnsignedDecoder, + should_parse: true, + prop_id: MY_WEAPONS_OFFSET + path.path[2] as u32, + }); + } + None + } } const POINTER_TYPES: &'static [&'static str] = &[ @@ -621,6 +649,9 @@ impl Parser { || my_serializer.name.contains("Knife") || my_serializer.name.contains("CDEagle") || my_serializer.name.contains("Rules") + || my_serializer.name.contains("C4") + || my_serializer.name.contains("Grenade") + || my_serializer.name.contains("Flash") { prop_controller.find_prop_name_paths(&mut my_serializer); } diff --git a/src/parser/src/stringtables.rs b/src/parser/src/stringtables.rs index 19e8ad75..835bd04a 100644 --- a/src/parser/src/stringtables.rs +++ b/src/parser/src/stringtables.rs @@ -51,9 +51,11 @@ impl Parser { pub fn parse_create_stringtable(&mut self, bytes: &[u8]) -> Result<(), DemoParserError> { let table: CSVCMsg_CreateStringTable = Message::parse_from_bytes(&bytes).unwrap(); + if !(table.name() == "instancebaseline" || table.name() == "userinfo") { return Ok(()); } + let bytes = match table.data_compressed() { true => snap::raw::Decoder::new().decompress_vec(table.string_data()).unwrap(), false => table.string_data().to_vec(), @@ -148,8 +150,9 @@ impl Parser { }; } if name == "userinfo" { - let player = parse_userinfo(&value)?; - self.stringtable_players.insert(player.steamid, player); + if let Ok(player) = parse_userinfo(&value) { + self.stringtable_players.insert(player.steamid, player); + } } if name == "instancebaseline" { match key.parse::() { @@ -301,8 +304,12 @@ impl ParserThread { value }; } + if name == "userinfo" { + if let Ok(player) = parse_userinfo(&value) { + self.stringtable_players.insert(player.steamid, player); + } + } if name == "instancebaseline" { - // Watch out for keys like 42:15 <-- seem to be props that are not used atm match key.parse::() { Ok(cls_id) => self.baselines.insert(cls_id, value.clone()), Err(_e) => None, diff --git a/src/parser/src/variants.rs b/src/parser/src/variants.rs index a8fa8c1f..c37a339f 100644 --- a/src/parser/src/variants.rs +++ b/src/parser/src/variants.rs @@ -4,7 +4,7 @@ use crate::prop_controller::PropInfo; use ahash::{HashMap, HashMapExt}; use itertools::Itertools; use memmap2::Mmap; -use serde::ser::{SerializeMap, SerializeStruct}; +use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct}; use serde::Serialize; #[derive(Debug, Clone, PartialEq)] @@ -19,6 +19,8 @@ pub enum Variant { String(String), VecXY([f32; 2]), VecXYZ([f32; 3]), + // Todo change to Vec + StringVec(Vec), } #[derive(Debug, Clone)] @@ -29,6 +31,7 @@ pub enum VarVec { F32(Vec>), I32(Vec>), String(Vec>), + StringVec(Vec>), } impl VarVec { @@ -40,6 +43,7 @@ impl VarVec { Variant::String(_) => VarVec::String(vec![]), Variant::U64(_) => VarVec::U64(vec![]), Variant::U32(_) => VarVec::U32(vec![]), + Variant::StringVec(_) => VarVec::StringVec(vec![]), _ => panic!("Tried to create propcolumns from: {:?}", item), } } @@ -66,6 +70,7 @@ impl PropColumn { Some(VarVec::String(b)) => b.len(), Some(VarVec::U32(b)) => b.len(), Some(VarVec::U64(b)) => b.len(), + Some(VarVec::StringVec(b)) => b.len(), None => self.num_nones, } } @@ -149,6 +154,20 @@ impl PropColumn { panic!("illegal 6"); } }, + Some(VarVec::StringVec(v)) => match &other.data { + Some(VarVec::StringVec(v_other)) => { + v.extend_from_slice(&v_other); + } + None => { + for _ in 0..other.num_nones { + v.push(vec![]); + } + } + _ => { + panic!("illegal 7"); + } + }, + None => match &other.data { Some(VarVec::Bool(_inner)) => { self.resolve_vec_type(PropColumn::get_type(&other.data)); @@ -174,6 +193,10 @@ impl PropColumn { self.resolve_vec_type(PropColumn::get_type(&other.data)); self.extend_from(other); } + Some(VarVec::StringVec(_inner)) => { + self.resolve_vec_type(PropColumn::get_type(&other.data)); + self.extend_from(other); + } None => { self.num_nones += other.num_nones; } @@ -189,6 +212,7 @@ impl PropColumn { Some(VarVec::String(_)) => Some(3), Some(VarVec::U32(_)) => Some(4), Some(VarVec::U64(_)) => Some(5), + Some(VarVec::StringVec(_)) => Some(6), None => None, } } @@ -203,6 +227,7 @@ impl PropColumn { Some(3) => self.data = Some(VarVec::String(vec![])), Some(4) => self.data = Some(VarVec::U32(vec![])), Some(5) => self.data = Some(VarVec::U64(vec![])), + Some(6) => self.data = Some(VarVec::StringVec(vec![])), _ => panic!("NONE OR > 5 TYPE FOR VEC RESOLUTION : {:?}", v_type), } for _ in 0..self.num_nones { @@ -271,6 +296,10 @@ impl VarVec { panic!("Tried to push a {:?} into a {:?} column", item, self); } }, + Some(Variant::StringVec(p)) => match self { + VarVec::StringVec(f) => f.push(p), + _ => {} + }, None => self.push_none(), _ => panic!("bad type for prop: {:?}", item), } @@ -283,6 +312,7 @@ impl VarVec { VarVec::U32(f) => f.push(None), VarVec::U64(f) => f.push(None), VarVec::Bool(f) => f.push(None), + VarVec::StringVec(f) => f.push(vec![]), } } } @@ -317,6 +347,13 @@ impl Serialize for Variant { Variant::U32(u) => serializer.serialize_u32(*u), Variant::U64(u) => serializer.serialize_str(&u.to_string()), Variant::U8(u) => serializer.serialize_u8(*u), + Variant::StringVec(v) => { + let mut s = serializer.serialize_seq(Some(v.len())).unwrap(); + for item in v { + s.serialize_element(item).unwrap(); + } + s.end() + } _ => panic!("cant ser: {:?}", self), } } @@ -457,6 +494,10 @@ pub fn soa_to_aos(soa: OutputSerdeHelperStruct) -> Vec hm.insert(prop_info.prop_friendly_name.clone(), Some(Variant::U32(*f))), None => hm.insert(prop_info.prop_friendly_name.clone(), None), }, + Some(VarVec::StringVec(val)) => match val.get(idx) { + Some(f) => hm.insert(prop_info.prop_friendly_name.clone(), Some(Variant::StringVec(f.clone()))), + None => hm.insert(prop_info.prop_friendly_name.clone(), None), + }, }; } } @@ -500,6 +541,9 @@ impl Serialize for OutputSerdeHelperStruct { Some(VarVec::U32(val)) => { map.serialize_entry(&prop_info.prop_friendly_name, val).unwrap(); } + Some(VarVec::StringVec(val)) => { + map.serialize_entry(&prop_info.prop_friendly_name, val).unwrap(); + } } } } diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index bab1c105..5e92f91a 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -565,7 +565,7 @@ impl DemoParser { #[args(py_kwargs = "**")] pub fn parse_event( &self, - _py: Python<'_>, + py: Python<'_>, event_name: String, py_kwargs: Option<&PyDict>, ) -> PyResult> { @@ -621,7 +621,7 @@ impl DemoParser { Ok(output) => output, Err(e) => return Err(Exception::new_err(format!("{}", e))), }; - let event_series = match series_from_event(&output.game_events) { + let event_series = match series_from_event(&output.game_events, py) { Ok(ser) => ser, Err(_e) => { return Err(Exception::new_err(format!( @@ -701,8 +701,8 @@ impl DemoParser { #[args(py_kwargs = "**")] pub fn parse_ticks( &self, - _py: Python, - mut wanted_props: Vec, + py: Python, + wanted_props: Vec, py_kwargs: Option<&PyDict>, ) -> PyResult { let (_, wanted_ticks) = parse_kwargs_ticks(py_kwargs); @@ -753,47 +753,44 @@ impl DemoParser { Err(e) => return Err(Exception::new_err(format!("{}", e))), }; let mut all_series = vec![]; + let mut all_pyobjects = vec![]; + let prop_infos = output.prop_info.prop_infos; + let mut df_column_names_arrow = vec![]; + let mut df_column_names_py = vec![]; - wanted_props.push("tick".to_owned()); - wanted_props.push("steamid".to_owned()); - wanted_props.push("name".to_owned()); - - real_props.push("tick".to_owned()); - real_props.push("steamid".to_owned()); - real_props.push("name".to_owned()); - - let mut prop_infos = output.prop_info.prop_infos.clone(); - prop_infos.sort_by_key(|x| x.prop_name.clone()); - real_props.sort(); - - let df_columns = prop_infos - .iter() - .map(|x| x.prop_friendly_name.clone()) - .collect_vec(); - - for (prop_name, prop_info) in real_props.iter().zip(prop_infos) { + for prop_info in prop_infos { if output.df.contains_key(&prop_info.id) { match &output.df[&prop_info.id].data { Some(VarVec::F32(data)) => { + df_column_names_arrow.push(prop_info.prop_friendly_name); all_series.push(arr_to_py(Box::new(Float32Array::from(data))).unwrap()); } Some(VarVec::I32(data)) => { + df_column_names_arrow.push(prop_info.prop_friendly_name); all_series.push(arr_to_py(Box::new(Int32Array::from(data))).unwrap()); } Some(VarVec::U64(data)) => { + df_column_names_arrow.push(prop_info.prop_friendly_name); all_series.push(arr_to_py(Box::new(UInt64Array::from(data))).unwrap()); } Some(VarVec::U32(data)) => { + df_column_names_arrow.push(prop_info.prop_friendly_name); all_series.push(arr_to_py(Box::new(UInt32Array::from(data))).unwrap()); } Some(VarVec::Bool(data)) => { + df_column_names_arrow.push(prop_info.prop_friendly_name); all_series.push(arr_to_py(Box::new(BooleanArray::from(data))).unwrap()); } Some(VarVec::String(data)) => { - let s = Series::new(prop_name, data); + df_column_names_arrow.push(prop_info.prop_friendly_name.clone()); + let s = Series::new(&prop_info.prop_friendly_name.clone(), data); let py_series = rust_series_to_py_series(&s).unwrap(); all_series.push(py_series); } + Some(VarVec::StringVec(data)) => { + df_column_names_py.push(prop_info.prop_friendly_name); + all_pyobjects.push(data.to_object(py)) + } _ => {} } } @@ -802,8 +799,21 @@ impl DemoParser { let polars = py.import("polars")?; let all_series_py = all_series.to_object(py); let df = polars.call_method1("DataFrame", (all_series_py,))?; - df.setattr("columns", df_columns.to_object(py)).unwrap(); + df.setattr("columns", df_column_names_arrow.to_object(py)) + .unwrap(); let pandas_df = df.call_method0("to_pandas").unwrap(); + for (pyobj, col_name) in all_pyobjects.iter().zip(&df_column_names_py) { + pandas_df + .call_method1("insert", (0, col_name, pyobj)) + .unwrap(); + } + df_column_names_arrow.extend(df_column_names_py); + df_column_names_arrow.sort(); + let kwargs = vec![("axis", 1)].into_py_dict(py); + let args = (df_column_names_arrow,); + pandas_df + .call_method("reindex", args, Some(kwargs)) + .unwrap(); Ok(pandas_df.to_object(py)) }) } @@ -921,28 +931,54 @@ pub fn series_from_multiple_events( for (k, v) in per_ge { let pairs: Vec = v.iter().map(|x| x.fields.clone()).flatten().collect(); let per_key_name = pairs.iter().into_group_map_by(|x| &x.name); - let mut series = vec![]; + + let mut series_columns = vec![]; + let mut py_columns = vec![]; + let mut rows = 0; for (name, vals) in per_key_name { - let s = series_from_pairs(&vals, name)?; - series.push(s); + match column_from_pairs(&vals, name, py)? { + DataFrameColumn::Pyany(p) => py_columns.push((p, name)), + DataFrameColumn::Series(s) => { + rows = s.len().max(rows); + series_columns.push((s, name)) + } + }; } - series.sort_by_key(|x| x.name().to_string()); + let mut series_col_names: Vec = series_columns + .iter() + .map(|(_, name)| name.to_string()) + .collect(); + let series_columns: Vec = series_columns + .iter() + .map(|(ser, _)| rust_series_to_py_series(&ser).unwrap()) + .collect(); + let py_col_names: Vec = py_columns + .iter() + .map(|(_, name)| name.to_string()) + .collect(); - let column_names: Vec<&str> = series.iter().map(|x| x.name().clone()).collect(); - let mut rows = 0; - let mut all_series = vec![]; - for ser in &series { - rows = ser.len().max(rows); - let py_series = rust_series_to_py_series(&ser).unwrap(); - all_series.push(py_series); - } if rows != 0 { let dfp = Python::with_gil(|py| { let polars = py.import("polars").unwrap(); - let df = polars.call_method1("DataFrame", (all_series,)).unwrap(); - df.setattr("columns", column_names.to_object(py)).unwrap(); + let all_series_py = series_columns.to_object(py); + let df = polars.call_method1("DataFrame", (all_series_py,)).unwrap(); + df.setattr("columns", series_col_names.to_object(py)) + .unwrap(); let pandas_df = df.call_method0("to_pandas").unwrap(); + for (pyobj, col_name) in py_columns { + pandas_df + .call_method1("insert", (0, col_name, pyobj)) + .unwrap(); + } + let pandas_df = pandas_df.call_method0("to_pandas").unwrap(); + series_col_names.extend(py_col_names); + series_col_names.sort(); + let kwargs = vec![("axis", 1)].into_py_dict(py); + let args = (series_col_names,); + pandas_df + .call_method("reindex", args, Some(kwargs)) + .unwrap(); pandas_df.to_object(py) }); vv.push((k, dfp)); @@ -950,38 +986,71 @@ pub fn series_from_multiple_events( } Ok(vv.to_object(py)) } -pub fn series_from_event(events: &Vec) -> Result, DemoParserError> { + +pub enum DataFrameColumn { + Series(Series), + Pyany(pyo3::Py), +} + +pub fn series_from_event( + events: &Vec, + py: Python, +) -> Result, DemoParserError> { let pairs: Vec = events.iter().map(|x| x.fields.clone()).flatten().collect(); let per_key_name = pairs.iter().into_group_map_by(|x| &x.name); - let mut series = vec![]; - - for (name, vals) in per_key_name { - let s = series_from_pairs(&vals, name)?; - series.push(s); - } - series.sort_by_key(|x| x.name().to_string()); - let column_names: Vec<&str> = series.iter().map(|x| x.name().clone()).collect(); + let mut series_columns = vec![]; + let mut py_columns = vec![]; let mut rows = 0; - let mut all_series = vec![]; - for ser in &series { - rows = ser.len().max(rows); - let py_series = rust_series_to_py_series(&ser).unwrap(); - all_series.push(py_series); + + for (name, vals) in per_key_name { + match column_from_pairs(&vals, name, py)? { + DataFrameColumn::Pyany(p) => py_columns.push((p, name)), + DataFrameColumn::Series(s) => { + rows = s.len().max(rows); + series_columns.push((s, name)) + } + }; } + let mut series_col_names: Vec = series_columns + .iter() + .map(|(_, name)| name.to_string()) + .collect(); + let series_columns: Vec = series_columns + .iter() + .map(|(ser, _)| rust_series_to_py_series(&ser).unwrap()) + .collect(); + let py_col_names: Vec = py_columns + .iter() + .map(|(_, name)| name.to_string()) + .collect(); if rows == 0 { return Err(DemoParserError::NoEvents); } let dfp = Python::with_gil(|py| { let polars = py.import("polars").unwrap(); - let df = polars.call_method1("DataFrame", (all_series,)).unwrap(); - df.setattr("columns", column_names.to_object(py)).unwrap(); + let all_series_py = series_columns.to_object(py); + let df = polars.call_method1("DataFrame", (all_series_py,)).unwrap(); + df.setattr("columns", series_col_names.to_object(py)) + .unwrap(); let pandas_df = df.call_method0("to_pandas").unwrap(); + for (pyobj, col_name) in py_columns { + pandas_df + .call_method1("insert", (0, col_name, pyobj)) + .unwrap(); + } + series_col_names.extend(py_col_names); + series_col_names.sort(); + let kwargs = vec![("axis", 1)].into_py_dict(py); + let args = (series_col_names,); + pandas_df + .call_method("reindex", args, Some(kwargs)) + .unwrap(); pandas_df.to_object(py) }); Ok(dfp) } -fn to_f32_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_f32_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -992,9 +1061,9 @@ fn to_f32_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_u32_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_u32_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -1005,9 +1074,9 @@ fn to_u32_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_i32_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_i32_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -1018,9 +1087,9 @@ fn to_i32_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_u64_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_u64_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -1031,9 +1100,23 @@ fn to_u64_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_string_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_py_col(pairs: &Vec<&EventField>, name: &String, py: Python) -> DataFrameColumn { + let mut v = vec![]; + for pair in pairs { + match &pair.data { + Some(k) => match k { + Variant::StringVec(val) => v.push(Some(val.clone())), + _ => v.push(None), + }, + None => v.push(None), + } + } + DataFrameColumn::Pyany(v.to_object(py)) +} + +fn to_string_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -1044,10 +1127,10 @@ fn to_string_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_bool_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_bool_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { let mut v = vec![]; for pair in pairs { match &pair.data { @@ -1058,22 +1141,23 @@ fn to_bool_series(pairs: &Vec<&EventField>, name: &String) -> Series { None => v.push(None), } } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -fn to_null_series(pairs: &Vec<&EventField>, name: &String) -> Series { +fn to_null_series(pairs: &Vec<&EventField>, name: &String) -> DataFrameColumn { // All series are null can pick any type let mut v: Vec> = vec![]; for _ in pairs { v.push(None); } - Series::new(name, v) + DataFrameColumn::Series(Series::new(name, v)) } -pub fn series_from_pairs( +pub fn column_from_pairs( pairs: &Vec<&EventField>, name: &String, -) -> Result { + py: Python, +) -> Result { let field_type = find_type_of_vals(pairs)?; let s = match field_type { @@ -1084,6 +1168,7 @@ pub fn series_from_pairs( Some(Variant::I32(_)) => to_i32_series(pairs, name), Some(Variant::U64(_)) => to_u64_series(pairs, name), Some(Variant::String(_)) => to_string_series(pairs, name), + Some(Variant::StringVec(_)) => to_py_col(pairs, name, py), _ => panic!("unkown ge key: {:?}", field_type), }; Ok(s) @@ -1099,6 +1184,7 @@ fn find_type_of_vals(pairs: &Vec<&EventField>) -> Result, DemoPa Some(Variant::String(s)) => Some(Variant::String(s.clone())), Some(Variant::U64(u)) => Some(Variant::U64(*u)), Some(Variant::U32(u)) => Some(Variant::U32(*u)), + Some(Variant::StringVec(u)) => Some(Variant::StringVec(vec![])), None => None, _ => { return Err(DemoParserError::UnknownGameEventVariant(pair.name.clone()));