diff --git a/src/geometry/core.rs b/src/geometry/core.rs
index 945351e..8b5d207 100644
--- a/src/geometry/core.rs
+++ b/src/geometry/core.rs
@@ -255,7 +255,7 @@ impl GeometricHash {
             GeometricHash::PDBTrRosetta(hash) => hash.reverse_hash_default(),
             GeometricHash::TertiaryInteraction(hash) => hash.reverse_hash_default(),
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 
@@ -271,7 +271,7 @@ impl GeometricHash {
             GeometricHash::PDBTrRosetta(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
             GeometricHash::TertiaryInteraction(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 
@@ -287,7 +287,7 @@ impl GeometricHash {
             GeometricHash::PDBTrRosetta(hash) => hash.hash_type(),
             GeometricHash::TertiaryInteraction(hash) => hash.hash_type(),
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 
@@ -378,7 +378,7 @@ impl GeometricHash {
             GeometricHash::PDBTrRosetta(hash) => hash.as_u64(),
             GeometricHash::TertiaryInteraction(hash) => hash.as_u64(),
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 
@@ -471,7 +471,7 @@ impl fmt::Debug for GeometricHash {
                 write!(f, "TertiaryInteraction({:?})", hash)
             },
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 }
@@ -507,7 +507,7 @@ impl fmt::Display for GeometricHash {
                 write!(f, "TertiaryInteraction\t{:?}", hash)
             },
             // append new hash type here
-            _ => panic!("Invalid hash type"),
+            // _ => panic!("Invalid hash type"),
         }
     }
 }
\ No newline at end of file
diff --git a/src/index/alloc.rs b/src/index/alloc.rs
index 575f5d0..bb077fa 100644
--- a/src/index/alloc.rs
+++ b/src/index/alloc.rs
@@ -3,22 +3,17 @@
 // Author: Hyunbin Kim (khb7840@gmail.com)
 // Copyright © 2023 Hyunbin Kim, All rights reserved
 
-use rayon::iter::ParallelIterator;
-use rayon::prelude::*;
 // external crates
-use rustc_hash::{FxHashMap};
+use rustc_hash::FxHashMap;
 use dashmap::DashMap;
 //
-use std::sync::{Arc};
+use std::sync::Arc;
 use std::cell::UnsafeCell;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::thread;
 
-// Measure time
-
-
 use crate::HashableSync;
 
 const DEFAULT_NUM_THREADS: usize = 4;
@@ -368,121 +363,3 @@ pub fn convert_sorted_pairs_to_offset_and_values_vec Vec> {
-        let mut data = Vec::new();
-        for i in 0..num_key {
-            let mut inner = Vec::new();
-            for j in 0..num_value {
-                inner.push(i * num_value + j);
-            }
-            data.push(inner);
-        }
-        println!("Created test data with {} keys and {} values", num_key, num_value);
-        println!("data.len(): {}", data.len());
-        data
-    }
-}
-
-// pub struct HugeAllocation {
-//     pub allocation: UnsafeCell<Vec<usize>>,
-// }
-
-// unsafe impl Sync for HugeAllocation {}
-
-// pub fn run(num_threads: usize, ext_data: Arc<Vec<Vec<usize>>>) -> Arc<HugeAllocation> {
-//     println!("Creating {} threads", num_threads);
-//     // Iterate through ext_data and get the total size
-//     let total_size = Arc::new(AtomicUsize::new(0));
-//     let ext_data_index = Arc::new(AtomicUsize::new(0));
-//     // Spawn threads to find out the size to allocate
-//     let start = Instant::now();
-//     let mut handles = vec![];
-//     for i in 0..num_threads {
-//         let ext_data = ext_data.clone();
-//         let total_size = total_size.clone();
-//         let ext_data_index = ext_data_index.clone();
-//         let handle = thread::spawn(move || {
-//             // While there is data to check the size, keep checking
-//             while ext_data_index.load(Ordering::Relaxed) < ext_data.len() {
-//                 let ext_data_index = ext_data_index.fetch_add(1, Ordering::Relaxed);
-//                 if ext_data_index >= ext_data.len() {
-//                     break;
-//                 }
-//                 let ext_data_inner = &ext_data[ext_data_index];
-//                 total_size.fetch_add(ext_data_inner.len(), Ordering::Relaxed);
-//             }
-//         });
-//         handles.push(handle);
-//     }
-//     for handle in handles {
-//         handle.join().unwrap();
-//     }
-//     let estimation_time = start.elapsed();
-//     println!("Estimation time: {:?}", estimation_time);
-//     println!(
-//         "Allocating {} gigabytes",
-//         total_size.clone().load(Ordering::Relaxed) as f32 * 8.0 / 1024.0 / 1024.0 / 1024.0
-//     );
-
-//     // Allocate the memory
-//     let start = Instant::now();
-//     let data = Arc::new(HugeAllocation {
-//         allocation: UnsafeCell::new(vec![0; total_size.load(Ordering::Relaxed)]),
-//     });
-//     let allocation_time = start.elapsed();
-//     println!("Allocation time: {:?}", allocation_time);
-//     // Spawn threads to copy the data
-//     let start = Instant::now();
-//     let mut handles = vec![];
-//     let mut ext_data_index = Arc::new(AtomicUsize::new(0));
-
-//     let expected_num_value = ext_data[0].len();
-//     let size_per_value = total_size.load(Ordering::Relaxed) / expected_num_value;
-//     let offset_vec = (0..expected_num_value).map(
-//         |x| AtomicUsize::new(x * size_per_value)
-//     ).collect::<Vec<AtomicUsize>>();
-//     let offset_vec = Arc::new(offset_vec);
-
-//     for i in 0..num_threads {
-//         let data_clone = Arc::clone(&data);
-//         let ext_data = ext_data.clone();
-//         let ext_data_index = ext_data_index.clone();
-//         let offset_vec = offset_vec.clone();
-//         let handle = thread::spawn(move || {
-//             while ext_data_index.load(Ordering::Relaxed) < ext_data.len() {
-//                 let ext_data_index = ext_data_index.fetch_add(1, Ordering::Relaxed);
-//                 if ext_data_index >= ext_data.len() {
-//                     break;
-//                 }
-//                 let ext_data_inner = &ext_data[ext_data_index];
-//                 // let offset_in_allocation = offset_in_allocation.fetch_add(ext_data_inner.len(), Ordering::Relaxed);
-//                 let data = data_clone.allocation.get();
-//                 for j in 0..ext_data_inner.len() {
-//                     // Get offset from offsets map for value of j
-//                     let val = ext_data_inner[j];
-//                     let offset_in_allocation = offset_vec[val].fetch_add(1, Ordering::Relaxed);
-//                     unsafe {
-//                         (*data)[offset_in_allocation] = ext_data_index;
-//                     }
-//                 }
-//             }
-//         });
-//         handles.push(handle);
-//     }
-//     for handle in handles {
-//         handle.join().unwrap();
-//     }
-//     let computation_time = start.elapsed();
-//     println!("Filling time: {:?}", computation_time);
-
-//     for i in 0..10 {
-//         println!("{:?}", offset_vec[i].load(Ordering::Relaxed));
-//     }
-
-//     return data;
-// }
\ No newline at end of file
diff --git a/src/structure/io/mod.rs b/src/structure/io/mod.rs
index f293024..d47bbf6 100644
--- a/src/structure/io/mod.rs
+++ b/src/structure/io/mod.rs
@@ -5,7 +5,7 @@ pub mod pdb;
 #[derive(Debug)]
 pub enum StructureFileFormat {
     PDB,
-    PDB_GZ,
+    PDBGZ,
     CIF,
     FCZ,
     MMTF,
diff --git a/src/utils/benchmark.rs b/src/utils/benchmark.rs
index beffe60..33f9282 100644
--- a/src/utils/benchmark.rs
+++ b/src/utils/benchmark.rs
@@ -74,9 +74,7 @@ pub fn compare_target_answer_set(target: &HashSet,
 pub fn measure_up_to_k_fp(target: &Vec, answer: &HashSet, all: &HashSet, k: f64) -> Metrics {
     // Iter until k false positives are found
     let mut true_pos = 0.0;
-    let mut true_neg = 0.0;
     let mut false_pos = 0.0;
-    let mut false_neg = 0.0;
     // Iterate over target
     for t in target {
         if answer.contains(t) {
@@ -89,8 +87,8 @@
         }
     }
     // False negatives
-    false_neg = answer.len() as f64 - true_pos;
+    let false_neg = answer.len() as f64 - true_pos;
     // True negatives
-    true_neg = all.len() as f64 - (true_pos + false_pos + false_neg);
+    let true_neg = all.len() as f64 - (true_pos + false_pos + false_neg);
     Metrics::new(true_pos, true_neg, false_pos, false_neg)
 }
\ No newline at end of file
diff --git a/tests/common/loader.rs b/tests/common/loader.rs
index 3631c11..f840a3e 100644
--- a/tests/common/loader.rs
+++ b/tests/common/loader.rs
@@ -1,27 +1,27 @@
-pub fn load_homeobox_toy() -> Vec<String> {
-    vec![
-        "data/homeobox/1akha-.pdb".to_string(),
-        "data/homeobox/1b72a-.pdb".to_string(),
-        "data/homeobox/1b72b-.pdb".to_string(),
-        "data/homeobox/1ba5--.pdb".to_string(),
-    ]
-}
+// pub fn load_homeobox_toy() -> Vec<String> {
+//     vec![
+//         "data/homeobox/1akha-.pdb".to_string(),
+//         "data/homeobox/1b72a-.pdb".to_string(),
+//         "data/homeobox/1b72b-.pdb".to_string(),
+//         "data/homeobox/1ba5--.pdb".to_string(),
+//     ]
+// }
 
-pub fn load_yeast_proteome() -> Vec<String> {
-    // Load all pdbs in data/yeast
-    let mut pdb_paths = Vec::new();
-    let paths = std::fs::read_dir("data/yeast").expect("Unable to read yeast proteome");
-    for path in paths {
-        let path = path.expect("Unable to read path");
-        let path = path.path();
-        let path = path.to_str().expect("Unable to convert path to string");
-        // If the path is a pdb file, add it to the list
-        if path.ends_with(".pdb") {
-            pdb_paths.push(path.to_string());
-        }
-    }
-    pdb_paths
-}
+// pub fn load_yeast_proteome() -> Vec<String> {
+//     // Load all pdbs in data/yeast
+//     let mut pdb_paths = Vec::new();
+//     let paths = std::fs::read_dir("data/yeast").expect("Unable to read yeast proteome");
+//     for path in paths {
+//         let path = path.expect("Unable to read path");
+//         let path = path.path();
+//         let path = path.to_str().expect("Unable to convert path to string");
+//         // If the path is a pdb file, add it to the list
+//         if path.ends_with(".pdb") {
+//             pdb_paths.push(path.to_string());
+//         }
+//     }
+//     pdb_paths
+// }
 
 pub fn load_path(dir: &str) -> Vec<String> {
     // Load all pdbs in given path
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 93dc086..2f7b1e6 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -1,4 +1,3 @@
 /* Common functions for testing */
 
 pub mod loader;
-pub mod processor;
diff --git a/tests/common/processor.rs b/tests/common/processor.rs
deleted file mode 100644
index 6e69c44..0000000
--- a/tests/common/processor.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-use motifsearch::structure::core;
-use motifsearch::structure::feature::Torsion;
-use motifsearch::PDBReader;
-
-#[derive(Debug)]
-pub struct TmpGeometry {
-    residue_pair: Vec<(u64, u64)>,
-    angle: Vec<f32>,
-    distance: Vec<f32>,
-
-    torsion: Torsion,
-}
-
-impl TmpGeometry {
-    fn new() -> TmpGeometry {
-        TmpGeometry {
-            residue_pair: Vec::new(),
-            angle: Vec::new(),
-            distance: Vec::new(),
-
-            torsion: Torsion::new(),
-        }
-    }
-
-    fn push(&mut self, res_pair: (u64, u64), angle: f32, distance: f32) {
-        self.residue_pair.push(res_pair);
-        self.angle.push(angle);
-        self.distance.push(distance);
-    }
-
-    fn push_torsion(&mut self, torsion: Torsion) {
-        self.torsion = torsion;
-    }
-}
-
-pub struct TmpPDB {
-    //IDEA: replace Vec to HashMap
-    pdbs: Vec<String>,
-    geometrys: Vec<TmpGeometry>,
-}
-
-impl TmpPDB {
-    fn new() -> TmpPDB {
-        TmpPDB {
-            pdbs: Vec::new(),
-            geometrys: Vec::new(),
-        }
-    }
-
-    fn push(&mut self, pdb: String, geometry: TmpGeometry) {
-        self.pdbs.push(pdb);
-        self.geometrys.push(geometry);
-    }
-}
-
-pub fn process_pdbs(path_vec: &Vec<String>) -> TmpPDB {
-    /* to get distributions of dist, angle, torsion(psi) angle
-    similar to controllers but not hash yet */
-    let mut tmp_pdb = TmpPDB::new();
-
-    for i in 0..path_vec.len() {
-        let pdb_path = &path_vec[i];
-        let pdb_reader = PDBReader::from_file(pdb_path).expect("pdb file not found");
-        let structure = pdb_reader.read_structure().expect("structure read failed");
-        let compact = structure.to_compact();
-
-        let torsion = structure.get_torsion();
-
-        let mut tmp_geometry = process_structure(&compact);
-        tmp_geometry.push_torsion(torsion);
-
-        tmp_pdb.push(pdb_path.to_string(), tmp_geometry);
-    }
-    tmp_pdb
-}
-
-pub fn process_structure(compact: &core::CompactStructure) -> TmpGeometry {
-    let mut tmp_geometry = TmpGeometry::new();
-    /* process one pdb and return TmpGeometry */
-    for i in 0..compact.num_residues {
-        for j in i + 1..compact.num_residues {
-            let resi_pair = compact.get_res_serial(i, j);
-            let dist = compact
-                .get_ca_distance(i, j)
-                .expect("compact failed to get distance");
-            let angle = compact.get_ca_cb_angle(i, j).unwrap_or(0.0);
-            // let angle = compact.get_angle(i,j).expect("compact failed to get angle");
-            tmp_geometry.push(resi_pair, angle, dist);
-        }
-    }
-    tmp_geometry
-}
diff --git a/tests/toydata_test.rs b/tests/toydata_test.rs
deleted file mode 100644
index be8e381..0000000
--- a/tests/toydata_test.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-mod common;
-
-#[test]
-fn test_serine_peptidase() {
-    let path = "data/serine_peptidases_filtered";
-    let pdb_paths = common::loader::load_path(path);
-    let _pdb_features = common::processor::process_pdbs(&pdb_paths);
-    //TODO:
-    // - change struct Torsion HashMap -> Vector
-    // - implement code to save Feature Data
-}