Skip to content

Commit

Permalink
[IN PROGRESS] optimizing query
Browse files Browse the repository at this point in the history
  • Loading branch information
khb7840 committed Oct 4, 2024
1 parent 11b51fd commit efe5183
Show file tree
Hide file tree
Showing 9 changed files with 499 additions and 149 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ toml = "0.8.12"
regex = "1.10.4"
petgraph = "0.6.4"
libc = "0.2.155"
lazy_static = "1.5.0"
dashmap = { version = "6.1.0", features = ["rayon"] }


Expand Down
6 changes: 4 additions & 2 deletions src/cli/workflows/query_pdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use rayon::prelude::*;

use crate::cli::config::{read_index_config_from_file, IndexConfig};
use crate::controller::map::SimpleHashMap;
use crate::controller::mode::{parse_path_by_id_type, IdType, IndexMode};
use crate::controller::mode::{parse_path_by_id_type, parse_path_by_id_type_with_string, IdType, IndexMode};
use crate::cli::*;
use crate::controller::io::{read_compact_structure, read_u16_vector};
use crate::controller::query::{check_and_get_indices, get_offset_value_lookup_type, make_query_map, parse_threshold_string};
Expand Down Expand Up @@ -389,8 +389,10 @@ pub fn query_pdb(env: AppArgs) {
if header {
println!("{}", QUERY_RESULT_HEADER);
}
let mut id_container = String::new();
for (_k, v) in queried_from_indices.iter_mut() {
v.id = parse_path_by_id_type(&v.id, &id_type);
parse_path_by_id_type_with_string(v.id, &id_type, &mut id_container);
v.id = Box::leak(id_container.clone().into_boxed_str());
println!("{:?}\t{}\t{}\t{}", v, query_string, pdb_path, index_path.clone().unwrap());
}
}
Expand Down
77 changes: 77 additions & 0 deletions src/controller/mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,83 @@ pub fn parse_path_by_id_type(path: &str, id_type: &IdType) -> String {
}
}


/// Returns the AFDB-style identifier contained in `file_name`, if any.
///
/// The identifier is the first substring matching `AF-.+-model_v\d`. The
/// pattern is compiled on first use and cached for the rest of the process,
/// so repeated calls do not pay the regex compilation cost again.
fn afdb_model_id(file_name: &str) -> Option<&str> {
    static AFDB_REGEX: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let afdb_regex = AFDB_REGEX.get_or_init(|| {
        regex::Regex::new(r"AF-.+-model_v\d").expect("hard-coded AFDB pattern is a valid regex")
    });
    afdb_regex.find(file_name).map(|found| found.as_str())
}

/// Writes the identifier derived from `path` into `string`, reusing its allocation.
///
/// `string` is cleared first, so on return it holds only the new identifier.
/// The identifier depends on `id_type`:
///
/// * `Pdb` - file stem with a leading `pdb` removed, if present.
/// * `Afdb` - the part of the file stem matching `AF-.+-model_v\d`, or the
///   whole file stem when nothing matches.
/// * `UniProt` - the second `-`-separated field of that match, or the whole
///   file stem when nothing matches.
/// * `BasenameWithoutExt` - file stem.
/// * `BasenameWithExt` - file name including its extension.
/// * `AbsPath` - canonicalized path as reported by the file system.
/// * `RelPath` / `Other` - `path` unchanged.
///
/// # Panics
///
/// Panics when `path` has no final file-name component for the modes that
/// need one, and for `AbsPath` when the path cannot be canonicalized or the
/// canonical path is not valid UTF-8.
pub fn parse_path_by_id_type_with_string(path: &str, id_type: &IdType, string: &mut String) {
    string.clear();
    match id_type {
        IdType::Pdb => {
            let file_name = Path::new(path).file_stem().unwrap().to_str().unwrap();
            // Strip the leading "pdb" when present; otherwise keep the stem as is.
            string.push_str(file_name.strip_prefix("pdb").unwrap_or(file_name));
        }
        IdType::Afdb => {
            let file_name = Path::new(path).file_stem().unwrap().to_str().unwrap();
            string.push_str(afdb_model_id(file_name).unwrap_or(file_name));
        }
        IdType::UniProt => {
            let file_name = Path::new(path).file_stem().unwrap().to_str().unwrap();
            // Fall back to the plain file stem when there is no AFDB match.
            // Previously this case pushed the stem and then unwrapped the
            // missing match, which panicked.
            match afdb_model_id(file_name).and_then(|id| id.split('-').nth(1)) {
                Some(accession) => string.push_str(accession),
                None => string.push_str(file_name),
            }
        }
        IdType::BasenameWithoutExt => {
            let file_name = Path::new(path).file_stem().unwrap().to_str().unwrap();
            string.push_str(file_name);
        }
        IdType::BasenameWithExt => {
            let file_name = Path::new(path).file_name().unwrap().to_str().unwrap();
            string.push_str(file_name);
        }
        IdType::AbsPath => {
            let absolute = fs::canonicalize(path).unwrap();
            string.push_str(absolute.to_str().unwrap());
        }
        IdType::RelPath | IdType::Other => {
            string.push_str(path);
        }
    }
}


pub fn parse_path_vec_by_id_type(path_vec: &Vec<String>, id_type: IdType) -> Vec<String> {
let mut parsed_path_vec = Vec::with_capacity(path_vec.len());
for path in path_vec {
Expand Down
Loading

0 comments on commit efe5183

Please sign in to comment.