Skip to content

Commit

Permalink
refactor: library is now only
Browse files Browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Jun 15, 2024
1 parent 6abbe22 commit 8aefde2
Show file tree
Hide file tree
Showing 20 changed files with 158 additions and 170 deletions.
66 changes: 33 additions & 33 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[package]
name = "xpmir-rust"
name = "impact-index"
version = "0.22.0"
edition = "2021"
authors = ["Benjamin Piwowarski <[email protected]>"]
license = "GPL-2.0-or-later"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "xpmir_rust"
name = "impact_index"
crate-type = ["cdylib", "rlib"]
bench = false
test = false
Expand Down
8 changes: 5 additions & 3 deletions benches/sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use std::collections::HashMap;
use criterion::{criterion_group, criterion_main, Criterion};

use helpers::documents::{create_document, document_vectors};
use impact_index::{
base::SearchFn,
builder::Indexer,
search::{maxscore::search_maxscore, wand::search_wand},
};
use log::info;
use rand::thread_rng;
use temp_dir::TempDir;
use xpmir_rust::index::sparse::{
builder::Indexer, maxscore::search_maxscore, wand::search_wand, SearchFn,
};

fn benchmark(c: &mut Criterion, name: &str, search_fn: SearchFn) {
let mut rng = thread_rng();
Expand Down
78 changes: 78 additions & 0 deletions src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,81 @@ pub type BoxResult<T> = Result<T, Box<dyn std::error::Error>>;
pub trait Len {
fn len(&self) -> usize;
}

use std::collections::HashMap;
use std::fmt;
use std::{fs::File, path::Path};

use crate::builder::load_forward_index;
use crate::index::{BlockTermImpactIterator, SparseIndex};
use crate::search::ScoredDocument;
use serde::{Deserialize, Serialize};

pub const INDEX_CBOR: &str = "index.cbor";
pub const BUILDER_INDEX_CBOR: &str = "information.cbor";

/// Term impact: a document ID paired with an impact value.
///
/// The type is `Copy` and (de)serializable through serde.
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct TermImpact {
    /// Document ID part of the pair
    pub docid: DocId,
    /// Impact value part of the pair
    pub value: ImpactValue,
}

/// Formats a term impact as `(docid,value)`, without any space after the comma.
impl fmt::Display for TermImpact {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { docid, value } = self;
        write!(f, "({},{})", docid, value)
    }
}

/// An iterator on term impacts.
///
/// This is a boxed [`BlockTermImpactIterator`] trait object; the lifetime `'a`
/// is the bound placed on the boxed iterator (it may not outlive `'a`).
pub type TermImpactIterator<'a> = Box<dyn BlockTermImpactIterator + 'a>;

/// A search function.
///
/// Plain function pointer type with the following signature:
///
/// - `index`: the sparse index to search, passed as a trait object;
/// - `query`: a map from term index to impact value
///   (NOTE(review): presumably the query-side weight of each term — confirm
///   against the search implementations);
/// - `top_k`: a count (NOTE(review): presumably the maximum number of
///   documents to return — confirm against the search implementations).
///
/// Returns a vector of [`ScoredDocument`].
pub type SearchFn = fn(
index: &dyn SparseIndex,
query: &HashMap<TermIndex, ImpactValue>,
top_k: usize,
) -> Vec<ScoredDocument>;

/// A serializable object that knows how to produce a [`SparseIndex`].
///
/// The `typetag::serde` attribute makes `dyn IndexLoader` (de)serializable,
/// using the `"type"` field as the tag identifying the concrete implementation.
/// `load_index` deserializes a `Box<dyn IndexLoader>` and calls
/// [`IndexLoader::into_index`] on it; `save_index` serializes one.
#[typetag::serde(tag = "type")]
pub trait IndexLoader {
/// Consumes the loader and returns an index.
///
/// - `path`: `load_index` passes the same directory it read the loader from.
/// - `in_memory`: forwarded as-is by `load_index`
///   (NOTE(review): presumably selects in-memory vs. memory-mapped storage —
///   confirm with the implementors).
fn into_index(self: Box<Self>, path: &Path, in_memory: bool) -> Box<dyn SparseIndex>;
}

/// Loads the sparse index stored in the directory `path`.
///
/// Two layouts are recognised:
///
/// - if `path` contains a [`BUILDER_INDEX_CBOR`] file, the directory holds the
///   old format (raw builder index) and is read with [`load_forward_index`];
/// - otherwise the [`INDEX_CBOR`] file is decoded (CBOR) into a boxed
///   [`IndexLoader`], which is then converted into the index.
///
/// `in_memory` is forwarded unchanged to whichever loader is selected.
///
/// # Panics
///
/// In the second case, panics if [`INDEX_CBOR`] cannot be opened or decoded.
pub fn load_index(path: &Path, in_memory: bool) -> Box<dyn SparseIndex> {
    // Old format: the raw builder index lives directly in the directory
    if path.join(BUILDER_INDEX_CBOR).exists() {
        return Box::new(load_forward_index(path, in_memory));
    }

    // New format: a serialized loader describes how to open the index
    let loader_file = File::open(path.join(INDEX_CBOR))
        .expect("Error while opening the index information file");
    let loader: Box<dyn IndexLoader> = ciborium::de::from_reader(loader_file)
        .expect("Error loading compressed term index information");

    loader.into_index(path, in_memory)
}

/// Serializes `loader` (CBOR) into the [`INDEX_CBOR`] file of the directory `path`.
///
/// The file is created when missing and truncated when it already exists.
///
/// # Errors
///
/// Returns an [`std::io::Error`] instead of panicking when:
///
/// - the file cannot be created or opened for writing (the error keeps the
///   original [`std::io::ErrorKind`] and its message includes the file path);
/// - the loader cannot be serialized or written (reported with
///   [`std::io::ErrorKind::Other`]).
pub fn save_index(loader: Box<dyn IndexLoader>, path: &Path) -> Result<(), std::io::Error> {
    let info_path = path.join(INDEX_CBOR);

    let info_file = File::options()
        .write(true)
        .truncate(true)
        .create(true)
        .open(&info_path)
        // The message is only built on failure, and keeps the path for context
        .map_err(|err| {
            std::io::Error::new(
                err.kind(),
                format!("Error while creating file {}: {}", info_path.display(), err),
            )
        })?;

    // The serializer error is not an `io::Error`: wrap its description
    ciborium::ser::into_writer(&loader, info_file).map_err(|err| {
        std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Error saving compressed term index information: {}", err),
        )
    })?;

    Ok(())
}
9 changes: 4 additions & 5 deletions src/index/sparse/builder.rs → src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ use ndarray::{ArrayBase, Data, Ix1};
use super::{
index::{BlockTermImpactIterator, SparseIndex},
index::{IndexInformation, TermIndexPageInformation},
TermImpact,
};
use crate::{
base::Len,
utils::buffer::{Buffer, MemoryBuffer, MmapBuffer, Slice},
base::{BoxResult, DocId, ImpactValue, TermIndex},
index::TermIndexInformation,
};
use crate::{
base::{BoxResult, DocId, ImpactValue, TermIndex},
index::sparse::index::TermIndexInformation,
base::{Len, TermImpact},
utils::buffer::{Buffer, MemoryBuffer, MmapBuffer, Slice},
};

/*
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 1 addition & 3 deletions src/index/sparse/compress/mod.rs → src/compress/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ use std::{

use super::{
index::{BlockTermImpactIterator, SparseIndex, SparseIndexView},
save_index,
transforms::IndexTransform,
IndexLoader, TermImpact,
};
use crate::{
base::{DocId, ImpactValue, Len, TermIndex},
base::{save_index, DocId, ImpactValue, IndexLoader, Len, TermImpact, TermIndex},
utils::buffer::{Buffer, MemoryBuffer, MmapBuffer, Slice},
};
use log::debug;
Expand Down
2 changes: 1 addition & 1 deletion src/index/sparse/index.rs → src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};

use crate::base::{DocId, ImpactValue, Len, TermIndex};

use super::TermImpact;
use crate::base::TermImpact;

#[derive(Serialize, Deserialize)]
pub struct TermIndexPageInformation {
Expand Down
1 change: 0 additions & 1 deletion src/index/mod.rs

This file was deleted.

86 changes: 0 additions & 86 deletions src/index/sparse/mod.rs

This file was deleted.

7 changes: 4 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
pub mod base;
pub mod builder;
pub mod compress;
pub mod index;
pub mod search;
pub mod index {
pub mod sparse;
}
pub mod transforms;

mod py;
mod utils;
16 changes: 8 additions & 8 deletions src/py/sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::task;

use crate::index::sparse::compress;
use crate::index::sparse::transforms;
use crate::compress;

use crate::base::load_index;
use crate::base::{DocId, ImpactValue, TermIndex};
use crate::index::sparse::index::{SparseIndex, SparseIndexView};
use crate::index::sparse::load_index;
use crate::index::sparse::maxscore::search_maxscore;
use crate::index::sparse::transforms::IndexTransform;
use crate::index::sparse::{
builder::Indexer as SparseIndexer, wand::search_wand, SearchFn, TermImpactIterator,
use crate::index::SparseIndex;
use crate::search::maxscore::search_maxscore;
use crate::transforms::IndexTransform;
use crate::{
base::SearchFn, base::TermImpactIterator, builder::Indexer as SparseIndexer,
search::wand::search_wand,
};

use numpy::PyArray1;
Expand Down
8 changes: 3 additions & 5 deletions src/index/sparse/maxscore.rs → src/search/maxscore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@ use std::collections::HashMap;
use log::debug;

use crate::{
base::{DocId, ImpactValue},
base::{DocId, ImpactValue, TermImpact},
index::SparseIndex,
search::{ScoredDocument, TopScoredDocuments},
};

use crate::base::TermIndex;

use super::{
index::{BlockTermImpactIterator, SparseIndex},
TermImpact,
};
use crate::index::BlockTermImpactIterator;

struct MaxScoreTermIterator<'a> {
iterator: Box<dyn BlockTermImpactIterator + 'a>,
Expand Down
3 changes: 3 additions & 0 deletions src/search.rs → src/search/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
pub mod maxscore;
pub mod wand;

use std::{cmp::Ordering, collections::BinaryHeap};

use crate::base::{DocId, ImpactValue};
Expand Down
2 changes: 1 addition & 1 deletion src/index/sparse/wand.rs → src/search/wand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{

use crate::base::TermIndex;

use super::index::{BlockTermImpactIterator, SparseIndex};
use crate::index::{BlockTermImpactIterator, SparseIndex};

/**
* WAND algorithm
Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 8aefde2

Please sign in to comment.