Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Local image parsing #458

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions shinkai-libs/shinkai-ocr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ tokio = { version = "1.36", features = ["full"] }

[features]
default = []
static = ["pdfium-render/static", "pdfium-render/libc++"]
static-pdf-parser = ["pdfium-render/static", "pdfium-render/libc++"]
dynamic-pdf-parser = []

[[test]]
name = "image_parser_tests"
path = "tests/image_parser_tests.rs"

[[test]]
name = "pdf_parser_tests"
path = "tests/pdf_parser_tests.rs"
path = "tests/pdf_parser_tests.rs"
required-features = ["static-pdf-parser"]
6 changes: 3 additions & 3 deletions shinkai-libs/shinkai-ocr/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ RUSTFLAGS=-g cargo build --release

### Static linking PDFium

By default the project binds to the PDFium dynamic library at runtime. To statically link PDFium build with feature `static` enabled:
With the `dynamic-pdf-parser` feature enabled, the project binds to the PDFium dynamic library at runtime. To statically link PDFium instead, build with the `static-pdf-parser` feature enabled:

```sh
cargo build --release --features static
cargo build --release --features static-pdf-parser
```

The project needs to link the PDFium static library, which should be available as `libpdfium.a` in the PDFium directory. If you wish to build PDFium from source, follow the steps in the *Building PDFium static library from source* section.
Expand Down Expand Up @@ -77,5 +77,5 @@ PDFIUM_DYNAMIC_LIB_PATH=$(PWD)/pdfium/linux-x64 cargo test -- --test-threads=1
## Running tests

```sh
cargo test --features static -- --test-threads=1
cargo test --features static-pdf-parser -- --test-threads=1
```
90 changes: 45 additions & 45 deletions shinkai-libs/shinkai-ocr/build.rs
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
use std::{env, path::PathBuf};

fn main() {
#[cfg(target_os = "linux")]
let os = "linux";

#[cfg(target_os = "macos")]
let os = "mac";
#[cfg(any(feature = "dynamic-pdf-parser", feature = "static-pdf-parser"))]
{
use std::{env, path::PathBuf};

#[cfg(target_os = "windows")]
let os = "win";
#[cfg(target_os = "linux")]
let os = "linux";

#[cfg(target_arch = "aarch64")]
let arch = "arm64";
#[cfg(target_os = "macos")]
let os = "mac";

#[cfg(target_arch = "x86_64")]
let arch = "x64";
#[cfg(target_os = "windows")]
let os = "win";

let current_directory = env::var("CARGO_MANIFEST_DIR").unwrap();
#[cfg(target_arch = "aarch64")]
let arch = "arm64";

let pdfium_directory = format!("pdfium/{}-{}", os, arch);
let pdfium_lib_path = PathBuf::from(&current_directory).join(pdfium_directory);
#[cfg(target_arch = "x86_64")]
let arch = "x64";

#[cfg(feature = "static")]
{
println!("cargo:rustc-link-search=native={}", pdfium_lib_path.display());
println!("cargo:rustc-link-lib=static=pdfium");
let current_directory = env::var("CARGO_MANIFEST_DIR").unwrap();

#[cfg(target_os = "linux")]
println!("cargo:rustc-link-lib=dylib=stdc++");
let pdfium_directory = format!("pdfium/{}-{}", os, arch);
let pdfium_lib_path = PathBuf::from(&current_directory).join(pdfium_directory);

#[cfg(target_os = "macos")]
#[cfg(feature = "static-pdf-parser")]
{
println!("cargo:rustc-link-lib=dylib=c++");
println!("cargo:rustc-link-lib=framework=CoreGraphics");
}
}
println!("cargo:rustc-link-search=native={}", pdfium_lib_path.display());
println!("cargo:rustc-link-lib=static=pdfium");

#[cfg(not(feature = "static"))]
{
let out_dir = env::var("OUT_DIR").unwrap();
let out_dir = PathBuf::from(&out_dir);
let out_dir = out_dir.iter().collect::<Vec<_>>();
#[cfg(target_os = "linux")]
println!("cargo:rustc-link-lib=dylib=stdc++");

let target_dir = out_dir.iter().take(out_dir.len() - 4).collect::<PathBuf>();
let bin_dir = target_dir.join(env::var("PROFILE").unwrap());
let pdfium_dest_dir = bin_dir.join(format!("pdfium/{}-{}", os, arch));
#[cfg(target_os = "macos")]
{
println!("cargo:rustc-link-lib=dylib=c++");
println!("cargo:rustc-link-lib=framework=CoreGraphics");
}
}

let _ = std::fs::create_dir_all(&pdfium_dest_dir);
#[cfg(feature = "dynamic-pdf-parser")]
{
let out_dir = env::var("OUT_DIR").unwrap();
let out_dir = PathBuf::from(&out_dir);
let out_dir = out_dir.iter().collect::<Vec<_>>();

#[cfg(target_os = "linux")]
let pdfium_lib = "libpdfium.so";
let target_dir = out_dir.iter().take(out_dir.len() - 4).collect::<PathBuf>();
let bin_dir = target_dir.join(env::var("PROFILE").unwrap());

#[cfg(target_os = "macos")]
let pdfium_lib = "libpdfium.dylib";
#[cfg(target_os = "linux")]
let pdfium_lib = "libpdfium.so";

#[cfg(target_os = "windows")]
let pdfium_lib = "pdfium.dll";
#[cfg(target_os = "macos")]
let pdfium_lib = "libpdfium.dylib";

let pdfium_lib_source = pdfium_lib_path.join(pdfium_lib);
let pdfium_lib_dest = pdfium_dest_dir.join(pdfium_lib);
#[cfg(target_os = "windows")]
let pdfium_lib = "pdfium.dll";

std::fs::copy(pdfium_lib_source, pdfium_lib_dest).unwrap();
let pdfium_lib_source = pdfium_lib_path.join(pdfium_lib);
let pdfium_lib_dest = bin_dir.join(pdfium_lib);

std::fs::copy(pdfium_lib_source, pdfium_lib_dest).unwrap();
}
}
}
1 change: 1 addition & 0 deletions shinkai-libs/shinkai-ocr/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod image_parser;
#[cfg(any(feature = "dynamic-pdf-parser", feature = "static-pdf-parser"))]
pub mod pdf_parser;
12 changes: 9 additions & 3 deletions shinkai-libs/shinkai-ocr/src/pdf_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl PDFParser {
pub fn new() -> anyhow::Result<Self> {
let image_parser = ImageParser::new()?;

#[cfg(not(feature = "static"))]
#[cfg(feature = "dynamic-pdf-parser")]
let pdfium = {
let lib_path = match std::env::var("PDFIUM_DYNAMIC_LIB_PATH").ok() {
Some(lib_path) => lib_path,
Expand All @@ -47,10 +47,16 @@ impl PDFParser {
}
};

Pdfium::new(Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(&lib_path)).unwrap())
// Look for the dynamic library in the specified path or fall back to the current directory.
let bindings = match Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(&lib_path)) {
Ok(bindings) => bindings,
Err(_) => Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))?,
};

Pdfium::new(bindings)
};

#[cfg(feature = "static")]
#[cfg(feature = "static-pdf-parser")]
let pdfium = Pdfium::new(Pdfium::bind_to_statically_linked_library().unwrap());

Ok(PDFParser { image_parser, pdfium })
Expand Down
6 changes: 3 additions & 3 deletions shinkai-libs/shinkai-vector-resources/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ base64 = "0.13.0"
futures = "0.3.30"
urlencoding = "1.1.1"
docx-rust = "0.1.8"
shinkai_ocr = { path = "../shinkai-ocr", optional = true }
shinkai_ocr = { path = "../shinkai-ocr" }

[build-dependencies]
reqwest = { version = "0.11.26", features = ["json", "tokio-native-tls", "blocking", "multipart"] }

[features]
default = ["desktop-only"]
desktop-only = ["reqwest/blocking", "comrak"]
dynamic-pdf-parser = ["shinkai_ocr"]
static-pdf-parser = ["shinkai_ocr/static"]
dynamic-pdf-parser = ["shinkai_ocr/dynamic-pdf-parser"]
static-pdf-parser = ["shinkai_ocr/static-pdf-parser"]

wasm-http = []

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use shinkai_ocr::image_parser::ImageParser;

use crate::{
file_parser::{file_parser::ShinkaiFileParser, file_parser_types::TextGroup},
resource_errors::VRError,
};

use super::LocalFileParser;

impl LocalFileParser {
    /// Extracts text from an in-memory image file and splits it into text groups.
    ///
    /// The buffer is handed to `shinkai_ocr`'s `ImageParser`; the text it returns is then
    /// split by `ShinkaiFileParser::parse_and_split_into_text_groups` using
    /// `max_node_text_size`.
    ///
    /// # Errors
    ///
    /// Returns `VRError::FailedImageParsing` if the `ImageParser` cannot be created or if it
    /// fails to process the image. The underlying error detail is discarded.
    pub fn process_image_file(file_buffer: Vec<u8>, max_node_text_size: u64) -> Result<Vec<TextGroup>, VRError> {
        // Both failure modes collapse into the same payload-free error variant.
        let parser = match ImageParser::new() {
            Ok(parser) => parser,
            Err(_) => return Err(VRError::FailedImageParsing),
        };

        let extracted_text = match parser.process_image_file(file_buffer) {
            Ok(extracted_text) => extracted_text,
            Err(_) => return Err(VRError::FailedImageParsing),
        };

        Ok(ShinkaiFileParser::parse_and_split_into_text_groups(
            extracted_text,
            max_node_text_size,
        ))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ impl LocalFileParser {
VRSourceReference::Standard(source) => match source {
SourceReference::Other(_) => Err(VRError::UnsupportedFileType(file_name.to_string())),
SourceReference::FileRef(file_source) => match file_source.clone().file_type {
SourceFileType::Image(_)
| SourceFileType::Code(_)
SourceFileType::Code(_)
| SourceFileType::ConfigFileType(_)
| SourceFileType::Video(_)
| SourceFileType::Audio(_)
Expand All @@ -47,6 +46,7 @@ impl LocalFileParser {

_ => Err(VRError::UnsupportedFileType(file_name.to_string())),
},
SourceFileType::Image(_) => LocalFileParser::process_image_file(file_buffer, max_node_text_size),
},
SourceReference::ExternalURI(_) => Err(VRError::UnsupportedFileType(file_name.to_string())),
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod csv_parsing;
pub mod docx_parsing;
pub mod html_parsing;
pub mod image_parsing;
pub mod json_parsing;
pub mod local_parsing;
pub mod md_parsing;
Expand Down
2 changes: 2 additions & 0 deletions shinkai-libs/shinkai-vector-resources/src/resource_errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub enum VRError {
FailedEmbeddingGeneration(String),
NoNodeFound,
InvalidModelArchitecture,
FailedImageParsing,
FailedJSONParsing,
FailedCSVParsing,
FailedDOCXParsing,
Expand Down Expand Up @@ -55,6 +56,7 @@ impl fmt::Display for VRError {
VRError::InvalidModelArchitecture => {
write!(f, "An unsupported model architecture was specified.")
}
VRError::FailedImageParsing => write!(f, "Failed image parsing."),
VRError::FailedJSONParsing => write!(f, "Failed JSON parsing."),
VRError::FailedCSVParsing => write!(f, "Failed CSV parsing."),
VRError::FailedDOCXParsing => write!(f, "Failed DOCX parsing."),
Expand Down