Skip to content

Commit

Permalink
Merge pull request #36 from upstash/add-metadata-to-pdf
Browse files: browse the repository at this point in the history
feat: add metadata to pdf
  • Loading branch information
ogzhanolguncu committed Aug 7, 2024
2 parents fa70ac4 + 8d6bea8 commit 6c91446
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/file-loader.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable @typescript-eslint/no-unsafe-argument */
import { CSVLoader } from "@langchain/community/document_loaders/fs/csv";
Expand Down Expand Up @@ -67,7 +69,16 @@ export class FileDataLoader {
const splitter = new RecursiveCharacterTextSplitter(args);
const splittedDocuments = await splitter.splitDocuments(documents);

return mapDocumentsIntoInsertPayload(splittedDocuments);
return mapDocumentsIntoInsertPayload(splittedDocuments, (metadata: any, index: number) => ({
source: metadata.source,
timestamp: new Date().toISOString(),
paragraphNumber: index + 1,
pageNumber: metadata.loc?.pageNumber || undefined,
author: metadata.pdf?.info?.Author || undefined,
title: metadata.pdf?.info?.Title || undefined,
totalPages: metadata.pdf?.totalPages || undefined,
language: metadata.pdf?.metadata?._metadata?.["dc:language"] || undefined,
}));
}

case "csv": {
Expand Down Expand Up @@ -98,10 +109,14 @@ export class FileDataLoader {
}
}

function mapDocumentsIntoInsertPayload(splittedDocuments: Document[]) {
return splittedDocuments.map((document) => ({
/**
 * Turns split documents into insert payload entries.
 *
 * Each entry carries the document's `pageContent` as `data` and a fresh id
 * from `nanoid()`. When a `metadataMapper` is supplied, its result for the
 * document's metadata and position is attached under `metadata`; without a
 * mapper the `metadata` key is left off the entry entirely.
 *
 * @param splittedDocuments - Documents to convert, processed in array order.
 * @param metadataMapper - Optional callback receiving a document's metadata
 *   and its zero-based index in `splittedDocuments`.
 * @returns One payload entry per input document, in the same order.
 */
function mapDocumentsIntoInsertPayload(
  splittedDocuments: Document[],
  metadataMapper?: (metadata: any, index: number) => Record<string, any>
) {
  return splittedDocuments.map((document, index) => {
    const data = document.pageContent;
    const id = nanoid();
    // Spread an empty object when no mapper is given so the key is absent
    // rather than present with an undefined value.
    const metadataPart = metadataMapper
      ? { metadata: metadataMapper(document.metadata, index) }
      : {};

    return { data, id, ...metadataPart };
  });
}
}
Expand Down

0 comments on commit 6c91446

Please sign in to comment.