Skip to content

Commit

Permalink
Merge pull request #7 from satrong/main
Browse files Browse the repository at this point in the history
支持更多类型的文件上传
  • Loading branch information
sugarforever authored Mar 7, 2024
2 parents a838744 + a6abc46 commit 5b99572
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 43 deletions.
2 changes: 1 addition & 1 deletion components/Chat.vue
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ const onSend = async () => {
});
const body = JSON.stringify({
knowledgebaseId: props.knowledgebase.id,
knowledgebaseId: props.knowledgebase?.id,
model: model.value,
messages: [...messages.value],
stream: true,
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
},
"devDependencies": {
"@types/ws": "^8.5.10",
"h3": "^1.11.1",
"nuxt": "^3.10.0",
"vue": "^3.4.15",
"vue-router": "^4.2.5"
Expand Down
2 changes: 1 addition & 1 deletion pages/knowledgebases/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ const knowlegeBases = computed(() => {
</UFormGroup>

<UFormGroup label="File as Knowledge Base" name="file">
<UInput multiple type="file" size="sm" v-model="state.selectedFile" @change="onFileChange" />
<UInput multiple type="file" size="sm" accept=".txt,.json,.md,.doc,.docx,.pdf" v-model="state.selectedFile" @change="onFileChange" />
</UFormGroup>

<UButton type="submit" :loading="loading">
Expand Down
48 changes: 38 additions & 10 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 51 additions & 31 deletions server/api/knowledgebases/index.post.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
import path from 'node:path'
import fs from 'node:fs/promises'
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { JSONLoader } from "langchain/document_loaders/fs/json";
import { DocxLoader } from "langchain/document_loaders/fs/docx";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama";
import { Chroma } from "@langchain/community/vectorstores/chroma";
import { PrismaClient } from '@prisma/client';
import { MultiPartData } from 'h3'

const ingestDocument = async (file, collectionName, embedding) => {
const tmpDir = path.resolve('tmp');
try {
await fs.access(tmpDir);
} catch (error) {
await fs.mkdir(tmpDir);
}
const tmp_file_path = path.join(tmpDir, file.filename);

const status = await fs.writeFile(tmp_file_path, file.data)
console.log(`Writing data to file ${tmp_file_path}: ${status}`);

const loader = new PDFLoader(tmp_file_path);
const docs = await loader.load();
const ingestDocument = async (file: MultiPartData, collectionName: string, embedding: string) => {
const docs = await loadDocuments(file)

const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
const splits = await textSplitter.splitDocuments(docs);
Expand All @@ -42,38 +32,68 @@ const ingestDocument = async (file, collectionName, embedding) => {
}
}

/**
 * Loads an uploaded multipart file into documents using the loader that
 * matches the file's extension.
 *
 * The extension is taken from the trailing `.<word>` of `file.filename`
 * (case-insensitive). When the filename is missing or has no extension,
 * the file is treated as `txt`.
 *
 * @param file - One multipart form entry; its `data` is wrapped in a Blob
 *   (tagged with `file.type`) and handed to the selected loader.
 * @returns Whatever the selected loader's `load()` resolves to.
 * @throws Error `Unsupported file type: <ext>` when the extension is not one
 *   of the keys of the loader table below.
 */
async function loadDocuments(file: MultiPartData) {
  // NOTE(review): `doc` is routed to DocxLoader; confirm that loader can
  // actually parse legacy binary .doc files, otherwise drop this mapping.
  const Loaders = {
    pdf: PDFLoader,
    json: JSONLoader,
    docx: DocxLoader,
    doc: DocxLoader,
    txt: TextLoader,
    md: TextLoader,
  } as const;

  const ext = (file.filename?.match(/\.(\w+)$/)?.[1] || 'txt').toLowerCase();

  // Use an own-property check rather than a truthiness lookup: a plain
  // `Loaders[ext]` also resolves inherited keys such as `constructor` or
  // `toString`, which would slip past the guard and fail later with an
  // unrelated TypeError instead of the intended "unsupported" error.
  if (!Object.prototype.hasOwnProperty.call(Loaders, ext)) {
    throw new Error(`Unsupported file type: ${ext}`);
  }

  const Loader = Loaders[ext as keyof typeof Loaders];
  const blob = new Blob([file.data], { type: file.type });
  return new Loader(blob).load();
}

export default defineEventHandler(async (event) => {
const items = await readMultipartFormData(event);

const knowledgeBase: { [key: string]: string | Date } = {};
const decoder = new TextDecoder("utf-8");
const uploadedFiles = [];
const uploadedFiles: MultiPartData[] = [];

let _name = ''
let _description = ''
let _embedding = ''
items?.forEach((item) => {
const { name, data, filename } = item;
if (name) {
if (name.startsWith("file_")) {
uploadedFiles.push(item);
}
if (["name", "description", "embedding"].includes(name)) {
knowledgeBase[name] = decoder.decode(data);
}
const key = item.name || '';
const decodeData = decoder.decode(item.data)
if (key.startsWith("file_")) {
uploadedFiles.push(item);
}
if (key === 'name') {
_name = decodeData
}
if (key === 'description') {
_description = decodeData
}
if (key === 'embedding') {
_embedding = decodeData
}
});

const prisma = new PrismaClient();
knowledgeBase.created = new Date();
const affected = await prisma.knowledgeBase.create({
data: knowledgeBase
data: {
name: _name,
description: _description,
embedding: _embedding,
created: new Date(),
}
});
console.log(`Created knowledge base ${knowledgeBase.name}: ${affected}`);
console.log(`Created knowledge base ${_name}: ${affected}`);

if (uploadedFiles.length > 0) {
for (const uploadedFile of uploadedFiles) {
await ingestDocument(uploadedFile, `collection_${affected.id}`, affected.embedding);
await ingestDocument(uploadedFile, `collection_${affected.id}`, affected.embedding!);

const createdKnowledgeBaseFile = await prisma.knowledgeBaseFile.create({
data: {
url: uploadedFile.filename,
url: uploadedFile.filename!,
knowledgeBaseId: affected.id
}
});
Expand Down

0 comments on commit 5b99572

Please sign in to comment.