Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into EAI-235
Browse files: browse the repository at this point in the history
  • Loading branch information
mongodben committed Mar 29, 2024
2 parents 8ab2f67 + 2b8757c commit aa6ee21
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 105 deletions.
13 changes: 0 additions & 13 deletions packages/ingest-mongodb-public/src/sources/cppDriver.test.ts

This file was deleted.

41 changes: 0 additions & 41 deletions packages/ingest-mongodb-public/src/sources/cppDriver.ts

This file was deleted.

8 changes: 6 additions & 2 deletions packages/ingest-mongodb-public/src/sources/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
import { prismaSourceConstructor } from "./prisma";
import { wiredTigerSourceConstructor } from "./wiredTiger";
import { pyMongoSourceConstructor } from "./pyMongo";
import { cppSourceConstructor } from "./cppDriver";
import { mongooseSourceConstructor } from "./mongoose";
import { practicalAggregationsDataSource } from "./practicalAggregations";

Expand Down Expand Up @@ -229,6 +228,12 @@ export const snootyProjectConfig: LocallySpecifiedSnootyProjectConfig[] = [
tags: ["docs", "driver", "rust"],
productName: "Rust Driver",
},
{
type: "snooty",
name: "cpp-driver",
tags: ["docs", "driver", "cpp", "cxx", "c++"],
productName: "C++ Driver",
},
];

export const devCenterProjectConfig: DevCenterProjectConfig = {
Expand Down Expand Up @@ -473,7 +478,6 @@ export const sourceConstructors: SourceConstructor[] = [
pyMongoSourceConstructor,
mongooseSourceConstructor,
prismaSourceConstructor,
cppSourceConstructor,
mongoDbCorpDataSource,
practicalAggregationsDataSource,
javaReactiveStreamsSourceConstructor,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,78 +8,79 @@ import { Page } from "mongodb-rag-core";

jest.setTimeout(60000);

/**
 * Converts a repo-relative markdown path into the page URL used by the sample
 * data source.
 *
 * Three rewrites are applied in order:
 * 1. a trailing `_index.md` becomes `index.md`;
 * 2. a leading `docs/content/mongocxx-v3` is replaced with the base URL;
 * 3. a trailing `.md` extension becomes a trailing `/`.
 *
 * Paths that match none of the patterns are returned unchanged.
 *
 * @param pathInRepo - Path of the markdown file relative to the repo root.
 * @returns The derived page URL.
 */
export function samplePathToPage(pathInRepo: string): string {
  // Anchor the rewrite to the end of the path. A plain string pattern would
  // replace the FIRST "_index.md" it finds, which is the wrong one whenever an
  // earlier path segment also contains that text.
  const normalizedPath = pathInRepo.replace(/_index\.md$/, "index.md");

  // NOTE(review): the base URL reads "example/com"; possibly "example.com" was
  // intended. Left unchanged because callers may depend on the current value.
  return normalizedPath
    .replace(/^docs\/content\/mongocxx-v3/, "https://example/com")
    .replace(/\.md$/, "/");
}
const sampleConf: MakeMdOnGithubDataSourceParams = {
name: "sample",
repoUrl: "https://github.com/mongodb/mongo-cxx-driver/",
const baseChatbotRepoConfig: MakeMdOnGithubDataSourceParams = {
name: "chatbot",
repoUrl: "https://github.com/mongodb/chatbot",
repoLoaderOptions: {
branch: "master",
ignoreFiles: [/^(?!^\/docs\/content\/mongocxx-v3\/).*/],
},
pathToPageUrl: samplePathToPage,
metadata: {
productName: "C++ Driver (mongocxx)",
branch: "main",
},
pathToPageUrl: (path) => path,
extractMetadata: () => ({
foo: "bar",
}),
};

const mongodbCorpConfig: MakeMdOnGithubDataSourceParams = {
...baseChatbotRepoConfig,
name: "mongodb-corp",
frontMatter: {
process: true,
separator: "+++",
format: "toml",
separator: "---",
format: "yaml",
},
metadata: {
productName: "MongoDB Corp",
},
filter: (path) => path.includes("mongodb-corp"),
extractTitle: (_, frontmatter) => (frontmatter?.title as string) ?? null,
extractMetadata: () => ({
foo: "bar",
}),
};

/**
 * Data source params built on top of `baseChatbotRepoConfig`, with its own
 * name and product metadata. The `filter` callback matches paths that contain
 * `ingest/testData`.
 */
const ingestTestDataConfig: MakeMdOnGithubDataSourceParams = {
  ...baseChatbotRepoConfig,
  name: "ingest_testData",
  metadata: { productName: "Ingest Test Data" },
  filter: (filePath) => {
    return filePath.includes("ingest/testData");
  },
};

describe("MdOnGithubDataSource", () => {
let pages: Page[];
const samplePages: Record<string, Page | undefined> = {};
const getSamplePage = (path: string) => {
const samplePage = samplePages[path];
assert(samplePage);
return samplePage;
};
beforeAll(async () => {
const dataSource = await makeMdOnGithubDataSource(sampleConf);
const dataSource = await makeMdOnGithubDataSource(mongodbCorpConfig);
pages = await dataSource.fetchPages();
samplePages["mongodb-corp/chatbot/overview.md"] = pages.find((page) => {
return page.url.includes("mongodb-corp/chatbot/overview.md");
});
});
it("loads and processes a real repo of markdown files", async () => {
const samplePage = pages.find((page) =>
page.title?.includes("Installing the mongocxx driver")
);
const samplePage = getSamplePage("mongodb-corp/chatbot/overview.md");
assert(samplePage);
expect(samplePage?.body).toContain("install");
expect(samplePage?.body).toContain(
"The MongoDB AI is an advanced LLM-based chatbot"
);
});
it("processes metadata", () => {
const samplePage = pages[0];
const samplePage = getSamplePage("mongodb-corp/chatbot/overview.md");
expect(samplePage.metadata).toHaveProperty("foo", "bar");
expect(samplePage.metadata).toHaveProperty(
"productName",
"C++ Driver (mongocxx)"
);
expect(samplePage.metadata).toHaveProperty("productName", "MongoDB Corp");
});
it("removes frontmatter from page body", () => {
const samplePage = pages[0];
expect(samplePage.body).not.toContain("+++");
const samplePage = getSamplePage("mongodb-corp/chatbot/overview.md");
expect(samplePage.body).not.toContain("---");
});
it("extracts title from frontmatter", () => {
const samplePage = pages[0];
const samplePage = getSamplePage("mongodb-corp/chatbot/overview.md");
expect(samplePage.title).toBeTruthy();
});
it("works with .mdx files", async () => {
const sampleConf: MakeMdOnGithubDataSourceParams = {
name: "sample",
repoUrl: "https://github.com/mongodb/chatbot",
repoLoaderOptions: {
branch: "main",
},
pathToPageUrl: (path) => path,
metadata: {
productName: "C++ Driver (mongocxx)",
},
filter: (path) => path.includes("ingest/testData"),
};
const dataSource = await makeMdOnGithubDataSource(sampleConf);
const dataSource = await makeMdOnGithubDataSource(ingestTestDataConfig);
const pages = await dataSource.fetchPages();
expect(pages.length).toBeGreaterThan(1);
expect(
Expand Down
20 changes: 19 additions & 1 deletion packages/mongodb-rag-ingest/testData/sampleMdxFile.mdx
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
hello mdx!
+++
title = "Hello, MDX!"
date = "2024-01-01T00:00:00.000Z"
+++

# Hello, MDX!

This is an MDX file!

It's got standard markdown features like code fences:

```js title="SomeComponent.js"
import { sum } from "./math";
const three = sum(1, 2);
```

<SomeComponent />

It renders JSX wow!

0 comments on commit aa6ee21

Please sign in to comment.