Skip to content

Commit

Permalink
Add knowledge form context selection
Browse files Browse the repository at this point in the history
- Adds routes for retrieving files from the local knowledge git repository
- Adds the ability to view the file changes/additions in the
native dashboard.
- Enables the user to view the knowledge document, highlight the
text they want to add to the context field, and submit it. The highlighted
text is then populated into the context field.
- Allows the user to still manually enter context or knowledge file details
and handles the state switching when they choose to do so.
- Prevents the user from selecting context from a different commit SHA if
they have already selected context from another SHA.

Signed-off-by: Brent Salisbury <[email protected]>
  • Loading branch information
nerdalert committed Dec 18, 2024
1 parent f15fdcc commit 8d644f9
Show file tree
Hide file tree
Showing 9 changed files with 1,254 additions and 272 deletions.
80 changes: 56 additions & 24 deletions src/app/api/native/git/branches/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const REMOTE_TAXONOMY_REPO_CONTAINER_MOUNT_DIR = '/tmp/.instructlab-ui';
/** One file-level change reported when a contribution branch is compared against `main`. */
interface Diffs {
// Path of the changed file, relative to the taxonomy repository directory.
file: string;
// Kind of change; this file checks for the values 'added', 'modified' and 'deleted'.
status: string;
// Text content of the file as read from the branch; only filled in for added/modified files.
content?: string;
}

export async function GET() {
Expand All @@ -38,15 +39,15 @@ export async function GET() {
const messageStr = commitMessage.split('Signed-off-by');
branchDetails.push({
name: branch,
creationDate: commitDetails.commit.committer.timestamp * 1000, // Convert to milliseconds
creationDate: commitDetails.commit.committer.timestamp * 1000,
message: messageStr[0].replace(/\n+$/, ''),
author: signoff
});
}

branchDetails.sort((a, b) => b.creationDate - a.creationDate); // Sort by creation date, newest first
branchDetails.sort((a, b) => b.creationDate - a.creationDate);
console.log('Total branches present in native taxonomy:', branchDetails.length);

console.log('Total branches present in local taxonomy:', branchDetails.length);
return NextResponse.json({ branches: branchDetails }, { status: 200 });
} catch (error) {
console.error('Failed to list branches from local taxonomy:', error);
Expand Down Expand Up @@ -131,7 +132,18 @@ async function handleDiff(branchName: string, localTaxonomyDir: string) {
}

const changes = await findDiff(branchName, localTaxonomyDir);
return NextResponse.json({ changes }, { status: 200 });
// For each added/modified file, read the content from the branch and include it
const enrichedChanges: Diffs[] = [];
for (const change of changes) {
if (change.status === 'added' || change.status === 'modified') {
const fileContent = await readFileFromBranch(localTaxonomyDir, branchName, change.file);
enrichedChanges.push({ ...change, content: fileContent });
} else {
enrichedChanges.push(change);
}
}

return NextResponse.json({ changes: enrichedChanges }, { status: 200 });
} catch (error) {
console.error(`Failed to show contribution changes ${branchName}:`, error);
return NextResponse.json(
Expand All @@ -155,8 +167,8 @@ async function findDiff(branchName: string, localTaxonomyDir: string): Promise<D
const mainCommit = await git.resolveRef({ fs, dir: localTaxonomyDir, ref: 'main' });
const branchCommit = await git.resolveRef({ fs, dir: localTaxonomyDir, ref: branchName });

const mainFiles = await getFilesFromTree(mainCommit);
const branchFiles = await getFilesFromTree(branchCommit);
const mainFiles = await getFilesFromTree(mainCommit, localTaxonomyDir);
const branchFiles = await getFilesFromTree(branchCommit, localTaxonomyDir);

// Create an array of Diffs to store changes
const changes: Diffs[] = [];
Expand Down Expand Up @@ -187,7 +199,7 @@ async function getTopCommitDetails(dir: string, ref: string = 'HEAD') {
fs,
dir,
ref,
depth: 1 // Only fetch the latest commit
depth: 1
});

if (!topCommit) {
Expand All @@ -210,6 +222,7 @@ async function getTopCommitDetails(dir: string, ref: string = 'HEAD') {
throw error;
}
}

async function handlePublish(branchName: string, localTaxonomyDir: string, remoteTaxonomyDir: string) {
try {
if (!branchName || branchName === 'main') {
Expand All @@ -222,16 +235,14 @@ async function handlePublish(branchName: string, localTaxonomyDir: string, remot
// Check if there are any changes to publish, create a new branch at remoteTaxonomyDir and
// copy all the files listed in the changes array to the new branch and create a commit
if (changes.length > 0) {
const remoteBranchName = branchName;
await git.checkout({ fs, dir: localTaxonomyDir, ref: branchName });
// Read the commit message of the top commit from the branch
const details = await getTopCommitDetails(localTaxonomyDir);

// Check if the remote branch exists, if not create it
const remoteBranchName = branchName;
const remoteBranchExists = await git.listBranches({ fs, dir: remoteTaxonomyDir });
if (remoteBranchExists.includes(remoteBranchName)) {
console.log(`Branch ${remoteBranchName} exist in remote taxonomy, deleting it.`);
// Delete the remote branch if it exists, we will recreate it
console.log(`Branch ${remoteBranchName} exists in remote taxonomy, deleting it.`);
await git.deleteBranch({ fs, dir: remoteTaxonomyDir, ref: remoteBranchName });
} else {
console.log(`Branch ${remoteBranchName} does not exist in remote taxonomy, creating a new branch.`);
Expand All @@ -243,14 +254,21 @@ async function handlePublish(branchName: string, localTaxonomyDir: string, remot

// Copy the files listed in the changes array to the remote branch and if the directories do not exist, create them
for (const change of changes) {
console.log(`Copying ${change.file} to remote branch ${remoteBranchName}`);
const filePath = path.join(localTaxonomyDir, change.file);
const remoteFilePath = path.join(remoteTaxonomyDir, change.file);
const remoteFileDir = path.dirname(remoteFilePath);
if (!fs.existsSync(remoteFileDir)) {
fs.mkdirSync(remoteFileDir, { recursive: true });
if (change.status !== 'deleted') {
const filePath = path.join(localTaxonomyDir, change.file);
const remoteFilePath = path.join(remoteTaxonomyDir, change.file);
const remoteFileDir = path.dirname(remoteFilePath);
if (!fs.existsSync(remoteFileDir)) {
fs.mkdirSync(remoteFileDir, { recursive: true });
}
fs.copyFileSync(filePath, remoteFilePath);
} else {
// If deleted, ensure the file is removed from remote as well, if it exists
const remoteFilePath = path.join(remoteTaxonomyDir, change.file);
if (fs.existsSync(remoteFilePath)) {
fs.rmSync(remoteFilePath);
}
}
fs.copyFileSync(filePath, remoteFilePath);
}

await git.add({ fs, dir: remoteTaxonomyDir, filepath: '.' });
Expand Down Expand Up @@ -306,20 +324,34 @@ async function handlePublish(branchName: string, localTaxonomyDir: string, remot
}
}

// Helper function to recursively gather file paths and their oids from a tree
async function getFilesFromTree(commitOid: string) {
const REPO_DIR = path.join(LOCAL_TAXONOMY_ROOT_DIR, '/taxonomy');
/**
 * Reads a file's text content as it exists at the tip of a branch, without
 * checking the branch out or touching the working directory.
 *
 * @param localTaxonomyDir - Directory of the local taxonomy git repository.
 * @param branchName - Branch whose tip commit the file is read from.
 * @param filePath - Path of the file relative to the repository root.
 * @returns The file content decoded as UTF-8.
 * @throws Propagates any error raised by `git.resolveRef` or `git.readBlob`
 *   (for example when the ref or the file cannot be resolved).
 */
async function readFileFromBranch(localTaxonomyDir: string, branchName: string, filePath: string): Promise<string> {
  // Resolve the branch to its commit oid and let isomorphic-git walk that commit's tree to the blob.
  // The blob comes straight from the object store, so no scratch directory or checkout is required.
  const branchCommit = await git.resolveRef({ fs, dir: localTaxonomyDir, ref: branchName });
  const { blob } = await git.readBlob({ fs, dir: localTaxonomyDir, oid: branchCommit, filepath: filePath });

  // `blob` is a Uint8Array, so decode it explicitly as UTF-8 text.
  return new TextDecoder('utf-8').decode(blob);
}

async function getFilesFromTree(commitOid: string, repoDir: string) {
const fileMap: Record<string, string> = {};

async function walkTree(dir: string) {
const tree = await git.readTree({ fs, dir: REPO_DIR, oid: commitOid, filepath: dir });

const tree = await git.readTree({ fs, dir: repoDir, oid: commitOid, filepath: dir });
for (const entry of tree.tree) {
const fullPath = path.join(dir, entry.path);
if (entry.type === 'blob') {
fileMap[fullPath] = entry.oid;
} else if (entry.type === 'tree') {
await walkTree(fullPath); // Recursively walk subdirectories
await walkTree(fullPath);
}
}
}
Expand Down
211 changes: 211 additions & 0 deletions src/app/api/native/git/knowledge-files/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
// src/app/api/native/git/knowledge-files/route.ts

import { NextRequest, NextResponse } from 'next/server';
import * as git from 'isomorphic-git';
import fs from 'fs';
import path from 'path';

// Location of the local knowledge-docs git repository.
// An explicit NEXT_PUBLIC_LOCAL_TAXONOMY_DOCS_ROOT_DIR value wins; otherwise the path is built from HOME.
// Note: if HOME is unset, the template literal below produces a path starting with "undefined/".
const DEFAULT_TAXONOMY_DOCS_ROOT_DIR = `${process.env.HOME}/.instructlab-ui/taxonomy-knowledge-docs`;
const LOCAL_TAXONOMY_DOCS_ROOT_DIR = process.env.NEXT_PUBLIC_LOCAL_TAXONOMY_DOCS_ROOT_DIR || DEFAULT_TAXONOMY_DOCS_ROOT_DIR;

/** One Markdown knowledge document as returned in the `files` array of this route's responses. */
interface KnowledgeFile {
// File name as listed in the root directory of the knowledge-docs repository.
filename: string;
// File content read from the working directory as UTF-8 text.
content: string;
// Object id of the newest commit found for this file on the requested branch.
commitSha: string;
// Committer timestamp of that commit, formatted as an ISO 8601 string.
commitDate: string;
}

/** Summary of one branch of the knowledge-docs repository, as returned by the `list-branches` action. */
interface Branch {
// Branch name as reported by `git.listBranches`.
name: string;
// Object id of the newest commit on the branch.
commitSha: string;
// Committer timestamp of that commit, formatted as an ISO 8601 string.
commitDate: string;
}

/**
 * Collects every local branch of the knowledge-docs repository together with
 * the SHA and committer date of its newest commit.
 *
 * Branches without commits, or whose newest commit cannot be read, are left
 * out of the result (read failures are logged).
 *
 * @returns One entry per readable branch, in the order `git.listBranches` reports them.
 * @throws Error when the repository directory does not exist.
 */
const listAllBranches = async (): Promise<Branch[]> => {
  const repoDir = LOCAL_TAXONOMY_DOCS_ROOT_DIR;
  if (!fs.existsSync(repoDir)) {
    throw new Error('Repository path does not exist.');
  }

  const branchNames = await git.listBranches({ fs, dir: repoDir });
  const summaries: Branch[] = [];

  for (const name of branchNames) {
    try {
      // depth: 1 limits the log to the newest commit of the branch.
      const [tip] = await git.log({ fs, dir: repoDir, ref: name, depth: 1 });
      if (tip === undefined) {
        // Nothing has been committed on this branch.
        continue;
      }

      // Git stores timestamps in seconds; Date expects milliseconds.
      const committedAt = new Date(tip.commit.committer.timestamp * 1000);
      summaries.push({
        name,
        commitSha: tip.oid,
        commitDate: committedAt.toISOString()
      });
    } catch (error) {
      // A single unreadable branch must not fail the whole listing.
      console.error(`Failed to retrieve commit for branch ${name}:`, error);
    }
  }

  return summaries;
};

/**
 * Loads the Markdown documents found in the root directory of the
 * knowledge-docs repository for a given branch.
 *
 * The branch is checked out into the working directory first, so calling this
 * function changes the repository's checked-out branch as a side effect.
 * Files for which no commit is found, or whose lookup/read fails, are skipped
 * (failures are logged).
 *
 * @param branchName - Name of the branch to check out and read from.
 * @returns One entry per readable `.md` file in the repository root.
 * @throws Error when the repository directory or the branch does not exist.
 */
const getKnowledgeFiles = async (branchName: string): Promise<KnowledgeFile[]> => {
  const repoDir = LOCAL_TAXONOMY_DOCS_ROOT_DIR;

  if (!fs.existsSync(repoDir)) {
    throw new Error('Repository path does not exist.');
  }

  const knownBranches = await git.listBranches({ fs, dir: repoDir });
  const branchIsKnown = knownBranches.some((candidate) => candidate === branchName);
  if (!branchIsKnown) {
    throw new Error(`Branch "${branchName}" does not exist.`);
  }

  // Put the requested branch into the working directory so files can be read from disk.
  await git.checkout({ fs, dir: repoDir, ref: branchName });

  // Only top-level entries are considered; sub-directories are not traversed.
  const markdownEntries = fs.readdirSync(repoDir).filter((entry) => path.extname(entry).toLowerCase() === '.md');

  const collected: KnowledgeFile[] = [];

  for (const entry of markdownEntries) {
    const absolutePath = path.join(repoDir, entry);

    // Ignore anything named *.md that is not a regular file (e.g. a directory).
    if (!fs.statSync(absolutePath).isFile()) {
      continue;
    }

    try {
      // depth: 1 asks for just the newest commit that touched this file on the branch.
      const history = await git.log({
        fs,
        dir: repoDir,
        ref: branchName,
        filepath: entry,
        depth: 1
      });

      const newest = history[0];
      if (newest === undefined) {
        // The file has no commit on this branch.
        continue;
      }

      // Git stores timestamps in seconds; Date expects milliseconds.
      const commitDate = new Date(newest.commit.committer.timestamp * 1000).toISOString();
      const content = fs.readFileSync(absolutePath, 'utf-8');

      collected.push({
        filename: entry,
        content,
        commitSha: newest.oid,
        commitDate
      });
    } catch (error) {
      // One failing file must not abort the whole listing.
      console.error(`Failed to retrieve commit for file ${entry}:`, error);
    }
  }

  return collected;
};

/**
 * Handler for GET requests.
 * - If the query string contains `action=list-branches`, responds with `{ branches }`.
 * - Otherwise responds with `{ files }` holding the knowledge files of the 'main' branch.
 *
 * Any failure is logged and reported as a 500 response of the form `{ error: string }`.
 *
 * @param req - Incoming request; only its URL query string is inspected.
 * @returns A JSON response with status 200 on success or 500 on failure.
 */
const getKnowledgeFilesHandler = async (req: NextRequest): Promise<NextResponse> => {
  try {
    const action = new URL(req.url).searchParams.get('action');

    if (action === 'list-branches') {
      const branches = await listAllBranches();
      return NextResponse.json({ branches }, { status: 200 });
    }

    // Default behavior: fetch files from 'main' branch
    const knowledgeFiles = await getKnowledgeFiles('main');
    return NextResponse.json({ files: knowledgeFiles }, { status: 200 });
  } catch (error) {
    console.error('Failed to retrieve knowledge files:', error);
    // Anything can be thrown, so narrow instead of asserting; this keeps the `error` field
    // populated even when the thrown value is not an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
};

/**
 * Handler for POST requests. The JSON body may carry `action` and/or `branchName`.
 * - If `action` is 'diff', responds with the knowledge files of the 'main' branch
 *   (this takes precedence over `branchName`).
 * - Else, if `branchName` is a non-empty string, responds with the files of that branch.
 * - Else, responds with a 400 error.
 *
 * Any failure while processing is logged and reported as a 500 response of the form `{ error: string }`.
 *
 * @param req - Incoming request whose body is parsed as JSON.
 * @returns A JSON response with status 200, 400 or 500.
 */
const postKnowledgeFilesHandler = async (req: NextRequest): Promise<NextResponse> => {
  try {
    const body: unknown = await req.json();

    // A JSON body may legally be `null` or a primitive; destructuring `null` would throw and
    // surface as a 500. Treat every non-object body as "no fields supplied" so it gets the 400 below.
    const fields: Record<string, unknown> = typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};
    const { action, branchName } = fields;

    if (action === 'diff') {
      // Existing behavior: fetch files from 'main' branch
      const knowledgeFiles = await getKnowledgeFiles('main');
      return NextResponse.json({ files: knowledgeFiles }, { status: 200 });
    }

    if (typeof branchName === 'string' && branchName) {
      // Fetch files from the specified branch
      const knowledgeFiles = await getKnowledgeFiles(branchName);
      return NextResponse.json({ files: knowledgeFiles }, { status: 200 });
    }

    // If no valid action or branchName is provided
    return NextResponse.json({ error: 'Invalid request. Provide an action or branchName.' }, { status: 400 });
  } catch (error) {
    console.error('Failed to process POST request:', error);
    // Anything can be thrown, so narrow instead of asserting; this keeps the `error` field
    // populated even when the thrown value is not an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
};

/**
 * Route entry point for GET requests.
 * Delegates to `getKnowledgeFilesHandler`, which either lists branches or
 * returns knowledge files depending on the 'action' query parameter.
 */
export async function GET(req: NextRequest): Promise<NextResponse> {
  const response = await getKnowledgeFilesHandler(req);
  return response;
}

/**
 * Route entry point for POST requests.
 * Delegates to `postKnowledgeFilesHandler`, which selects the knowledge files
 * to return from the 'action' or 'branchName' field of the JSON body.
 */
export async function POST(req: NextRequest): Promise<NextResponse> {
  const response = await postKnowledgeFilesHandler(req);
  return response;
}
Loading

0 comments on commit 8d644f9

Please sign in to comment.