From 27478394afb03162ed28ae05c0be8d24554bf610 Mon Sep 17 00:00:00 2001 From: Blake Date: Tue, 6 Feb 2024 00:18:39 -0600 Subject: [PATCH 1/2] Initial project pushed --- README.md | 24 +++++- app.py | 124 +++++++++++++++++++++++++++++ helpers.py | 203 +++++++++++++++++++++++++++++++++++++++++++++++ prompt.txt | 87 ++++++++++++++++++++ requirements.txt | 4 + 5 files changed, 440 insertions(+), 2 deletions(-) create mode 100644 app.py create mode 100644 helpers.py create mode 100644 prompt.txt create mode 100644 requirements.txt diff --git a/README.md b/README.md index ba40aae..3d56e3e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,22 @@ -# VideoInterviewAutomation -Source code for video interview analysis automation. +--- +title: VideoInterviewAutomation +emoji: 🚀 +colorFrom: yellow +colorTo: red +sdk: gradio +sdk_version: 4.16.0 +app_file: app.py +pinned: false +license: mit +--- + +# YouTube Interview Analysis Tool + +This application evaluates YouTube video interviews to recommend whether the interviewee should be considered for a further interview based on a specific rubric. It leverages the YouTube Transcript API to fetch transcripts, analyzes the content with OpenAI's GPT-4, and provides recommendations through a simple web interface powered by Gradio. + +## Features + +- **Video ID Extraction**: Extracts the video ID from a YouTube URL. +- **Transcript Retrieval**: Retrieves the video's transcript along with its total duration and an estimated number of pauses. +- **GPT-4 Analysis**: Analyzes the transcript data against a predefined rubric to assess the interviewee's performance. +- **Gradio Interface**: Offers a user-friendly web interface for inputting YouTube URLs and receiving recommendations. 
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..bad6b5b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,143 @@
+"""Flask webhook that analyzes a candidate's video interview and reports to Lever."""
+
+import logging
+import os
+
+import requests
+from flask import Flask, jsonify, make_response, request
+from requests.auth import HTTPBasicAuth
+
+from helpers import analyze_transcript, get_youtube_url
+
+app = Flask(__name__)
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
+
+# Seconds to wait for the Lever API; without a timeout a stalled connection
+# would block the webhook worker indefinitely.
+LEVER_TIMEOUT_SECONDS = 30
+
+
+def post_candidate_analysis_to_lever(analysis_result, candidate_id):
+    """
+    Add a note containing the interview analysis result to a Lever opportunity.
+
+    Sends a POST request to the Lever notes endpoint of the opportunity identified
+    by candidate_id. Failures are logged instead of raised, so callers only need
+    to check for a None return value.
+
+    Parameters:
+    - analysis_result (str): The result of the video interview analysis.
+    - candidate_id (str): The unique identifier of the opportunity in Lever.
+
+    Returns:
+    - dict: The JSON response from the Lever API if the request is successful.
+    - None: If the API key is missing, the request fails, or the response body
+      cannot be decoded as JSON.
+
+    Note:
+    Authenticates with HTTP basic auth, using the 'LeverKey' environment
+    variable as the user name and an empty password.
+    """
+    api_key = os.getenv('LeverKey')
+    if not api_key:
+        logging.error("Environment variable 'LeverKey' is not set")
+        return None
+
+    lever_api_url = 'https://api.lever.co/v1/opportunities/{}/notes'.format(candidate_id)
+    data = {
+        "value": "Video Interview ML Decision: {}".format(analysis_result)
+    }
+
+    try:
+        logging.info(f"Sending analysis result to Lever for candidate ID {candidate_id}")
+
+        response = requests.post(
+            lever_api_url,
+            auth=HTTPBasicAuth(api_key, ''),
+            json=data,
+            timeout=LEVER_TIMEOUT_SECONDS,
+        )
+
+        # Turn 4xx/5xx responses into HTTPError so they are logged below.
+        response.raise_for_status()
+
+        logging.info(f"Successfully sent analysis result to Lever for candidate ID {candidate_id}")
+
+        return response.json()
+    except requests.exceptions.HTTPError as http_err:
+        logging.error(f'HTTP error occurred: {http_err}')
+    except requests.exceptions.ConnectionError as conn_err:
+        logging.error(f'Connection error occurred: {conn_err}')
+    except requests.exceptions.Timeout as timeout_err:
+        logging.error(f'Timeout error occurred: {timeout_err}')
+    except requests.exceptions.RequestException as req_err:
+        logging.error(f'Error sending data to Lever: {req_err}')
+    except Exception as e:
+        # Boundary catch-all (e.g. a response body that is not JSON) so the
+        # webhook handler always gets None on failure instead of an exception.
+        logging.error(f'An unexpected error occurred: {e}')
+
+    return None
+
+
+@app.route('/webhook', methods=['POST'])
+def handle_webhook():
+    """
+    Handle a webhook POST: analyze the candidate's video and post the result to Lever.
+
+    Expects a JSON object body containing 'opportunityId'. Looks up the
+    candidate's YouTube link, analyzes the transcript, and posts the analysis
+    result to Lever as a note.
+
+    Returns:
+    - 200 with the analysis result when analysis and posting both succeed.
+    - 200 with an explanatory message when no usable YouTube link is found
+      (nothing is posted to Lever in that case).
+    - 400 when the body is missing, is not a JSON object, or lacks 'opportunityId'.
+    - 500 when the analysis, the post to Lever, or anything unexpected fails.
+    """
+    try:
+        # silent=True returns None for a missing or malformed JSON body instead of
+        # raising, so bad requests get the 400 below rather than a 500 from the
+        # catch-all handler at the bottom.
+        data = request.get_json(silent=True)
+        if not data:
+            logging.error("No data received in request")
+            return make_response(jsonify({"error": "No data received"}), 400)
+
+        if not isinstance(data, dict):
+            logging.error("Request body is not a JSON object")
+            return make_response(jsonify({"error": "Request body must be a JSON object"}), 400)
+
+        opportunity_id = data.get('opportunityId')
+        if not opportunity_id:
+            logging.error("No opportunityId provided")
+            return make_response(jsonify({"error": "No opportunityId provided"}), 400)
+
+        candidate_video_url = get_youtube_url(opportunity_id)
+        if not candidate_video_url:
+            logging.error(f"Unable to process video URL for opportunityId {opportunity_id}")
+            analysis_result = "Unable to process the video URL. Currently only YouTube URLs are accepted."
+
+            return jsonify(analysis_result), 200
+
+        analysis_result = analyze_transcript(candidate_video_url)
+        if analysis_result is None:
+            logging.error(f"Error analyzing transcript for opportunityId {opportunity_id}")
+            return make_response(jsonify({"error": "Failed to analyze transcript"}), 500)
+
+        send_result = post_candidate_analysis_to_lever(analysis_result, opportunity_id)
+        if send_result is None:
+            logging.error(f"Failed to send results to Lever for opportunityId {opportunity_id}")
+            return make_response(jsonify({"error": "Failed to send results to Lever"}), 500)
+
+        return jsonify(analysis_result), 200
+    except Exception as e:
+        logging.error(f"An unexpected error occurred: {e}")
+        return make_response(jsonify({"error": "An unexpected error occurred"}), 500)
+
+
+if __name__ == '__main__':
+    # The Werkzeug debugger can execute arbitrary code, so debug mode is opt-in
+    # through FLASK_DEBUG=1 rather than always enabled.
+    app.run(debug=os.getenv('FLASK_DEBUG') == '1', port=5002)
\ No newline at end of
file
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..baee7af
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,259 @@
+"""Helpers for fetching interview videos and scoring their transcripts with GPT-4."""
+
+import logging
+import os
+import re
+from urllib.parse import parse_qs, urlparse
+
+import openai
+import requests
+from requests.auth import HTTPBasicAuth
+from youtube_transcript_api import YouTubeTranscriptApi
+
+logging.basicConfig(filename='app.log', filemode='a',
+                    format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
+
+# Hosts whose URLs carry the video ID in the 'v' query parameter or right after
+# an /embed/ or /shorts/ path prefix.
+_YOUTUBE_HOSTS = frozenset({'www.youtube.com', 'youtube.com', 'm.youtube.com'})
+_YOUTUBE_ID_PATH_PREFIXES = ('embed', 'shorts')
+# Short-link host whose first path segment is the video ID.
+_YOUTUBE_SHORT_HOST = 'youtu.be'
+# Seconds to wait for the Lever API before giving up.
+_LEVER_TIMEOUT_SECONDS = 30
+# Resolved next to this module so the prompt is found regardless of the
+# working directory of the process.
+_PROMPT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt.txt')
+# Whole-word match, so that "eyes" or "yesterday" do not count as "yes".
+_YES_PATTERN = re.compile(r'\byes\b', re.IGNORECASE)
+
+
+def get_video_id_from_url(url):
+    """
+    Extracts the YouTube video ID from a given URL.
+
+    Supports 'youtube.com' watch URLs (the 'v' query parameter), 'youtube.com'
+    '/embed/VIDEO_ID' and '/shorts/VIDEO_ID' URLs, and 'youtu.be/VIDEO_ID' short
+    links. URLs written without a scheme are accepted as well.
+
+    Parameters:
+    url (str): The full URL of the YouTube video.
+
+    Returns:
+    str: The extracted video ID if found, otherwise None.
+
+    Note:
+    This function does not raise; errors are logged and None is returned.
+    """
+    try:
+        candidate = url
+        if isinstance(url, str):
+            candidate = url.strip()
+            if '://' not in candidate and not candidate.startswith('//'):
+                # urlparse only recognises the host after a scheme or '//'.
+                candidate = '//' + candidate
+
+        url_data = urlparse(candidate)
+        host = url_data.hostname
+        video_id = None
+        if host in _YOUTUBE_HOSTS:
+            query_ids = parse_qs(url_data.query).get("v")
+            segments = [part for part in url_data.path.split('/') if part]
+            if query_ids:
+                video_id = query_ids[0]
+            elif len(segments) >= 2 and segments[0] in _YOUTUBE_ID_PATH_PREFIXES:
+                video_id = segments[1]
+        elif host == _YOUTUBE_SHORT_HOST:
+            # Only the first path segment is the ID; ignore anything after it.
+            segments = [part for part in url_data.path.split('/') if part]
+            if segments:
+                video_id = segments[0]
+
+        if video_id:
+            logging.info(f"Video ID {video_id} extracted from URL.")
+            return video_id
+
+        logging.warning(f"No video ID found in URL: {url}")
+        return None
+    except Exception as e:
+        # The exception must be bound to a name to be usable in the log message.
+        logging.error(f"Error extracting video ID from URL {url}: {e}")
+        return None
+
+
+def get_first_youtube_video_url(urls):
+    """
+    Finds and returns the first YouTube video URL from a list of URLs.
+
+    A URL counts as a YouTube URL when it contains the substring 'youtube' or
+    'youtu.be'. Entries that are not strings are skipped.
+
+    Parameters:
+    urls (list of str): The URLs to be checked. None is treated as an empty list.
+
+    Returns:
+    str: The first YouTube video URL found in the list, or None if there is none.
+    """
+    for url in urls or ():
+        if isinstance(url, str) and ('youtube' in url or 'youtu.be' in url):
+            return url
+    return None
+
+
+def get_youtube_url(opportunity_id):
+    """
+    Retrieves the YouTube video URL associated with a given opportunity ID from the Lever API.
+
+    Makes a GET request to the Lever API for the opportunity and returns the first
+    YouTube video URL found in the 'links' list of the opportunity data.
+
+    Parameters:
+    opportunity_id (str): The unique identifier for the opportunity in the Lever system.
+
+    Returns:
+    str: The YouTube video URL associated with the opportunity, or None if the
+    opportunity has no links or none of them is a YouTube URL.
+
+    Raises:
+    requests.exceptions.RequestException: If the request fails, times out, or
+    Lever answers with a 4xx/5xx status.
+
+    Note:
+    Requires the 'LeverKey' environment variable to be set for authentication with the Lever API.
+    """
+    url = 'https://api.lever.co/v1/opportunities/{}'.format(opportunity_id)
+    response = requests.get(
+        url,
+        auth=HTTPBasicAuth(os.getenv('LeverKey'), ''),
+        timeout=_LEVER_TIMEOUT_SECONDS,
+    )
+    # Surface 4xx/5xx answers as HTTPError instead of failing later with an
+    # unrelated KeyError on the error payload.
+    response.raise_for_status()
+
+    opportunity = response.json().get('data') or {}
+    return get_first_youtube_video_url(opportunity.get('links'))
+
+
+def parse_decision_to_binary(decision_text):
+    """
+    Converts a decision text to a binary outcome based on the presence of the word 'yes'.
+
+    The match is case-insensitive and on the whole word only, so text that merely
+    contains the letters 'yes' inside another word is not a positive decision.
+
+    Parameters:
+    decision_text (str): The decision text to be analyzed.
+
+    Returns:
+    bool: True if the word 'yes' is present in the decision text, False otherwise
+    (including for None or an empty string).
+    """
+    if not decision_text:
+        return False
+    return _YES_PATTERN.search(decision_text) is not None
+
+
+def get_transcript_data_and_pause_count(video_id):
+    """
+    Fetches a video's transcript, calculates its total duration in minutes, and counts pauses.
+
+    Retrieves the English transcript through YouTubeTranscriptApi. The duration is
+    taken from the end of the last segment, and a pause is counted whenever a
+    segment starts after the previous segment has ended.
+
+    Parameters:
+    video_id (str): The unique identifier of the YouTube video.
+
+    Returns:
+    tuple: A tuple containing the full transcript text (str), total duration in whole
+    minutes (int), and the estimated number of pauses (int), or (None, None, None)
+    if the transcript is empty or an error occurs.
+    """
+    try:
+        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
+        if not transcript:
+            # Always return a 3-tuple: callers unpack the result directly.
+            logging.error(f"Transcript for video ID {video_id} is empty.")
+            return None, None, None
+
+        last_segment = transcript[-1]
+        total_duration = last_segment['start'] + last_segment['duration']
+
+        # Estimate the number of pauses from the gaps between consecutive segments.
+        pauses = sum(
+            1
+            for previous, current in zip(transcript, transcript[1:])
+            if current['start'] > previous['start'] + previous['duration']
+        )
+
+        full_transcript = " ".join(segment['text'] for segment in transcript)
+        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
+        # Segment times are floats; int() keeps "4.0" from appearing in the prompt.
+        return full_transcript, int(total_duration // 60), pauses
+    except Exception as e:
+        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
+        return None, None, None
+
+
+def analyze_transcript(url):
+    """
+    Analyzes a YouTube video's transcript for content quality, using a predefined prompt for GPT evaluation.
+
+    Reads the prompt template from 'prompt.txt' next to this module, extracts the
+    video ID from the URL, retrieves the transcript with its duration and pause
+    count, and asks a GPT model whether the candidate qualifies for an interview.
+
+    Parameters:
+    url (str): The URL of the YouTube video to be analyzed.
+
+    Returns:
+    str: A message indicating whether the candidate qualifies for an interview, or
+    an error message if the prompt file cannot be read, the video URL is invalid,
+    or any other error occurs during processing.
+    None: If the transcript could not be retrieved.
+    """
+    try:
+        # The prompt contains non-ASCII punctuation, so do not rely on the
+        # platform's default encoding.
+        with open(_PROMPT_PATH, 'r', encoding='utf-8') as file:
+            prompt = file.read()
+    except Exception as e:
+        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
+        return "Error processing the prompt file."
+
+    try:
+        video_id = get_video_id_from_url(url)
+        if not video_id:
+            logging.error("Invalid URL provided.")
+            return "Unable to process the video URL. Currently only YouTube URLs are accepted."
+
+        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
+            video_id)
+
+        if full_transcript is None:  # If there was an error retrieving the transcript
+            logging.error("Error retrieving the transcript.")
+            return None
+
+        # Fill the three positional '{}' placeholders of the template: transcript,
+        # number of pauses, and video length in minutes.
+        prompt = prompt.format(full_transcript, pauses, total_duration)
+
+        # Using the new OpenAI client structure
+        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt}
+            ],
+        )
+
+        decision = parse_decision_to_binary(response.choices[0].message.content.strip())
+
+        if decision:
+            return "The candidate qualifies for an interview."
+        return "The candidate does not qualify for an interview."
+    except Exception as e:
+        logging.error(f"An error occurred during the analysis: {e}")
+        return f"An error occurred during the processing. {e}"
\ No newline at end of file
diff --git a/prompt.txt b/prompt.txt
new file mode 100644
index 0000000..60b5f08
--- /dev/null
+++ b/prompt.txt
@@ -0,0 +1,87 @@
+Candidate's Transcript:
+{}
+
+Candidate's Number of Pauses:
+{}
+
+Candidate's Length of Video:
+{} minutes
+
+Introduction:
+You are tasked with evaluating a candidate's interview transcript for a highly competitive position that demands not only technical expertise but also a strong alignment with our educational mission, leadership qualities, and a collaborative spirit. This evaluation requires a rigorous, critical analysis. Additionally, assess the presentation’s duration and pacing, as the ability to convey information effectively within a constrained timeframe and with minimal unnecessary pauses is crucial. We expect assessors to employ a stringent standard, focusing on identifying any shortcomings, gaps in knowledge, or areas for improvement. Our aim is to ensure only the highest caliber candidates progress, reflecting our no-tolerance policy for mediocrity. Your assessment should be blunt and uncompromising, providing clear justifications for each score based on the candidate's responses without inferring unstated intentions.
+
+Company Mission:
+We believe that education is the most valuable asset anyone can have, and we founded this camp to help students get ready for the real world.
+
+At AI Camp, we want to open doors for our students by teaching them real-world skills that they wouldn't find in their traditional classrooms to prepare them for opportunities in the technology field.
+
+Detailed Rubric for Evaluation:
+
+Alignment to Mission and Desire to Join (Max 5 Points)
+(0-1 Points): Exhibits little or no understanding or alignment with the mission. 
Fails to mention an interest in teaching, or reasons for joining are unclear, purely self-interested, or unrelated to the educational goals of the organization. +(2 Points): Shows a basic alignment with the mission with a mention of teaching but lacks depth, passion, or a clear understanding of how teaching aligns with the organization's goals. Interest in teaching may be mentioned but not elaborated upon. +(3 Points): Indicates a general interest in teaching and some alignment with the mission. Mentions a desire to join with an understanding of the importance of education but falls short of demonstrating a strong personal commitment to teaching or the organization's specific educational goals. +(4 Points): Demonstrates a strong alignment with the mission with a clear, articulated interest in teaching. Shows a desire to join that goes beyond the basics, with some explanation of how their teaching can contribute meaningfully to the organization's goals. +(5 Points): Demonstrates an exceptional and passionate alignment with the mission, clearly and enthusiastically articulating a strong desire to teach and contribute meaningfully to the organization's goals. Shows a deep understanding of the importance of education and how their specific skills and interests in teaching can significantly advance the mission. + +Desire to Learn (Max 5 Points) +(0-1 Points): Displays no interest in learning from the mentoring experience, treating it as just another job. +(2-3 Points): Recognizes the value of learning but does not prioritize it as a key motivation. +(4-5 Points): Strongly motivated by a desire to learn from experiences as a mentor, viewing it as a central driving factor. + +Technical Explanation (KNN or Back Propagation) and Tone (Max 10 Points) +(0-2 Points): Provides an unclear or incorrect technical explanation with a monotone or difficult-to-follow delivery. 
+(3-6 Points): Offers a basic technical explanation with some engagement and occasional tone changes, showing an understanding of the subject. +(7-8 Points): Gives a competent technical explanation, maintaining an engaging tone and showing enthusiasm for the topic. +(9-10 Points): Excels with a clear, accurate, and engaging explanation, demonstrating exceptional enthusiasm and making the subject accessible and interesting to all listeners. + +Confidence (Max 4 Points) +(0-1 Points): Demonstrates noticeable uncertainty, with significant hesitations or pauses. +(2 Points): Shows a level of confidence with occasional hesitations. +(3-4 Points): Exudes confidence throughout the presentation, without any noticeable hesitations. + +Comprehensibility (Max 4 Points) +(0-1 Points): Utilizes overly technical language or inaccuracies, hindering comprehension. +(2 Points): Generally understandable to those with a technical background, despite the prevalence of jargon. +(3-4 Points): Communicates effectively, using relatable analogies that make the material comprehensible to audiences of all ages. + +Effort (Max 5 Points) +(0-1 Points): Demonstrates minimal effort, failing to meet basic expectations. +(2-3 Points): Meets basic expectations with a simple presentation, showing little beyond the minimum. +(4-5 Points): Surpasses expectations with a passionate and innovative presentation, incorporating novel elements that enrich the experience significantly. + +Leadership and Product Strategy (Max 6 Points) +(0-2 Points): Shows little to no leadership or strategic vision, lacking clarity in product direction. +(3-4 Points): Displays some leadership qualities and understanding of product strategy, but lacks a compelling vision. +(5-6 Points): Demonstrates strong leadership and a clear, innovative strategy for product development, aligning with organizational goals. 
+ +Collaboration and Understanding Customer Needs (Max 6 Points) +(0-2 Points): Limited collaboration with teams and understanding of customer needs. +(3-4 Points): Adequate collaboration skills and a basic grasp of customer needs. +(5-6 Points): Excellent collaboration with cross-functional teams and a deep understanding of customer needs, driving product development effectively. + +Presentation Length (Max 3 Points) +(0 Points): The presentation significantly exceeds the ideal duration or is significantly under the ideal duration, lasting longer than 7 minutes or less than 2 minutes, indicating potential inefficiency in communication. +(1 Point): The presentation is slightly over the ideal duration, lasting between 5 to 7 minutes, suggesting minor pacing issues. +(2 Points): The presentation is within the ideal duration of 4 to 5 minutes, reflecting effective communication and time management skills. +(3 Points): The presentation is not only within the ideal duration but also efficiently utilizes the time to convey the message concisely and effectively. + +Pauses and Pacing (Max 2 Points) +(0 Points): The presentation contains excessive unnecessary pauses, disrupting the flow and indicating potential nervousness or lack of preparation. +(1 Point): The presentation has a moderate number of pauses, but they occasionally disrupt the flow of information. +(2 Points): The presentation has minimal pauses, contributing to a smooth flow of information and demonstrating confidence and preparation. + +Conclusion: +Sum the scores for a total out of 50. Please be precise with your total. Based on the total score: + +0-39 Points: Does not qualify for an interview. +40-50 Points: Qualifies for an interview, demonstrating strong potential. 
+ +Modified Concluding Instructions for You (the AI): + +Evaluate the candidate's performance based on the total score and the assessment of their strengths and weaknesses in time management, presentation efficiency, and communication effectiveness. You should then decide the candidate's suitability for further interviews. Your decision should be: + +"Yes" if the candidate meets or exceeds the required criteria for an interview. +"No" if the candidate does not meet the necessary criteria for an interview. + +Your output must be strictly one of these two options (Yes or No), ensuring a clear and direct conclusion based on the evaluation criteria provided. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7907fc9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +gradio +youtube_transcript_api +openai +flask \ No newline at end of file From e8ea0b04d869fe960bdf696e902021574a7390b2 Mon Sep 17 00:00:00 2001 From: Blake Date: Tue, 6 Feb 2024 00:21:17 -0600 Subject: [PATCH 2/2] Readme change --- README.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/README.md b/README.md index 3d56e3e..b8bc2f8 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,3 @@ ---- -title: VideoInterviewAutomation -emoji: 🚀 -colorFrom: yellow -colorTo: red -sdk: gradio -sdk_version: 4.16.0 -app_file: app.py -pinned: false -license: mit ---- - # YouTube Interview Analysis Tool This application evaluates YouTube video interviews to recommend whether the interviewee should be considered for a further interview based on a specific rubric. It leverages the YouTube Transcript API to fetch transcripts, analyzes the content with OpenAI's GPT-4, and provides recommendations through a simple web interface powered by Gradio.