# streamlit_interactive.py

# Import necessary libraries
import json
import os
from typing import List

import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge

from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.parsers import ParseJobDesc, ParseResume
from scripts.ReadPdf import read_single_pdf
from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir

# Set page configuration
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
    layout="wide",
)

# Find the current working directory and configuration path.
# find_path is not defined in this file; presumably it is provided by the star
# import from scripts.similarity.get_score -- TODO confirm.
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

# Make sure the tokenizer data used by nltk.word_tokenize is available.
# NLTK 3.9 and later load the Punkt models from the "punkt_tab" resource rather
# than the pickled "punkt" package, so both are checked and whichever is
# missing is downloaded.
for _punkt_resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_punkt_resource}")
    except LookupError:
        nltk.download(_punkt_resource)

# Set some visualization parameters using the annotated_text library
# NOTE(review): the "0.5" in PADDING carries no CSS unit -- confirm that this
# is the intended value.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"


# Function to set session state variables
def update_session_state(key, val) -> None:
    """
    Store a value in Streamlit's session state.

    Args:
        key: Session-state key to assign.
        val: Value stored under ``key``; any existing value is overwritten.

    Returns:
        None
    """
    st.session_state[key] = val


# Function to delete all files in a directory
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every file located directly inside a directory.

    Sub-directories are left in place (``os.remove`` cannot delete them), and
    a failure on one file does not prevent the remaining files from being
    removed.

    Args:
        filepath (str): Path of the directory to empty.

    Returns:
        bool: True if the directory could be listed and every file in it was
        removed; False if listing failed or at least one removal failed.
    """
    try:
        # The context manager closes the scandir iterator even if an error
        # occurs while the directory is being listed.
        with os.scandir(filepath) as entries:
            file_paths = [
                entry.path
                for entry in entries
                if not entry.is_dir(follow_symlinks=False)
            ]
    except OSError as error:
        print(f"Exception: {error}")
        return False

    all_removed = True
    for file_path in file_paths:
        try:
            os.remove(file_path)
        except OSError as error:
            # Report the failure but keep going with the other files.
            print(f"Exception: {error}")
            all_removed = False

    return all_removed


# Function to create a star-shaped graph visualization
def create_star_graph(nodes_and_weights, title):
    """
    Create a star-shaped graph visualization and render it in the app.

    A central "resume" node is connected to every node in
    ``nodes_and_weights``. The graph is positioned with ``nx.spring_layout``
    and drawn as a Plotly figure whose markers are colored by their number of
    neighbours, then displayed with ``st.plotly_chart``.

    Args:
        nodes_and_weights (list): List of (node, weight) tuples. Each weight
            is multiplied by 100 and stored as the ``weight`` attribute of the
            edge between the central node and that node.
        title (str): Title for the graph.

    Returns:
        None
    """
    # Build the star: one hub plus an edge from the hub to every other node
    central_node = "resume"
    graph = nx.Graph()
    graph.add_node(central_node)
    for node, weight in nodes_and_weights:
        graph.add_node(node)
        graph.add_edge(central_node, node, weight=weight * 100)

    # Get position layout for nodes
    pos = nx.spring_layout(graph)

    # Create edge trace; a None entry after each pair breaks the line so the
    # edges are drawn as separate segments
    edge_x = []
    edge_y = []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Collect per-node coordinates, neighbour counts and hover labels
    node_x = []
    node_y = []
    node_adjacencies = []
    node_text = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        connection_count = len(graph.adj[node])
        node_adjacencies.append(connection_count)
        node_text.append(f"{node}<br># of connections: {connection_count}")

    # Create node trace, colored by number of connections
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        text=node_text,
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=node_adjacencies,
            size=10,
            colorbar=dict(
                thickness=15,
                # Nested title replaces the deprecated "titleside" attribute
                title=dict(text="Node Connections", side="right"),
                xanchor="left",
            ),
            line_width=2,
        ),
    )

    # Create the figure
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # Nested title replaces the deprecated "titlefont" attribute
            title=dict(text=title, font=dict(size=16)),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )

    # Show the figure
    st.plotly_chart(figure, use_container_width=True)


# Function to create annotated text with highlighting
def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """
    Build the token sequence for ``annotated_text`` with keywords highlighted.

    Args:
        input_string (str): Text to tokenize with ``nltk.word_tokenize``.
        word_list (List[str]): Keywords; a token is highlighted only when it
            is exactly equal to one of them.
        annotation (str): Label attached to each highlighted token.
        color_code (str): Color attached to each highlighted token.

    Returns:
        List: One entry per token, in order. Keyword tokens become
        ``(token, annotation, color_code)`` tuples; other tokens stay plain
        strings.
    """
    tokens = nltk.word_tokenize(input_string)

    # Set membership keeps the per-token lookup constant time
    keywords = set(word_list)

    return [
        (token, annotation, color_code) if token in keywords else token
        for token in tokens
    ]


# Function to read JSON data from a file
def read_json(filename):
    """
    Read JSON data from a file.

    The file is decoded as UTF-8 rather than with the platform's locale
    encoding, so non-ASCII content is read the same way on every system.

    Args:
        filename (str): The path to the JSON file.

    Returns:
        dict: The parsed JSON data (whatever ``json.load`` produces for the
        document's top-level value).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, encoding="utf-8") as f:
        return json.load(f)


# Function to tokenize a string
def tokenize_string(input_string):
    """
    Split a string into tokens with ``nltk.word_tokenize``.

    Args:
        input_string (str): The text to tokenize.

    Returns:
        List[str]: The tokens, in the order the tokenizer produced them.
    """
    return nltk.word_tokenize(input_string)


# Cleanup processed resume / job descriptions: empty each processed-output
# directory, resumes first and job descriptions second
for processed_subdir in ("Resumes", "JobDescription"):
    delete_from_dir(os.path.join(cwd, "Data", "Processed", processed_subdir))

# Set default session states for first run: each upload starts as "Pending"
# with an empty saved-file path
if "resumeUploaded" not in st.session_state:
    st.session_state["resumeUploaded"] = "Pending"
    st.session_state["resumePath"] = ""

if "jobDescriptionUploaded" not in st.session_state:
    st.session_state["jobDescriptionUploaded"] = "Pending"
    st.session_state["jobDescriptionPath"] = ""

# Display the main title and sub-headers
st.title(":blue[Resume Matcher]")

# Sidebar blurbs that are each followed by a badge:
# (markdown text, badge type, badge name)
sidebar_badges = (
    (
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)",
        "github",
        "srbhr/Resume-Matcher",
    ),
    ("For updates follow me on Twitter.", "twitter", "_srbhr_"),
    (
        "If you like the project and would like to further help in development please consider 👇",
        "buymeacoffee",
        "srbhr",
    ),
)

with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )
    # Each blurb is rendered immediately above its badge
    for blurb, badge_type, badge_name in sidebar_badges:
        st.markdown(blurb)
        badge(type=badge_type, name=badge_name)

st.divider()
avs.add_vertical_space(1)

# Save an uploaded file to disk and keep the session state in sync with it
def _store_upload(uploaded_file, state_prefix, target_subdir):
    """
    Persist an uploaded file under Data/<target_subdir> and record it in
    session state.

    Args:
        uploaded_file: Value returned by ``st.file_uploader``; None when no
            file is currently selected.
        state_prefix (str): "resume" or "jobDescription". Selects the
            "<prefix>Uploaded" and "<prefix>Path" session-state keys.
        target_subdir (str): Sub-directory of Data/ that receives the file.

    Returns:
        None
    """
    status_key = f"{state_prefix}Uploaded"
    path_key = f"{state_prefix}Path"

    # No file selected (or the selection was cleared): reset the state
    if uploaded_file is None:
        update_session_state(status_key, "Pending")
        update_session_state(path_key, "")
        return

    # The file name is supplied by the client, so keep only its last path
    # component to make sure the file cannot be written outside target_dir
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    target_dir = os.path.join(cwd, "Data", target_subdir)
    save_path = os.path.join(target_dir, safe_name)

    # Skip the write when this file was already saved on an earlier rerun.
    # Comparing the path as well means a different file that replaces the
    # previous selection directly is saved instead of leaving a stale path.
    if (
        st.session_state[status_key] == "Uploaded"
        and st.session_state[path_key] == save_path
    ):
        return

    # A fresh checkout may not contain the upload directory yet
    os.makedirs(target_dir, exist_ok=True)
    with open(save_path, mode="wb") as w:
        w.write(uploaded_file.getvalue())

    st.toast(f"File {uploaded_file.name} is successfully saved!", icon="✔️")
    update_session_state(status_key, "Uploaded")
    update_session_state(path_key, save_path)


# Upload widgets: resume on the left, job description on the right
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        _store_upload(uploaded_Resume, "resume", "Resumes")

    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader(
            "Choose a Job Description", type="pdf"
        )
        _store_upload(uploaded_JobDescription, "jobDescription", "JobDescription")

# Render (keyword, value) pairs as a two-column table
def _render_keyword_table(keyterms):
    """
    Display keyterms and their values (scaled by 100) as a Plotly table.

    Args:
        keyterms: Iterable of (keyword, value) pairs. When a keyword occurs
            more than once, the last value is the one shown.

    Returns:
        None
    """
    keyword_dict = {keyword: value * 100 for keyword, value in keyterms}

    fig = go.Figure(
        data=[
            go.Table(
                header=dict(
                    values=["Keyword", "Value"],
                    font=dict(size=12, color="white"),
                    fill_color="#1d2078",
                ),
                cells=dict(
                    values=[
                        list(keyword_dict.keys()),
                        list(keyword_dict.values()),
                    ],
                    line_color="darkslategray",
                    fill_color="#6DA9E4",
                ),
            )
        ]
    )
    st.plotly_chart(fig, use_container_width=True)


# Parse both documents and render the analysis once both uploads are saved
with st.spinner("Please wait..."):
    # Both files must be selected AND saved to disk. The resume's own state is
    # checked here; checking the job-description state twice would let the
    # analysis run against a resume path that was never written.
    if (
        uploaded_Resume is not None
        and st.session_state["resumeUploaded"] == "Uploaded"
        and uploaded_JobDescription is not None
        and st.session_state["jobDescriptionUploaded"] == "Uploaded"
    ):

        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(
            read_single_pdf(st.session_state["jobDescriptionPath"])
        )

        # Resume / JD output
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Add containers for each row to avoid overlap

        # Parsed data
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS."
                    )
                    st.caption(
                        "Utilize this to understand how to make your resume ATS friendly."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])

            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Extracted keywords
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the resume."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_file["clean_data"],
                            selected_file["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the job description."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_jd["clean_data"],
                            selected_jd["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )

        # Star graph visualization
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the resume."
                    )

                    # Call the function with your data
                    create_star_graph(selected_file["keyterms"], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the job description."
                    )

                    # Call the function with your data
                    create_star_graph(
                        selected_jd["keyterms"], "Entities from Job Description"
                    )

        # Keyterm data frames; they feed the treemaps further down
        df1 = pd.DataFrame(selected_file["keyterms"], columns=["keyword", "value"])
        df2 = pd.DataFrame(selected_jd["keyterms"], columns=["keyword", "value"])

        # Keywords and values
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    _render_keyword_table(selected_file["keyterms"])
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    _render_keyword_table(selected_jd["keyterms"])

        # Treemaps
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df1,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from your Resume",
                    )
                    st.plotly_chart(fig, use_container_width=True)

            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df2,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from Job Description",
                    )
                    st.plotly_chart(fig, use_container_width=True)

        avs.add_vertical_space(2)
        st.markdown("#### Similarity Score")
        # NOTE(review): leftover console output; no config file is parsed in
        # this block.
        print("Config file parsed successfully:")

        # The score is computed on the space-joined extracted keywords
        resume_string = " ".join(selected_file["extracted_keywords"])
        jd_string = " ".join(selected_jd["extracted_keywords"])
        result = get_score(resume_string, jd_string)
        similarity_score = round(result[0].score * 100, 2)

        # Red below 60, orange from 60 up to (not including) 75, green otherwise
        if similarity_score < 60:
            score_color = "red"
        elif similarity_score < 75:
            score_color = "orange"
        else:
            score_color = "green"

        st.markdown(
            f"Similarity Score obtained for the resume and job description is "
            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
            unsafe_allow_html=True,
        )

        avs.add_vertical_space(2)
        # Highlight job-description keywords inside the resume text
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(
                create_annotated_text(
                    selected_file["clean_data"],
                    selected_jd["extracted_keywords"],
                    "JD",
                    "#F24C3D",
                )
            )

st.divider()

# Go back to top
# The link targets the "#resume-matcher" anchor; presumably that is the anchor
# Streamlit generates for the page title -- TODO confirm.
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")