Update packages #631

Merged: 4 commits, Feb 13, 2024

Changes from all commits

4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
@@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurrences"})
+     df = df.rename(columns={"count": "occurrences"})

      # changes the name of the company if under a certain threshold
-     df.loc[df.occurrences <= num, "domains"] = "Other"
+     df.loc[df["occurrences"] <= num, "domains"] = "Other"

      # groups others together for final counts
      df = (
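
For context on why these renames changed: pandas 2.0 renamed the column that `value_counts()` produces. A minimal sketch of the behavior this hunk tracks (the toy `email_domains` list is illustrative, not from the repo):

```python
import pandas as pd

# illustrative data, not from the repo
email_domains = ["redhat.com", "redhat.com", "gmail.com"]
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

# pandas < 2.0: the counts column was unnamed and surfaced as the integer 0,
# hence the old rename(columns={0: "occurrences"}).
# pandas >= 2.0: value_counts() returns a Series named "count",
# hence the new rename(columns={"count": "occurrences"}).
df = df.rename(columns={"count": "occurrences"})
print(df)  # columns: ['domains', 'occurrences']
```
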
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
@@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      df["company_name"] = df.index
      df = df.reset_index()
      df["company_name"] = df["company_name"].astype(str)
-     df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
+     df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})

      # applies fuzzy matching comparing all rows to each other
      df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
@@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      )

      # changes the name of the company if under a certain threshold
-     df.loc[df.contribution_count <= num, "company_name"] = "Other"
+     df.loc[df["contribution_count"] <= num, "company_name"] = "Other"

      # groups others together for final counts
      df = (
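
The `fuzzy_match` helper referenced above is defined elsewhere in `gh_org_affiliation.py`; a hypothetical sketch of what such name-similarity grouping can look like with `fuzzywuzzy` (whose `python-Levenshtein` backend this PR pins at 0.24.0):

```python
# hypothetical sketch -- not the repo's actual fuzzy_match implementation
from fuzzywuzzy import fuzz
import pandas as pd

def fuzzy_match(df: pd.DataFrame, name: str, threshold: int = 85) -> list[str]:
    # collect company names similar enough to `name` to be treated as one company
    return [
        other
        for other in df["company_name"]
        if fuzz.ratio(name.lower(), other.lower()) >= threshold
    ]
```
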
@@ -230,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurrences"})
+     df = df.rename(columns={"count": "occurrences"})

      # changes the name of the organization if under a certain threshold
      df.loc[df.occurrences <= num, "domains"] = "Other"
@@ -243,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "contributors"})
+     df = df.rename(columns={"count": "contributors"})

      # changes the name of the org if under a certain threshold
      df.loc[df.contributors <= contributors, "domains"] = "Other"
2 changes: 1 addition & 1 deletion 8Knot/pages/affiliation/visualizations/unqiue_domains.py
@@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurences"})
+     df = df.rename(columns={"count": "occurences"})

      # changes the name of the company if under a certain threshold
      df.loc[df.occurences <= num, "domains"] = "Other"
5 changes: 4 additions & 1 deletion 8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
@@ -318,7 +318,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
      df_sum = df[action_type].sum()

      # calculate the remaining contributions by taking the difference of t_sum and df_sum
-     df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+     # dataframes no longer implement the above 'append' interface (deprecated in pandas 1.4, removed in 2.0)
+     # create a single-entry dataframe that we can concatenate onto the existing df
+     df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+     df = pd.concat([df, df_concat], ignore_index=True)

      return df
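
Since this is the migration the PR applies in two files, a standalone sketch of the pattern (the "Commit" action type and toy numbers are illustrative):

```python
import pandas as pd

# illustrative values
df = pd.DataFrame({"cntrb_id": ["abc", "def"], "Commit": [12, 5]})
action_type, t_sum = "Commit", 20
df_sum = df[action_type].sum()

# pandas < 2.0 allowed appending a dict directly:
# df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# DataFrame.append was deprecated in 1.4 and removed in 2.0, so build a
# one-row frame and concatenate it instead:
df_other = pd.DataFrame({"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_other], ignore_index=True)
print(df.tail(1))  # cntrb_id "Other" with Commit == 3
```
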
@@ -217,13 +217,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
      df_contrib = df[df["assignment_action"] == "assigned"]

      # count the assignments total for each contributor
-     df_contrib = (
-         df_contrib["assignee"]
-         .value_counts()
-         .to_frame()
-         .reset_index()
-         .rename(columns={"assignee": "count", "index": "assignee"})
-     )
+     df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

      # create list of all contributors that meet the assignment requirement
      contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
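
The rename chain could be dropped because pandas 2.0 also changed what `Series.value_counts()` returns; a minimal sketch (toy assignees are illustrative):

```python
import pandas as pd

s = pd.Series(["alice", "bob", "alice"], name="assignee")
df_contrib = s.value_counts().to_frame().reset_index()

# pandas < 2.0: the counts inherited the series name, so reset_index()
# produced ["index", "assignee"] and the rename above was needed to swap
# the labels back into ["assignee", "count"].
# pandas >= 2.0: the index keeps the name "assignee" and the counts are
# named "count", so the frame already comes out as ["assignee", "count"].
print(df_contrib.columns.tolist())  # ['assignee', 'count']
```
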
@@ -214,13 +214,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
      df_contrib = df[df["assignment_action"] == "assigned"]

      # count the assignments total for each contributor
-     df_contrib = (
-         df_contrib["assignee"]
-         .value_counts()
-         .to_frame()
-         .reset_index()
-         .rename(columns={"assignee": "count", "index": "assignee"})
-     )
+     df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

      # create list of all contributors that meet the assignment requirement
      contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
5 changes: 3 additions & 2 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
@@ -189,15 +189,16 @@ def process_data(df: pd.DataFrame, interval, start_date, end_date):
      created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and creates date column from period values
-     df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

      # converts date column to a datetime object, converts to string first to handle period information
      # the period slice is to handle weekly corner case
      df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

      # df for closed issues in time interval
      closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
+
      df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

      # first and last elements of the dataframe are the
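
The new rename here reads oddly (the "count" column is renamed to "created_at"), but it follows from the same pandas 2.0 naming change and preserves the column layout downstream code expects; a minimal sketch with illustrative dates:

```python
import pandas as pd

df = pd.DataFrame({"created_at": ["2024-01-03", "2024-01-10", "2024-02-07"]})
created_range = pd.to_datetime(df["created_at"]).dt.to_period("M").value_counts().sort_index()

# pandas >= 2.0: reset_index() yields ["created_at", "count"], so the period
# column becomes "Date" and the counts reuse the original column name
df_created = created_range.to_frame().reset_index().rename(
    columns={"created_at": "Date", "count": "created_at"}
)
print(df_created.columns.tolist())  # ['Date', 'created_at']
```
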
6 changes: 3 additions & 3 deletions 8Knot/pages/contributions/visualizations/pr_over_time.py
@@ -165,20 +165,20 @@ def process_data(df: pd.DataFrame, interval):
      created_range = df["created_at"].dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and created date column from period values
-     df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

      # converts date column to a datetime object, converts to string first to handle period information
      # the period slice is to handle weekly corner case
      df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

      # df for merged prs in time interval
      merged_range = pd.to_datetime(df["merged_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_merged = merged_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_merged = merged_range.to_frame().reset_index().rename(columns={"merged_at": "Date", "count": "merged_at"})
      df_merged["Date"] = pd.to_datetime(df_merged["Date"].astype(str).str[:period_slice])

      # df for closed prs in time interval
      closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
      df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

      # A single df created for plotting merged and closed as stacked bar chart
@@ -319,7 +319,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
      df_sum = df[action_type].sum()

      # calculate the remaining contributions by taking the difference of t_sum and df_sum
-     df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+     # dataframes no longer implement the above 'append' interface (deprecated in pandas 1.4, removed in 2.0)
+     # create a single-entry dataframe that we can concatenate onto the existing df
+     df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+     df = pd.concat([df, df_concat], ignore_index=True)

      return df
2 changes: 1 addition & 1 deletion 8Knot/pages/contributors/visualizations/new_contributor.py
@@ -189,7 +189,7 @@ def process_data(df, interval):
      created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and creates date column from period values
-     df_contribs = created_range.to_frame().reset_index().rename(columns={"index": "Date", "created_at": "contribs"})
+     df_contribs = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "contribs"})

      # converts date column to a datetime object, converts to string first to handle period information
      df_contribs["Date"] = pd.to_datetime(df_contribs["Date"].astype(str))
7 changes: 0 additions & 7 deletions docker/Dockerfile
@@ -1,4 +1,4 @@
  FROM registry.access.redhat.com/ubi9/python-39:latest

[containerfile-lint warning on line 1 in docker/Dockerfile: "Using latest is prone to errors if the image will ever update. Pin the version explicitly to a release tag"]

  WORKDIR /opt/app-root/src

@@ -11,13 +11,6 @@
  # working directory.
  COPY ./8Knot/ /opt/app-root/src/

- # run flower
- # CMD [ "celery", "-A", "app:celery_app", "flower" ]
-
- # run worker
- # CMD [ "celery", "-A", "app:celery_app", "worker", "--loglevel=INFO" ]
-
  # run app
  # Description of how to choose the number of workers and threads.
  # common wisdom is (2*CPU)+1 workers:
  # https://medium.com/building-the-system/gunicorn-3-means-of-concurrency-efbb547674b7
24 changes: 24 additions & 0 deletions requirements-base.txt
@@ -0,0 +1,24 @@
+ # top-level required modules w/o pinned dependencies
+ # DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
+ # ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
+ # pip3 freeze -r requirements-base.txt > requirements.txt
+ sqlalchemy
+ celery
+ dash
+ dash-bootstrap-components
+ dash-mantine-components
+ dash-bootstrap-templates
+ flask-login
+ flask
+ redis
+ uuid
+ plotly
+ psycopg2-binary
+ pandas
+ numpy
+ python-dateutil
+ fuzzywuzzy
+ python-Levenshtein
+ datetime
+ gunicorn
+ pyarrow
113 changes: 57 additions & 56 deletions requirements.txt
@@ -1,60 +1,61 @@
- -i https://pypi.org/simple
- amqp==5.1.1 ; python_version >= '3.6'
- async-timeout==4.0.2 ; python_version >= '3.6'
- billiard==3.6.4.0
- celery==5.2.7
- # celery-flower==1.*
- cfgv==3.3.1 ; python_full_version >= '3.6.1'
- click==8.1.3 ; python_version >= '3.7'
- click-didyoumean==0.3.0 ; python_full_version >= '3.6.2' and python_full_version < '4.0.0'
- click-plugins==1.1.1
- click-repl==0.2.0
- dash==2.7.0
+ # top-level required modules w/o pinned dependencies
+ # DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
+ # ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
+ # pip3 freeze -r requirements-base.txt > requirements.txt
+ SQLAlchemy==2.0.25
+ celery==5.3.6
+ dash==2.15.0
+ dash-bootstrap-components==1.5.0
+ dash-mantine-components==0.12.1
+ dash-bootstrap-templates==1.1.2
+ Flask-Login==0.6.3
+ Flask==3.0.2
+ redis==5.0.1
+ uuid==1.30
+ plotly==5.18.0
+ psycopg2-binary==2.9.9
+ pandas==2.2.0
+ numpy==1.26.4
+ python-dateutil==2.8.2
+ fuzzywuzzy==0.18.0
+ python-Levenshtein==0.24.0
+ DateTime==5.4
+ gunicorn==21.2.0
+ pyarrow==15.0.0
+ ## The following requirements were added by pip freeze:
+ amqp==5.2.0
+ billiard==4.2.0
+ blinker==1.7.0
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ click-didyoumean==0.3.0
+ click-plugins==1.1.1
+ click-repl==0.3.0
  dash-core-components==2.0.0
  dash-html-components==2.0.0
  dash-table==5.0.0
- deprecated==1.2.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- distlib==0.3.6
- filelock==3.8.0 ; python_version >= '3.7'
- flask==2.2.2 ; python_version >= '3.7'
- gunicorn==20.1.0
- identify==2.5.8 ; python_version >= '3.7'
- itsdangerous==2.1.2 ; python_version >= '3.7'
- jinja2==3.1.2 ; python_version >= '3.7'
- kombu==5.2.4 ; python_version >= '3.7'
- markupsafe==2.1.1 ; python_version >= '3.7'
- nodeenv==1.7.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
- numpy==1.23.4
- packaging==21.3 ; python_version >= '3.6'
- pandas==1.5.1
- patsy==0.5.3
- platformdirs==2.5.3 ; python_version >= '3.7'
- plotly==5.11.0 ; python_version >= '3.6'
- plotly-express==0.4.1
- pre-commit==2.20.0
- prompt-toolkit==3.0.32 ; python_full_version >= '3.6.2'
- psycopg2-binary
- pyparsing==3.0.9 ; python_full_version >= '3.6.8'
- python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- pytz==2022.6
- pyyaml==6.0 ; python_version >= '3.6'
- redis==4.3.4
- rq==1.11.1
- scipy==1.9.3 ; python_version >= '3.8'
- six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- sqlalchemy==1.4.43
- statsmodels==0.13.5 ; python_version >= '3.7'
- tenacity==8.1.0 ; python_version >= '3.6'
- toml==0.10.2 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
- vine==5.0.0 ; python_version >= '3.6'
- wcwidth==0.2.5
- werkzeug==2.2.2 ; python_version >= '3.7'
- wrapt==1.14.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
- requests
- dash-mantine-components
- pyarrow
- fuzzywuzzy
- python-Levenshtein
- flask-login
+ idna==3.6
+ importlib-metadata==7.0.1
+ itsdangerous==2.1.2
+ Jinja2==3.1.3
+ kombu==5.3.5
+ Levenshtein==0.24.0
+ MarkupSafe==2.1.5
+ nest-asyncio==1.6.0
+ packaging==23.2
+ prompt-toolkit==3.0.43
+ pytz==2024.1
+ rapidfuzz==3.6.1
+ requests==2.31.0
+ retrying==1.3.4
+ six==1.16.0
+ tenacity==8.2.3
+ typing_extensions==4.9.0
+ tzdata==2023.4
+ urllib3==2.2.0
+ vine==5.1.0
+ wcwidth==0.2.13
+ Werkzeug==3.0.1
+ zipp==3.17.0
+ zope.interface==6.1