Update packages #631

Merged: 4 commits, Feb 13, 2024

Changes from all commits

4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
@@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurrences"})
+     df = df.rename(columns={"count": "occurrences"})

      # changes the name of the company if under a certain threshold
-     df.loc[df.occurrences <= num, "domains"] = "Other"
+     df.loc[df["occurrences"] <= num, "domains"] = "Other"

      # groups others together for final counts
      df = (
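
For context on why these renames changed: pandas 2.0 renamed the column that `value_counts()` produces. A minimal sketch of the behavior this hunk tracks (the toy `email_domains` list is illustrative, not from the repo):

```python
import pandas as pd

# illustrative data, not from the repo
email_domains = ["redhat.com", "redhat.com", "gmail.com"]
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

# pandas < 2.0: the counts column was unnamed and surfaced as the integer 0,
# hence the old rename(columns={0: "occurrences"}).
# pandas >= 2.0: value_counts() returns a Series named "count",
# hence the new rename(columns={"count": "occurrences"}).
df = df.rename(columns={"count": "occurrences"})
print(df)  # columns: ['domains', 'occurrences']
```
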
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
@@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      df["company_name"] = df.index
      df = df.reset_index()
      df["company_name"] = df["company_name"].astype(str)
-     df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
+     df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})

      # applies fuzzy matching comparing all rows to each other
      df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
@@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      )

      # changes the name of the company if under a certain threshold
-     df.loc[df.contribution_count <= num, "company_name"] = "Other"
+     df.loc[df["contribution_count"] <= num, "company_name"] = "Other"

      # groups others together for final counts
      df = (
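
The `fuzzy_match` helper referenced above is defined elsewhere in `gh_org_affiliation.py`; a hypothetical sketch of what such name-similarity grouping can look like with `fuzzywuzzy` (whose `python-Levenshtein` backend this PR pins at 0.24.0):

```python
# hypothetical sketch -- not the repo's actual fuzzy_match implementation
from fuzzywuzzy import fuzz
import pandas as pd

def fuzzy_match(df: pd.DataFrame, name: str, threshold: int = 85) -> list[str]:
    # collect company names similar enough to `name` to be treated as one company
    return [
        other
        for other in df["company_name"]
        if fuzz.ratio(name.lower(), other.lower()) >= threshold
    ]
```
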
@@ -230,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurrences"})
+     df = df.rename(columns={"count": "occurrences"})

      # changes the name of the organization if under a certain threshold
      df.loc[df.occurrences <= num, "domains"] = "Other"
@@ -243,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "contributors"})
+     df = df.rename(columns={"count": "contributors"})

      # changes the name of the org if under a certain threshold
      df.loc[df.contributors <= contributors, "domains"] = "Other"
2 changes: 1 addition & 1 deletion 8Knot/pages/affiliation/visualizations/unqiue_domains.py
@@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
      # creates df of domains and counts
      df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

-     df = df.rename(columns={0: "occurences"})
+     df = df.rename(columns={"count": "occurences"})

      # changes the name of the company if under a certain threshold
      df.loc[df.occurences <= num, "domains"] = "Other"
5 changes: 4 additions & 1 deletion 8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
@@ -318,7 +318,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
      df_sum = df[action_type].sum()

      # calculate the remaining contributions by taking the difference of t_sum and df_sum
-     df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+     # dataframes no longer implement the above 'append' interface (deprecated in pandas 1.4, removed in 2.0)
+     # create a single-entry dataframe that we can concatenate onto the existing df
+     df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+     df = pd.concat([df, df_concat], ignore_index=True)

      return df
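
Since this is the migration the PR applies in two files, a standalone sketch of the pattern (the "Commit" action type and toy numbers are illustrative):

```python
import pandas as pd

# illustrative values
df = pd.DataFrame({"cntrb_id": ["abc", "def"], "Commit": [12, 5]})
action_type, t_sum = "Commit", 20
df_sum = df[action_type].sum()

# pandas < 2.0 allowed appending a dict directly:
# df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# DataFrame.append was deprecated in 1.4 and removed in 2.0, so build a
# one-row frame and concatenate it instead:
df_other = pd.DataFrame({"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_other], ignore_index=True)
print(df.tail(1))  # cntrb_id "Other" with Commit == 3
```
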
@@ -217,13 +217,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
      df_contrib = df[df["assignment_action"] == "assigned"]

      # count the assignments total for each contributor
-     df_contrib = (
-         df_contrib["assignee"]
-         .value_counts()
-         .to_frame()
-         .reset_index()
-         .rename(columns={"assignee": "count", "index": "assignee"})
-     )
+     df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

      # create list of all contributors that meet the assignment requirement
      contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
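
The rename chain could be dropped because pandas 2.0 also changed what `Series.value_counts()` returns; a minimal sketch (toy assignees are illustrative):

```python
import pandas as pd

s = pd.Series(["alice", "bob", "alice"], name="assignee")
df_contrib = s.value_counts().to_frame().reset_index()

# pandas < 2.0: the counts inherited the series name, so reset_index()
# produced ["index", "assignee"] and the rename above was needed to swap
# the labels back into ["assignee", "count"].
# pandas >= 2.0: the index keeps the name "assignee" and the counts are
# named "count", so the frame already comes out as ["assignee", "count"].
print(df_contrib.columns.tolist())  # ['assignee', 'count']
```
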
@@ -214,13 +214,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
      df_contrib = df[df["assignment_action"] == "assigned"]

      # count the assignments total for each contributor
-     df_contrib = (
-         df_contrib["assignee"]
-         .value_counts()
-         .to_frame()
-         .reset_index()
-         .rename(columns={"assignee": "count", "index": "assignee"})
-     )
+     df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

      # create list of all contributors that meet the assignment requirement
      contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
5 changes: 3 additions & 2 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
@@ -189,15 +189,16 @@ def process_data(df: pd.DataFrame, interval, start_date, end_date):
      created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and creates date column from period values
-     df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

      # converts date column to a datetime object, converts to string first to handle period information
      # the period slice is to handle weekly corner case
      df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

      # df for closed issues in time interval
      closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
+
      df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

      # first and last elements of the dataframe are the
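
The new rename here reads oddly (the "count" column is renamed to "created_at"), but it follows from the same pandas 2.0 naming change and preserves the column layout downstream code expects; a minimal sketch with illustrative dates:

```python
import pandas as pd

df = pd.DataFrame({"created_at": ["2024-01-03", "2024-01-10", "2024-02-07"]})
created_range = pd.to_datetime(df["created_at"]).dt.to_period("M").value_counts().sort_index()

# pandas >= 2.0: reset_index() yields ["created_at", "count"], so the period
# column becomes "Date" and the counts reuse the original column name
df_created = created_range.to_frame().reset_index().rename(
    columns={"created_at": "Date", "count": "created_at"}
)
print(df_created.columns.tolist())  # ['Date', 'created_at']
```
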
6 changes: 3 additions & 3 deletions 8Knot/pages/contributions/visualizations/pr_over_time.py
@@ -165,20 +165,20 @@ def process_data(df: pd.DataFrame, interval):
      created_range = df["created_at"].dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and created date column from period values
-     df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

      # converts date column to a datetime object, converts to string first to handle period information
      # the period slice is to handle weekly corner case
      df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

      # df for merged prs in time interval
      merged_range = pd.to_datetime(df["merged_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_merged = merged_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_merged = merged_range.to_frame().reset_index().rename(columns={"merged_at": "Date", "count": "merged_at"})
      df_merged["Date"] = pd.to_datetime(df_merged["Date"].astype(str).str[:period_slice])

      # df for closed prs in time interval
      closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-     df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+     df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
      df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

      # A single df created for plotting merged and closed as stacked bar chart
@@ -319,7 +319,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
      df_sum = df[action_type].sum()

      # calculate the remaining contributions by taking the difference of t_sum and df_sum
-     df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+     # dataframes no longer implement the above 'append' interface (deprecated in pandas 1.4, removed in 2.0)
+     # create a single-entry dataframe that we can concatenate onto the existing df
+     df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+     df = pd.concat([df, df_concat], ignore_index=True)

      return df
2 changes: 1 addition & 1 deletion 8Knot/pages/contributors/visualizations/new_contributor.py
@@ -189,7 +189,7 @@ def process_data(df, interval):
      created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

      # converts to data frame object and creates date column from period values
-     df_contribs = created_range.to_frame().reset_index().rename(columns={"index": "Date", "created_at": "contribs"})
+     df_contribs = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "contribs"})

      # converts date column to a datetime object, converts to string first to handle period information
      df_contribs["Date"] = pd.to_datetime(df_contribs["Date"].astype(str))
7 changes: 0 additions & 7 deletions docker/Dockerfile
@@ -1,4 +1,4 @@
  FROM registry.access.redhat.com/ubi9/python-39:latest

[containerfile-lint warning on line 1 in docker/Dockerfile: "Using latest is prone to errors if the image will ever update. Pin the version explicitly to a release tag"]

  WORKDIR /opt/app-root/src

@@ -11,13 +11,6 @@
  # working directory.
  COPY ./8Knot/ /opt/app-root/src/

- # run flower
- # CMD [ "celery", "-A", "app:celery_app", "flower" ]
-
- # run worker
- # CMD [ "celery", "-A", "app:celery_app", "worker", "--loglevel=INFO" ]
-
  # run app
  # Description of how to choose the number of workers and threads.
  # common wisdom is (2*CPU)+1 workers:
  # https://medium.com/building-the-system/gunicorn-3-means-of-concurrency-efbb547674b7
24 changes: 24 additions & 0 deletions requirements-base.txt
@@ -0,0 +1,24 @@
+ # top-level required modules w/o pinned dependencies
+ # DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
+ # ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
+ # pip3 freeze -r requirements-base.txt > requirements.txt
+ sqlalchemy
+ celery
+ dash
+ dash-bootstrap-components
+ dash-mantine-components
+ dash-bootstrap-templates
+ flask-login
+ flask
+ redis
+ uuid
+ plotly
+ psycopg2-binary
+ pandas
+ numpy
+ python-dateutil
+ fuzzywuzzy
+ python-Levenshtein
+ datetime
+ gunicorn
+ pyarrow
113 changes: 57 additions & 56 deletions requirements.txt
@@ -1,60 +1,61 @@
- -i https://pypi.org/simple
- amqp==5.1.1 ; python_version >= '3.6'
- async-timeout==4.0.2 ; python_version >= '3.6'
- billiard==3.6.4.0
- celery==5.2.7
- # celery-flower==1.*
- cfgv==3.3.1 ; python_full_version >= '3.6.1'
- click==8.1.3 ; python_version >= '3.7'
- click-didyoumean==0.3.0 ; python_full_version >= '3.6.2' and python_full_version < '4.0.0'
- click-plugins==1.1.1
- click-repl==0.2.0
- dash==2.7.0
+ # top-level required modules w/o pinned dependencies
+ # DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
+ # ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
+ # pip3 freeze -r requirements-base.txt > requirements.txt
+ SQLAlchemy==2.0.25
+ celery==5.3.6
+ dash==2.15.0
+ dash-bootstrap-components==1.5.0
+ dash-mantine-components==0.12.1
+ dash-bootstrap-templates==1.1.2
+ Flask-Login==0.6.3
+ Flask==3.0.2
+ redis==5.0.1
+ uuid==1.30
+ plotly==5.18.0
+ psycopg2-binary==2.9.9
+ pandas==2.2.0
+ numpy==1.26.4
+ python-dateutil==2.8.2
+ fuzzywuzzy==0.18.0
+ python-Levenshtein==0.24.0
+ DateTime==5.4
+ gunicorn==21.2.0
+ pyarrow==15.0.0
+ ## The following requirements were added by pip freeze:
+ amqp==5.2.0
+ billiard==4.2.0
+ blinker==1.7.0
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ click-didyoumean==0.3.0
+ click-plugins==1.1.1
+ click-repl==0.3.0
  dash-core-components==2.0.0
  dash-html-components==2.0.0
  dash-table==5.0.0
- deprecated==1.2.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- distlib==0.3.6
- filelock==3.8.0 ; python_version >= '3.7'
- flask==2.2.2 ; python_version >= '3.7'
- gunicorn==20.1.0
- identify==2.5.8 ; python_version >= '3.7'
- itsdangerous==2.1.2 ; python_version >= '3.7'
- jinja2==3.1.2 ; python_version >= '3.7'
- kombu==5.2.4 ; python_version >= '3.7'
- markupsafe==2.1.1 ; python_version >= '3.7'
- nodeenv==1.7.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
- numpy==1.23.4
- packaging==21.3 ; python_version >= '3.6'
- pandas==1.5.1
- patsy==0.5.3
- platformdirs==2.5.3 ; python_version >= '3.7'
- plotly==5.11.0 ; python_version >= '3.6'
- plotly-express==0.4.1
- pre-commit==2.20.0
- prompt-toolkit==3.0.32 ; python_full_version >= '3.6.2'
- psycopg2-binary
- pyparsing==3.0.9 ; python_full_version >= '3.6.8'
- python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- pytz==2022.6
- pyyaml==6.0 ; python_version >= '3.6'
- redis==4.3.4
- rq==1.11.1
- scipy==1.9.3 ; python_version >= '3.8'
- six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
- sqlalchemy==1.4.43
- statsmodels==0.13.5 ; python_version >= '3.7'
- tenacity==8.1.0 ; python_version >= '3.6'
- toml==0.10.2 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
- vine==5.0.0 ; python_version >= '3.6'
- wcwidth==0.2.5
- werkzeug==2.2.2 ; python_version >= '3.7'
- wrapt==1.14.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
- requests
- dash-mantine-components
- pyarrow
- fuzzywuzzy
- python-Levenshtein
- flask-login
+ idna==3.6
+ importlib-metadata==7.0.1
+ itsdangerous==2.1.2
+ Jinja2==3.1.3
+ kombu==5.3.5
+ Levenshtein==0.24.0
+ MarkupSafe==2.1.5
+ nest-asyncio==1.6.0
+ packaging==23.2
+ prompt-toolkit==3.0.43
+ pytz==2024.1
+ rapidfuzz==3.6.1
+ requests==2.31.0
+ retrying==1.3.4
+ six==1.16.0
+ tenacity==8.2.3
+ typing_extensions==4.9.0
+ tzdata==2023.4
+ urllib3==2.2.0
+ vine==5.1.0
+ wcwidth==0.2.13
+ Werkzeug==3.0.1
+ zipp==3.17.0
+ zope.interface==6.1