Skip to content

Commit

Permalink
Merge branch 'dev' into repo_info_page
Browse files Browse the repository at this point in the history
  • Loading branch information
cdolfi authored Jan 12, 2024
2 parents cbe0f54 + 5db4c7a commit 10aee9b
Show file tree
Hide file tree
Showing 20 changed files with 298 additions and 153 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/spellcheck.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ jobs:
- name: Code Checkout
uses: actions/checkout@v2
- name: Spellcheck
uses: rojopolis/spellcheck-github-actions@0.27.0
uses: rojopolis/spellcheck-github-actions@0.35.0
1 change: 1 addition & 0 deletions .wordlist-md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ WSGI
api
cdolfi
cmq
aq
config
deployer
dev
Expand Down
4 changes: 2 additions & 2 deletions 8Knot/cache_manager/db_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _create_application_tables() -> None:

cur.execute(
"""
CREATE UNLOGGED TABLE IF NOT EXISTS company_query(
CREATE UNLOGGED TABLE IF NOT EXISTS affiliation_query(
cntrb_id text,
created text,
id int,
Expand All @@ -178,7 +178,7 @@ def _create_application_tables() -> None:
)
"""
)
logging.warning("CREATED company TABLE")
logging.warning("CREATED affiliation_query TABLE")

cur.execute(
"""
Expand Down
12 changes: 6 additions & 6 deletions 8Knot/pages/affiliation/affiliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import warnings

# import visualization cards
from .visualizations.gh_company_affiliation import gc_gh_company_affiliation
from .visualizations.gh_org_affiliation import gc_gh_org_affiliation
from .visualizations.unqiue_domains import gc_unique_domains
from .visualizations.company_associated_activity import gc_company_associated_activity
from .visualizations.company_core_contributors import gc_company_core_contributors
from .visualizations.org_associated_activity import gc_org_associated_activity
from .visualizations.org_core_contributors import gc_org_core_contributors
from .visualizations.commit_domains import gc_commit_domains

warnings.filterwarnings("ignore")
Expand All @@ -26,15 +26,15 @@
),
dbc.Row(
[
dbc.Col(gc_company_associated_activity, width=6),
dbc.Col(gc_company_core_contributors, width=6),
dbc.Col(gc_org_associated_activity, width=6),
dbc.Col(gc_org_core_contributors, width=6),
],
align="center",
style={"marginBottom": ".5%"},
),
dbc.Row(
[
dbc.Col(gc_gh_company_affiliation, width=6),
dbc.Col(gc_gh_org_affiliation, width=6),
],
align="center",
style={"marginBottom": ".5%"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dateutil.relativedelta import * # type: ignore
import plotly.express as px
from pages.utils.graph_utils import color_seq
from queries.company_query import company_query as cmq
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
import datetime as dt
Expand All @@ -17,14 +17,14 @@
import cache_manager.cache_facade as cf

PAGE = "affiliation"
VIZ_ID = "gh-company-affiliation"
VIZ_ID = "gh-org-affiliation"

gc_gh_company_affiliation = dbc.Card(
gc_gh_org_affiliation = dbc.Card(
[
dbc.CardBody(
[
html.H3(
"Company Affiliation by Github Account Info",
"Organization Affiliation by GitHub Account Info",
className="card-title",
style={"textAlign": "center"},
),
Expand All @@ -33,7 +33,7 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"""
Visualizes Github account institution affiliation.\n
Visualizes GitHub account institution affiliation.\n
Many individuals don't report an affiliated institution, but\n
this count may be considered an absolute lower-bound on affiliation.
"""
Expand Down Expand Up @@ -118,7 +118,7 @@ def toggle_popover(n, is_open):
return is_open


# callback for Company Affiliation by Github Account Info graph
# callback for Organization Affiliation by GitHub Account Info graph
@callback(
Output(f"{PAGE}-{VIZ_ID}", "figure"),
[
Expand All @@ -130,9 +130,9 @@ def toggle_popover(n, is_open):
],
background=True,
)
def gh_company_affiliation_graph(repolist, num, start_date, end_date, bot_switch):
def gh_org_affiliation_graph(repolist, num, start_date, end_date, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist):
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

Expand All @@ -141,7 +141,7 @@ def gh_company_affiliation_graph(repolist, num, start_date, end_date, bot_switch

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=cmq.__name__,
tablename=aq.__name__,
repolist=repolist,
)
# test if there is data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,22 @@
from dateutil.relativedelta import * # type: ignore
import plotly.express as px
from pages.utils.graph_utils import color_seq
from queries.company_query import company_query as cmq
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
import datetime as dt
import app
import cache_manager.cache_facade as cf

PAGE = "affiliation"
VIZ_ID = "company-associated-activity"
VIZ_ID = "organization-associated-activity"

gc_company_associated_activity = dbc.Card(
gc_org_associated_activity = dbc.Card(
[
dbc.CardBody(
[
html.H3(
"Company Associated Activity",
"Organization Associated Activity",
className="card-title",
style={"textAlign": "center"},
),
Expand Down Expand Up @@ -62,12 +62,12 @@
[
dbc.Label(
"Contributions Required:",
html_for=f"company-contributions-required-{PAGE}-{VIZ_ID}",
html_for=f"contributions-required-{PAGE}-{VIZ_ID}",
width={"size": "auto"},
),
dbc.Col(
dbc.Input(
id=f"company-contributions-required-{PAGE}-{VIZ_ID}",
id=f"contributions-required-{PAGE}-{VIZ_ID}",
type="number",
min=1,
max=100,
Expand All @@ -78,6 +78,19 @@
className="me-2",
width=2,
),
dbc.Col(
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
],
value=[""],
inline=True,
switch=True,
),
width=4,
),
],
align="center",
),
Expand Down Expand Up @@ -127,19 +140,20 @@ def toggle_popover(n, is_open):
return is_open


# callback for Company Affiliation by Github Account Info graph
# callback for Organization Associated Activity graph
@callback(
Output(f"{PAGE}-{VIZ_ID}", "figure"),
[
Input("repo-choices", "data"),
Input(f"company-contributions-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"contributions-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input(f"email-filter-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_switch):
def org_associated_activity_graph(repolist, num, start_date, end_date, email_filter, bot_switch):
"""Each contribution is associated with a contributor. That contributor can be associated with
more than one different email. Hence each contribution is associated with all of the emails that a contributor has historically used.
Expand All @@ -154,7 +168,7 @@ def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_sw
"""

# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist):
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

Expand All @@ -163,7 +177,7 @@ def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_sw

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=cmq.__name__,
tablename=aq.__name__,
repolist=repolist,
)

Expand All @@ -177,15 +191,15 @@ def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_sw
df = df[~df["cntrb_id"].isin(app.bots_list)]

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, num, start_date, end_date)
df = process_data(df, num, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)

Expand All @@ -212,7 +226,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):

df = df.rename(columns={0: "occurrences"})

# changes the name of the company if under a certain threshold
# changes the name of the organization if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"

# groups others together for final counts
Expand All @@ -224,6 +238,16 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
.reset_index(drop=True)
)

# remove other from set
df = df[df.domains != "Other"]

# removes gmail.com and/or GitHub noreply domains when the matching filter is checked
if email_filter is not None:
if "gmail" in email_filter:
df = df[df.domains != "gmail.com"]
if "github" in email_filter:
df = df[df.domains != "users.noreply.github.com"]

return df


Expand Down
Loading

0 comments on commit 10aee9b

Please sign in to comment.