Skip to content

Commit

Permalink
fix affiliation and chaoss pages
Browse files Browse the repository at this point in the history
Signed-off-by: James Kunstle <[email protected]>
  • Loading branch information
JamesKunstle committed Jan 26, 2024
1 parent 7d12c64 commit 19e2ced
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 13 deletions.
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
df.loc[df["occurrences"] <= num, "domains"] = "Other"

# groups others together for final counts
df = (
Expand Down
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
df["company_name"] = df.index
df = df.reset_index()
df["company_name"] = df["company_name"].astype(str)
df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})

# applies fuzzy matching comparing all rows to each other
df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
Expand All @@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
)

# changes the name of the company if under a certain threshold
df.loc[df.contribution_count <= num, "company_name"] = "Other"
df.loc[df["contribution_count"] <= num, "company_name"] = "Other"

# groups others together for final counts
df = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,14 @@
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
{
"label": "Exclude Gmail",
"value": "gmail",
},
{
"label": "Exclude GitHub",
"value": "github",
},
],
value=[""],
inline=True,
Expand Down Expand Up @@ -224,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the organization if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
Expand Down
20 changes: 16 additions & 4 deletions 8Knot/pages/affiliation/visualizations/org_core_contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,14 @@
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
{
"label": "Exclude Gmail",
"value": "gmail",
},
{
"label": "Exclude GitHub",
"value": "github",
},
],
value=[""],
inline=True,
Expand Down Expand Up @@ -165,7 +171,13 @@ def toggle_popover(n, is_open):
background=True,
)
def compay_associated_activity_graph(
repolist, contributions, contributors, start_date, end_date, email_filter, bot_switch
repolist,
contributions,
contributors,
start_date,
end_date,
email_filter,
bot_switch,
):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
Expand Down Expand Up @@ -231,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "contributors"})
df = df.rename(columns={"count": "contributors"})

# changes the name of the org if under a certain threshold
df.loc[df.contributors <= contributors, "domains"] = "Other"
Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/affiliation/visualizations/unqiue_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurences"})
df = df.rename(columns={"count": "occurences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurences <= num, "domains"] = "Other"
Expand Down
7 changes: 6 additions & 1 deletion 8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,12 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
df_sum = df[action_type].sum()

# calculate the remaining contributions by taking the difference of t_sum and df_sum
df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)

# dataframes no longer implement the above 'append' interface (deprecated in pandas 1.4.0, removed in pandas 2.0.0)
# create a single-entry dataframe that we can concatenate onto existing df
df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_concat], ignore_index=True)

return df

Expand Down

0 comments on commit 19e2ced

Please sign in to comment.