issue_metrics.py
"""A script for measuring time to first response and time to close for GitHub issues.
This script uses the GitHub API to search for issues/prs/discussions in a repository
and measure the time to first response and time to close for each issue. It then calculates
the average time to first response and time to close and writes the issues with
their metrics to a markdown file.
Functions:
get_per_issue_metrics(issues: Union[List[dict], List[github3.issues.Issue]],
discussions: bool = False), labels: Union[List[str], None] = None,
ignore_users: List[str] = [] -> tuple[List, int, int]:
Calculate the metrics for each issue in a list of GitHub issues.
get_owner(search_query: str) -> Union[str, None]]:
Get the owner from the search query.
main(): Run the issue-metrics script.
"""
import shutil
from typing import List, Union

import github3
import github3.structs

from auth import auth_to_github, get_github_app_installation_token
from classes import IssueWithMetrics
from config import EnvVars, get_env_vars
from discussions import get_discussions
from json_writer import write_to_json
from labels import get_label_metrics, get_stats_time_in_labels
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
from markdown_writer import write_to_markdown
from most_active_mentors import count_comments_per_user, get_mentor_count
from search import get_owners_and_repositories, search_issues
from time_in_draft import get_stats_time_in_draft, measure_time_in_draft
from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
from time_to_close import get_stats_time_to_close, measure_time_to_close
from time_to_first_response import (
    get_stats_time_to_first_response,
    measure_time_to_first_response,
)
from time_to_merge import measure_time_to_merge
from time_to_ready_for_review import get_time_to_ready_for_review


def get_per_issue_metrics(
    issues: Union[List[dict], List[github3.search.IssueSearchResult]],  # type: ignore
    env_vars: EnvVars,
    discussions: bool = False,
    labels: Union[List[str], None] = None,
    ignore_users: Union[List[str], None] = None,
    max_comments_to_eval: int = 20,
    heavily_involved: int = 3,
) -> tuple[List, int, int]:
"""
Calculate the metrics for each issue/pr/discussion in a list provided.
Args:
issues (Union[List[dict], List[github3.search.IssueSearchResult]]): A list of
GitHub issues or discussions.
discussions (bool, optional): Whether the issues are discussions or not.
Defaults to False.
labels (List[str]): A list of labels to measure time spent in. Defaults to empty list.
ignore_users (List[str]): A list of users to ignore when calculating metrics.
env_vars (EnvVars): The environment variables for the script.
Returns:
tuple[List[IssueWithMetrics], int, int]: A tuple containing a
list of IssueWithMetrics objects, the number of open issues,
and the number of closed issues or discussions.
"""
    issues_with_metrics = []
    num_issues_open = 0
    num_issues_closed = 0

    for issue in issues:
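        # Discussions arrive as plain dicts from get_discussions() (note the
        # camelCase "closedAt" key below); issues and PRs are github3 objects.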
        if discussions:
            issue_with_metrics = IssueWithMetrics(
                issue["title"],
                issue["url"],
                None,
                None,
                None,
                None,
                None,
                None,
            )
            if env_vars.hide_time_to_first_response is False:
                issue_with_metrics.time_to_first_response = (
                    measure_time_to_first_response(None, issue, ignore_users)
                )
            if env_vars.enable_mentor_count:
                issue_with_metrics.mentor_activity = count_comments_per_user(
                    None,
                    issue,
                    None,
                    None,
                    ignore_users,
                    max_comments_to_eval,
                    heavily_involved,
                )
            if env_vars.hide_time_to_answer is False:
                issue_with_metrics.time_to_answer = measure_time_to_answer(issue)
            if issue["closedAt"]:
                num_issues_closed += 1
                if not env_vars.hide_time_to_close:
                    issue_with_metrics.time_to_close = measure_time_to_close(
                        None, issue
                    )
            else:
                num_issues_open += 1
        else:
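            # Skip items authored by any of the ignored users.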
            if ignore_users and issue.user["login"] in ignore_users:  # type: ignore
                continue

            issue_with_metrics = IssueWithMetrics(
                title=issue.title,  # type: ignore
                html_url=issue.html_url,  # type: ignore
                author=issue.user["login"],  # type: ignore
            )

            # Check if issue is actually a pull request
            pull_request, ready_for_review_at = None, None
            if issue.issue.pull_request_urls:  # type: ignore
                pull_request = issue.issue.pull_request()  # type: ignore
                ready_for_review_at = get_time_to_ready_for_review(issue, pull_request)
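                # ready_for_review_at marks when a draft PR became ready for
                # review; it feeds the draft-time tracking here and the
                # response and merge measurements below.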
                if env_vars.draft_pr_tracking:
                    issue_with_metrics.time_in_draft = measure_time_in_draft(
                        issue=issue,
                        ready_for_review_at=ready_for_review_at,
                    )

            if env_vars.hide_time_to_first_response is False:
                issue_with_metrics.time_to_first_response = (
                    measure_time_to_first_response(
                        issue, None, pull_request, ready_for_review_at, ignore_users
                    )
                )
            if env_vars.enable_mentor_count:
                issue_with_metrics.mentor_activity = count_comments_per_user(
                    issue,
                    None,
                    pull_request,
                    ready_for_review_at,
                    ignore_users,
                    max_comments_to_eval,
                    heavily_involved,
                )
            if labels and env_vars.hide_label_metrics is False:
                issue_with_metrics.label_metrics = get_label_metrics(issue, labels)
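            # Closed pull requests report time to merge as their time to
            # close; plain issues use the regular time-to-close measurement.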
if issue.state == "closed": # type: ignore
num_issues_closed += 1
if not env_vars.hide_time_to_close:
if pull_request:
issue_with_metrics.time_to_close = measure_time_to_merge(
pull_request, ready_for_review_at
)
else:
issue_with_metrics.time_to_close = measure_time_to_close(
issue, None
)
elif issue.state == "open": # type: ignore
num_issues_open += 1
issues_with_metrics.append(issue_with_metrics)
return issues_with_metrics, num_issues_open, num_issues_closed


def main():  # pragma: no cover
    """Run the issue-metrics script.

    This function authenticates to GitHub, searches for issues/prs/discussions
    using the SEARCH_QUERY environment variable, measures the time to first response
    and close for each issue, calculates the average time to first response,
    and writes the results to a markdown file.

    Raises:
        ValueError: If the SEARCH_QUERY environment variable is not set.
        ValueError: If the search query does not include a repository owner and name.

    """
print("Starting issue-metrics search...")
# Get the environment variables for use in the script
env_vars = get_env_vars()
search_query = env_vars.search_query
token = env_vars.gh_token
ignore_users = env_vars.ignore_users
hide_items_closed_count = env_vars.hide_items_closed_count
hide_label_metrics = env_vars.hide_label_metrics
non_mentioning_links = env_vars.non_mentioning_links
report_title = env_vars.report_title
output_file = env_vars.output_file
rate_limit_bypass = env_vars.rate_limit_bypass
ghe = env_vars.ghe
gh_app_id = env_vars.gh_app_id
gh_app_installation_id = env_vars.gh_app_installation_id
gh_app_private_key_bytes = env_vars.gh_app_private_key_bytes
gh_app_enterprise_only = env_vars.gh_app_enterprise_only
# Auth to GitHub.com
github_connection = auth_to_github(
token,
gh_app_id,
gh_app_installation_id,
gh_app_private_key_bytes,
ghe,
gh_app_enterprise_only,
)
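
    # If authenticating as a GitHub App (no token supplied), exchange the app
    # credentials for an installation token used by later API calls.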
    if not token and gh_app_id and gh_app_installation_id and gh_app_private_key_bytes:
        token = get_github_app_installation_token(
            ghe, gh_app_id, gh_app_private_key_bytes, gh_app_installation_id
        )

    enable_mentor_count = env_vars.enable_mentor_count
    min_mentor_count = int(env_vars.min_mentor_comments)
    max_comments_eval = int(env_vars.max_comments_eval)
    heavily_involved_cutoff = int(env_vars.heavily_involved_cutoff)

    # Get the owners and repositories from the search query
    owners_and_repositories = get_owners_and_repositories(search_query)

    # Every search query must include a repository owner for each repository, organization, or user
    for item in owners_and_repositories:
        if item["owner"] is None:
            raise ValueError(
                "The search query must include a repository owner and name "
                "(ie. repo:owner/repo), an organization (ie. org:organization), "
                "a user (ie. user:login) or an owner (ie. owner:user-or-organization)"
            )

    # Determine if there are labels to measure
    labels = env_vars.labels_to_measure

    # Search for issues
    # If type:discussions is in the search_query, search for discussions using get_discussions()
    if "type:discussions" in search_query:
        if labels:
            raise ValueError(
                "The search query for discussions cannot include labels to measure"
            )
        issues = get_discussions(token, search_query, ghe)
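        # No matching discussions: still write the (empty) report, then stop.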
        if len(issues) <= 0:
            print("No discussions found")
            write_to_markdown(
                issues_with_metrics=None,
                average_time_to_first_response=None,
                average_time_to_close=None,
                average_time_to_answer=None,
                average_time_in_draft=None,
                average_time_in_labels=None,
                num_issues_opened=None,
                num_issues_closed=None,
                num_mentor_count=None,
                labels=None,
                search_query=search_query,
                hide_label_metrics=False,
                hide_items_closed_count=False,
                non_mentioning_links=False,
                report_title=report_title,
                output_file=output_file,
            )
            return
    else:
        issues = search_issues(
            search_query, github_connection, owners_and_repositories, rate_limit_bypass
        )
        if len(issues) <= 0:
            print("No issues found")
            write_to_markdown(
                issues_with_metrics=None,
                average_time_to_first_response=None,
                average_time_to_close=None,
                average_time_to_answer=None,
                average_time_in_draft=None,
                average_time_in_labels=None,
                num_issues_opened=None,
                num_issues_closed=None,
                num_mentor_count=None,
                labels=None,
                search_query=search_query,
                hide_label_metrics=False,
                hide_items_closed_count=False,
                non_mentioning_links=False,
                report_title=report_title,
                output_file=output_file,
            )
            return

    # Get all the metrics
    issues_with_metrics, num_issues_open, num_issues_closed = get_per_issue_metrics(
        issues,
        discussions="type:discussions" in search_query,
        labels=labels,
        ignore_users=ignore_users,
        max_comments_to_eval=max_comments_eval,
        heavily_involved=heavily_involved_cutoff,
        env_vars=env_vars,
    )

    stats_time_to_first_response = get_stats_time_to_first_response(issues_with_metrics)
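
    # Time-to-close stats are only meaningful once at least one item has closed.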
    stats_time_to_close = None
    if num_issues_closed > 0:
        stats_time_to_close = get_stats_time_to_close(issues_with_metrics)

    stats_time_to_answer = get_stats_time_to_answer(issues_with_metrics)
    stats_time_in_draft = get_stats_time_in_draft(issues_with_metrics)

    num_mentor_count = 0
    if enable_mentor_count:
        num_mentor_count = get_mentor_count(issues_with_metrics, min_mentor_count)

    # Get stats describing the time in label for each label and store it in a dictionary
    # where the key is the label and the value is the average time
    stats_time_in_labels = get_stats_time_in_labels(issues_with_metrics, labels)

    # Write the results to json and a markdown file
    write_to_json(
        issues_with_metrics=issues_with_metrics,
        stats_time_to_first_response=stats_time_to_first_response,
        stats_time_to_close=stats_time_to_close,
        stats_time_to_answer=stats_time_to_answer,
        stats_time_in_draft=stats_time_in_draft,
        stats_time_in_labels=stats_time_in_labels,
        num_issues_opened=num_issues_open,
        num_issues_closed=num_issues_closed,
        num_mentor_count=num_mentor_count,
        search_query=search_query,
        output_file=output_file,
    )
    write_to_markdown(
        issues_with_metrics=issues_with_metrics,
        average_time_to_first_response=stats_time_to_first_response,
        average_time_to_close=stats_time_to_close,
        average_time_to_answer=stats_time_to_answer,
        average_time_in_draft=stats_time_in_draft,
        average_time_in_labels=stats_time_in_labels,
        num_issues_opened=num_issues_open,
        num_issues_closed=num_issues_closed,
        num_mentor_count=num_mentor_count,
        labels=labels,
        search_query=search_query,
        hide_label_metrics=hide_label_metrics,
        hide_items_closed_count=hide_items_closed_count,
        non_mentioning_links=non_mentioning_links,
        report_title=report_title,
        output_file=output_file,
    )
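
    # GitHub caps issue bodies at 65536 characters, so the markdown report is
    # split into chunks when it would not fit into a single issue body.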
    max_char_count = 65535
    if markdown_too_large_for_issue_body("issue_metrics.md", max_char_count):
        split_markdown_file("issue_metrics.md", max_char_count)
        shutil.move("issue_metrics.md", "issue_metrics_full.md")
        shutil.move("issue_metrics_0.md", "issue_metrics.md")
        print(
            "Issue metrics markdown file is too large for GitHub issue body and has been "
            "split into multiple files. ie. issue_metrics.md, issue_metrics_1.md, etc. "
            "The full file is saved as issue_metrics_full.md\n"
            "See https://github.com/github/issue-metrics/blob/main/docs/dealing-with-large-issue-metrics.md"
        )


if __name__ == "__main__":
    main()