-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path03_scan_repos.R
49 lines (39 loc) · 1.34 KB
/
03_scan_repos.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Cron script to scan the cloned git repos and count flagged words
library(pins)
library(tidyverse)
library(here)
library(furrr)
# Switch the working directory to the project root reported by here()
setwd(here())

# Register the local pins board used by this script (cache lives in /tmp)
pins::board_register_local(name = "conscious_lang", cache = "/tmp")

# Load the previously pinned results, keeping only the columns that identify
# each repository
repos <- select(
  pin_get("cl_results", board = "conscious_lang"),
  url, org, repo
)
# Count how many times a pattern occurs in a cloned repository.
#
# Runs `ag -c` over clones/<org>/<repo> and adds up the per-file match counts
# that it prints.
#
# org, repo: path components of the clone directory below clones/
# regx:      regular expression handed to ag
#
# Returns a single integer: the total number of matches, 0 when ag prints
# nothing (for example when there is no match).
count_words <- function(org, repo, regx) {
  # Search path for this repo
  path <- here("clones", org, repo)

  # system2() hands its arguments to a shell command line, so quote the
  # pattern and the path; otherwise characters such as [ ] ? or spaces are
  # interpreted by the shell instead of reaching ag untouched.
  ag_args <- c("-c", shQuote(regx), shQuote(path))

  # ag exits with status 1 when nothing matches, which system2() reports as a
  # warning; silence only that call.
  res <- suppressWarnings(
    system2("ag", ag_args, stdout = TRUE, stderr = FALSE)
  )

  # Depending on the ag version each line is either "<count>" or
  # "<path>:<count>". Dropping everything up to the last colon leaves the
  # count in both cases.
  counts <- suppressWarnings(as.integer(sub("^.*:", "", res)))

  # Lines that do not end in a number become NA; skip them rather than letting
  # one odd line turn the whole total into NA.
  sum(counts, na.rm = TRUE)
}
# Count words in clone
#
# Use two background R sessions. `multisession` replaces the `multiprocess`
# strategy, which the future package deprecated and later made defunct.
plan(multisession, workers = 2)

# One column per search term; each call maps count_words() over every
# (org, repo) pair and returns an integer vector of match counts.
repos <- repos %>%
  mutate(
    blacklist = future_map2_int(
      org, repo, count_words, "black[-_]?list", .progress = TRUE
    ),
    whitelist = future_map2_int(
      org, repo, count_words, "white[-_]?list", .progress = TRUE
    ),
    master = future_map2_int(
      org, repo, count_words, "master", .progress = TRUE
    ),
    slave = future_map2_int(
      org, repo, count_words, "slave", .progress = TRUE
    )
  )
# Keep only the repos in which at least one of the terms was found, and store
# them back on the board under the same pin name
pin(
  filter(repos, blacklist + whitelist + master + slave > 0),
  name = "cl_results",
  board = "conscious_lang"
)