8383):
8484 dependents = None
8585
86- dependents_file = (
87- config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["DEPENDENTS" ]
88- )
86+ dependents_file = config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["DEPENDENTS" ]
8987
9088 repos = []
9189
@@ -98,27 +96,23 @@ def _(
9896 dependents = get_dependents (config ["PACKAGE_OF_INTEREST_REPO" ])
9997
10098 if dependents :
101-
10299 update_cache (dependents_file , dependents )
103-
100+
104101 repos = [f"https://github.com/{ x } " for x in dependents ]
105102
106- repo_url_cache_file = (
107- config ["OUTPUT" ]["DIR" ]
108- / config ["OUTPUT" ]["REPOSITORIES" ]
109- )
103+ repo_url_cache_file = config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["REPOSITORIES" ]
110104 extra_repos = load_cache (repo_url_cache_file )
111105 if extra_repos :
112106 logger .info ("Adding repos grabbed from github API..." )
113107 repos .extend (extra_repos )
114-
108+
115109 repos = list (set (repos ))
116110
117111 else :
118112 logger .info (f"'dependents' file not found: { dependents_file } " )
119113 logger .info (f"No known repo for: { config ['PACKAGE_OF_INTEREST' ]} " )
120114 logger .info ("Pinging github API to list repos..." )
121-
115+
122116 repos = search_repositories (
123117 queries = QUERIES ,
124118 config = config ,
@@ -222,9 +216,7 @@ def _(config, content_cache_file, get_lock_file, load_cache, mo, pd):
222216 data_poi [["user" , "repo" ]] = data_poi ["name" ].str .split ("/" , expand = True )
223217
224218 data_poi ["content" ] = "mixed"
225- pure_notebook = (data_poi ["n_notebook_parsed" ] > 0 ) & (
226- data_poi ["n_python_file_parsed" ] == 0
227- )
219+ pure_notebook = (data_poi ["n_notebook_parsed" ] > 0 ) & (data_poi ["n_python_file_parsed" ] == 0 )
228220 pure_python = (data_poi ["n_notebook_parsed" ] == 0 ) & (data_poi ["n_python_file_parsed" ] > 0 )
229221 data_poi .loc [pure_notebook , "content" ] = "notebook"
230222 data_poi .loc [pure_python , "content" ] = "python"
@@ -519,7 +511,9 @@ def _(mo):
519511
520512@app .cell
521513def _ (config , data_poi , extract_object_count ):
522- import_df = extract_object_count (data_poi [data_poi ["has_imports" ]], col = "import_counts" , config = config )
514+ import_df = extract_object_count (
515+ data_poi [data_poi ["has_imports" ]], col = "import_counts" , config = config
516+ )
523517 import_df
524518 return (import_df ,)
525519
@@ -581,10 +575,10 @@ def _(mo):
581575
582576@app .cell
583577def _ (config , data_poi , extract_object_count ):
584- function_df = extract_object_count (data_poi [data_poi ["use_imports" ]], col = "function_counts" , config = config )
585- function_df .to_csv (
586- config ["OUTPUT" ]["DIR" ] / "functions_used.csv" , index = False
578+ function_df = extract_object_count (
579+ data_poi [data_poi ["use_imports" ]], col = "function_counts" , config = config
587580 )
581+ function_df .to_csv (config ["OUTPUT" ]["DIR" ] / "functions_used.csv" , index = False )
588582 function_df
589583 return (function_df ,)
590584
@@ -664,6 +658,7 @@ def poia_logger(log_level: str = "INFO") -> logging.Logger:
664658 )
665659
666660 return logging .getLogger ("cohort_creator" )
661+
667662 return (poia_logger ,)
668663
669664
@@ -723,6 +718,7 @@ def plot_usage(df, color=None, weighted=False):
723718 fig .update_layout (xaxis_title = col , yaxis_title = "Usage Count" )
724719
725720 return fig
721+
726722 return (plot_usage ,)
727723
728724
@@ -771,6 +767,7 @@ def plot_repos(df, color=None, bin_size="M3", include_mask=None):
771767 fig .update_traces (xbins = {"start" : start_date , "end" : end_date , "size" : bin_size })
772768
773769 return fig
770+
774771 return (plot_repos ,)
775772
776773
@@ -796,6 +793,7 @@ def plot_versions(df):
796793 )
797794 fig .update_layout (xaxis_title = "Version" , yaxis_title = "Repository Count" )
798795 return fig
796+
799797 return (plot_versions ,)
800798
801799
@@ -857,6 +855,7 @@ def count_imports(content, config):
857855 import_counts [submodule ] = import_counts .get (submodule , 0 ) + 1
858856
859857 return import_counts
858+
860859 return (count_imports ,)
861860
862861
@@ -924,6 +923,7 @@ def visit_Name(self, node):
924923 function_counts = {k : v for k , v in function_counts .items () if k not in imports }
925924
926925 return function_counts
926+
927927 return (count_functions ,)
928928
929929
@@ -969,6 +969,7 @@ def call_api(query: str, page: int, config):
969969 time .sleep (config ["GITHUB_API" ]["SLEEP_TIME" ])
970970
971971 return response
972+
972973 return (call_api ,)
973974
974975
@@ -1027,6 +1028,7 @@ def search_repositories(queries: list[str], config, cache_file: Path | None = No
10271028 logger .info ("Done." )
10281029
10291030 return list (repo_urls )
1031+
10301032 return (search_repositories ,)
10311033
10321034
@@ -1089,6 +1091,7 @@ def get_dependents(repo_of_interest):
10891091 logger .info (duplicates )
10901092
10911093 return dependents
1094+
10921095 return (get_dependents ,)
10931096
10941097
@@ -1121,6 +1124,7 @@ def clone_repo(url):
11211124 logger .info (f"Cloned: { url } " )
11221125 except subprocess .CalledProcessError :
11231126 logger .error (f"Failed to clone: { url } " )
1127+
11241128 return (clone_repo ,)
11251129
11261130
@@ -1135,10 +1139,10 @@ def _update_cache(Path, json, load_cache, logger):
11351139 def update_cache (cache_file : Path | None , data : list [str ]):
11361140 """Update data to a JSON cache file."""
11371141 if not isinstance (cache_file , Path ):
1138- return
1142+ return
11391143 if cache_file is None :
11401144 return
1141-
1145+
11421146 cache_file .parent .mkdir (exist_ok = True , parents = True )
11431147 cache = load_cache (cache_file )
11441148 cache .extend (data )
@@ -1149,6 +1153,7 @@ def update_cache(cache_file: Path | None, data: list[str]):
11491153 logger .error ("TypeError: unhashable type: 'dict'" )
11501154 with cache_file .open ("w" ) as f :
11511155 json .dump (cache , f , indent = 2 )
1156+
11521157 return (update_cache ,)
11531158
11541159
@@ -1164,6 +1169,7 @@ def load_cache(cache_file: Path):
11641169 if response .status_code == 200 :
11651170 return response .json ()
11661171 return []
1172+
11671173 return (load_cache ,)
11681174
11691175
@@ -1300,6 +1306,7 @@ def extract_object_count(df, col, config):
13001306 }
13011307 )
13021308 return pd .DataFrame (object_list )
1309+
13031310 return (extract_object_count ,)
13041311
13051312
@@ -1315,20 +1322,15 @@ def _(
13151322 shutil ,
13161323):
13171324 def extract_data (config ):
1318-
1319- content_cache_file = (
1320- config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["CONTENT" ]
1321- )
1325+ content_cache_file = config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["CONTENT" ]
13221326
13231327 # nbconvert cannot be installed in WASM
13241328 try :
1325- from nbconvert import PythonExporter
1329+ from nbconvert import PythonExporter # noqa : F401
13261330 except ModuleNotFoundError :
13271331 return content_cache_file
13281332
1329- ignore_list = load_cache (
1330- config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["IGNORE" ]
1331- )
1333+ ignore_list = load_cache (config ["OUTPUT" ]["DIR" ] / config ["OUTPUT" ]["IGNORE" ])
13321334
13331335 data_projects = load_cache (content_cache_file )
13341336 repo_already_done = {x ["name" ] for x in data_projects }
@@ -1388,6 +1390,7 @@ def extract_data(config):
13881390 logger .info ("Data extraction done." )
13891391
13901392 return content_cache_file
1393+
13911394 return (extract_data ,)
13921395
13931396
@@ -1398,7 +1401,7 @@ def _(Path, count_functions, count_imports, find_files_with_string, logger):
13981401
13991402 def extract_data_repo (repo_path : Path , config ):
14001403 from nbconvert import PythonExporter
1401-
1404+
14021405 exporter = PythonExporter ()
14031406
14041407 import_counts : dict [str , int ] = {}
@@ -1507,6 +1510,7 @@ def extract_data_repo(repo_path: Path, config):
15071510 "function_counts" : function_counts ,
15081511 "contains_python_2" : contains_python_2 ,
15091512 }
1513+
15101514 return (extract_data_repo ,)
15111515
15121516
@@ -1539,12 +1543,13 @@ def find_files_with_string(search_dir, config, pattern="*.py"):
15391543 else :
15401544 logger .error ("An error occurred" )
15411545 return []
1546+
15421547 return (find_files_with_string ,)
15431548
15441549
15451550@app .cell (disabled = True , hide_code = True )
15461551def test_extract_data_repo (config , extract_data_repo ):
1547- data_this_repo = repo_to_test = config ["CACHE" ]["DIR" ] / "poldrack" / "myconnectome"
1552+ repo_to_test = config ["CACHE" ]["DIR" ] / "poldrack" / "myconnectome"
15481553 extract_data_repo (repo_path = repo_to_test , config = config )
15491554 return
15501555
@@ -1575,6 +1580,7 @@ def get_last_commit_date(directory: Path | str) -> str | None:
15751580 except subprocess .CalledProcessError as e :
15761581 logger .error (f"Error: { e } " )
15771582 return None
1583+
15781584 return (get_last_commit_date ,)
15791585
15801586
@@ -1631,6 +1637,7 @@ def get_version(directory: Path, config):
16311637 )
16321638
16331639 return versions
1640+
16341641 return (get_version ,)
16351642
16361643
@@ -1655,6 +1662,7 @@ def get_lock_file(version_list):
16551662 return "several_lockfile_detected"
16561663 else :
16571664 return next (iter (set (tmp )))
1665+
16581666 return (get_lock_file ,)
16591667
16601668
@@ -1672,6 +1680,7 @@ def extract_version(version):
16721680 if match :
16731681 return match .group (1 )
16741682 return "0.0.0"
1683+
16751684 return (extract_version ,)
16761685
16771686
@@ -1722,6 +1731,7 @@ def get_version_from_pyproject(pyproject_path: Path, config) -> str | None:
17221731 except Exception as e :
17231732 print (f"Error reading pyproject.toml: { e } " )
17241733 return None
1734+
17251735 return (get_version_from_pyproject ,)
17261736
17271737
@@ -1758,6 +1768,7 @@ def get_version_from_setup_cfg(setup_cfg, config) -> str | None:
17581768 except Exception as e :
17591769 print (f"Error reading setup.cfg: { e } " )
17601770 return None
1771+
17611772 return (get_version_from_setup_cfg ,)
17621773
17631774
@@ -1793,6 +1804,7 @@ def visit_Call(self, node):
17931804 if config ["PACKAGE_OF_INTEREST" ] in dep :
17941805 return dep
17951806 return None
1807+
17961808 return (get_version_from_setup_py ,)
17971809
17981810
@@ -1891,9 +1903,7 @@ def _(mo):
18911903
18921904@app .cell
18931905def _ ():
1894- import argparse
18951906 import ast
1896- import base64
18971907 import collections
18981908 import configparser
18991909 import itertools
@@ -1902,23 +1912,19 @@ def _():
19021912 import re
19031913 import shutil
19041914 import subprocess
1905- import sys
19061915 import time
19071916 import warnings
1908- from ast import literal_eval
19091917 from pathlib import Path
19101918
19111919 import marimo as mo
1912- import matplotlib as mpl
19131920 import matplotlib .colors as mcolors
19141921 import matplotlib .pyplot as plt
19151922 import pandas as pd
19161923 import plotly .express as px
19171924 import requests
1918- from marimo import md
1919- from matplotlib import cm
19201925 from packaging .version import Version
19211926 from rich import print
1927+
19221928 return (
19231929 Path ,
19241930 Version ,
0 commit comments