Skip to content

Commit 5ab760b

Browse files
authored
Merge pull request #20 from JacobCallahan/things
Add additional extraction options
2 parents 5bbdd0c + 2720fcd commit 5ab760b

File tree

4 files changed

+64
-23
lines changed

4 files changed

+64
-23
lines changed

candore/__init__.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def __init__(self, settings):
2121
def list_endpoints(self):
2222
return self.api_lister.lister_endpoints()
2323

24-
async def save_all_entities(self, mode, output_file, full):
24+
async def save_all_entities(self, mode, output_file, full, max_pages=None, skip_percent=None):
2525
"""Save all the entities to a json file
2626
2727
:param mode: Pre or Post
@@ -36,6 +36,8 @@ async def save_all_entities(self, mode, output_file, full):
3636
async with Extractor(settings=self.settings, apilister=self.api_lister) as extractor:
3737
if full:
3838
extractor.full = True
39+
extractor.max_pages = max_pages
40+
extractor.skip_percent = skip_percent
3941
data = await extractor.extract_all_entities()
4042

4143
if not data:

candore/cli.py

+16 -4
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,21 @@ def apis(ctx):
4444
@click.option("--mode", type=str, help="The mode must be 'pre' or 'post'")
4545
@click.option("-o", "--output", type=str, help="The output file name")
4646
@click.option("--full", is_flag=True, help="Extract data from all the pages of a component")
47+
@click.option("--max-pages", type=int, help="The maximum number of pages to extract per entity")
48+
@click.option("--skip-percent", type=int, help="The percentage of pages to skip per entity")
4749
@click.pass_context
48-
def extract(ctx, mode, output, full):
50+
def extract(ctx, mode, output, full, max_pages, skip_percent):
4951
loop = asyncio.get_event_loop()
5052
candore_obj = ctx.parent.candore
51-
loop.run_until_complete(candore_obj.save_all_entities(mode=mode, output_file=output, full=full))
53+
loop.run_until_complete(
54+
candore_obj.save_all_entities(
55+
mode=mode,
56+
output_file=output,
57+
full=full,
58+
max_pages=max_pages,
59+
skip_percent=skip_percent,
60+
)
61+
)
5262

5363

5464
@candore.command(help="Compare pre and post upgrade data")
@@ -86,9 +96,11 @@ def compare(ctx, pre, post, inverse, output, report_type, record_evs):
8696
"e.g entity/5/description",
8797
)
8898
@click.option(
89-
"--data-file", type=str, help="The data file from which to search the data on a given path"
99+
"--data-file",
100+
type=str,
101+
help="The data file from which to search the data on a given path",
90102
)
91-
@click.option("--delimiter", type=str, default='/', help="Settings file path. Default is '/'")
103+
@click.option("--delimiter", type=str, default="/", help="Settings file path. Default is '/'")
92104
@click.pass_context
93105
def reader(ctx, path, data_file, delimiter):
94106
candore_obj = ctx.parent.candore

candore/modules/comparator.py

+19 -8
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
import json
22

3-
from candore.modules.variations import Variations, Constants
4-
from candore.utils import last_index_of_element, is_list_contains_dict
3+
from candore.modules.variations import Constants
4+
from candore.modules.variations import Variations
5+
from candore.utils import is_list_contains_dict
6+
from candore.utils import last_index_of_element
57

68

79
class Comparator:
@@ -29,7 +31,10 @@ def record_variation(self, pre, post, var_details=None):
2931
big_key = [str(itm) for itm in self.big_key]
3032
full_path = "/".join(big_key)
3133
var_full_path = "/".join([itm for itm in self.big_key if not isinstance(itm, int)])
32-
if var_full_path in self.variations.expected_variations or var_full_path in self.variations.skipped_variations:
34+
if (
35+
var_full_path in self.variations.expected_variations
36+
or var_full_path in self.variations.skipped_variations
37+
):
3338
if self.record_evs:
3439
variation = {
3540
"pre": pre,
@@ -48,7 +53,10 @@ def record_constants(self, pre, post, var_details=None):
4853
big_key = [str(itm) for itm in self.big_key]
4954
full_path = "/".join(big_key)
5055
var_full_path = "/".join([itm for itm in self.big_key if not isinstance(itm, int)])
51-
if var_full_path in self.constants.expected_constants or var_full_path in self.constants.skipped_constants:
56+
if (
57+
var_full_path in self.constants.expected_constants
58+
or var_full_path in self.constants.skipped_constants
59+
):
5260
if self.record_evs:
5361
variation = {
5462
"pre": pre,
@@ -93,19 +101,22 @@ def _is_data_type_list_contains_dict(self, pre, post):
93101
self.compare_all_pres_with_posts(
94102
pre_entity, post_entity, unique_key=pre_entity["id"]
95103
)
104+
post.remove(post_entity)
105+
break
96106
else:
97107
key = list(pre_entity.keys())[0]
98-
if pre_entity[key] == post_entity[key]:
108+
if pre_entity[key] == post_entity.get(key):
99109
self.compare_all_pres_with_posts(
100110
pre_entity[key], post_entity[key], unique_key=key
101111
)
112+
del post_entity[key]
113+
break
102114
if "id" in pre_entity:
103115
self.remove_path(pre_entity["id"])
104116
else:
105117
self.remove_path(pre_entity[list(pre_entity.keys())[0]])
106118

107119
def _is_data_type_list(self, pre, post, unique_key=""):
108-
109120
def custom_key(elem):
110121
return 'None' if elem is None else str(elem)
111122

@@ -121,9 +132,9 @@ def custom_key(elem):
121132
def compare_all_pres_with_posts(self, pre_data, post_data, unique_key="", var_details=None):
122133
if unique_key:
123134
self.big_key.append(unique_key)
124-
if type(pre_data) is dict:
135+
if isinstance(pre_data, dict):
125136
self._is_data_type_dict(pre_data, post_data, unique_key=unique_key)
126-
elif type(pre_data) is list:
137+
elif isinstance(pre_data, list):
127138
self._is_data_type_list(pre_data, post_data, unique_key=unique_key)
128139
else:
129140
if pre_data != post_data:

candore/modules/extractor.py

+26 -10
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import asyncio # noqa: F401
2+
import math
23
from functools import cached_property
34

45
import aiohttp
@@ -68,9 +69,18 @@ async def fetch_page(self, page, _request):
6869
page_entities = await self.paged_results(**_request)
6970
return page_entities
7071

71-
async def fetch_all_pages(self, total_pages, _request):
72+
async def fetch_all_pages(self, total_pages, _request, max_pages=None, skip_percent=None):
73+
if max_pages:
74+
stop = min(total_pages, max_pages)
75+
else:
76+
stop = total_pages
77+
if skip_percent:
78+
step = stop // math.ceil(stop * (100 - skip_percent) / 100)
79+
else:
80+
step = 1
7281
tasks = []
73-
for page in range(2, total_pages + 1):
82+
print(f"Fetching {len(list(range(1, stop, step)))} more page(s).")
83+
for page in range(1, stop, step):
7484
task = asyncio.ensure_future(self.fetch_page(page, _request))
7585
tasks.append(task)
7686
responses = await asyncio.gather(*tasks)
@@ -96,15 +106,21 @@ async def fetch_component_entities(self, **comp_params):
96106
return entity_data
97107
else:
98108
return entity_data
99-
# If the entity has multiple pages, fetch them all
100-
if self.full:
101-
total_pages = results.get("total") // results.get("per_page") + 1
102-
if total_pages > 1:
103-
print(f"Endpoint {endpoint} has {total_pages} pages.")
109+
total_pages = results.get("total") // results.get("per_page") + 1
110+
if total_pages > 1:
111+
print(f"Endpoint {endpoint} has {total_pages} pages.")
112+
# If the entity has multiple pages, fetch them all
113+
if self.full:
104114
pages_data = await self.fetch_all_pages(total_pages, _request)
105-
for page_entities in pages_data:
106-
if page_entities:
107-
entity_data.extend(page_entities)
115+
elif self.max_pages or self.skip_percent:
116+
pages_data = await self.fetch_all_pages(
117+
total_pages, _request, self.max_pages, self.skip_percent
118+
)
119+
else:
120+
return entity_data
121+
for page_entities in pages_data:
122+
if page_entities:
123+
entity_data.extend(page_entities)
108124
return entity_data
109125

110126
async def dependency_ids(self, dependency):

0 commit comments

Comments
 (0)