From 4625ecdff8264e5768f9771ac7c8f29b90c7d5c7 Mon Sep 17 00:00:00 2001 From: John-Paul Navarro Date: Thu, 31 Oct 2024 09:44:02 -0500 Subject: [PATCH] Load CiderFeatures model if CIDER_FEATURES_PATH is set --- CHANGELOG | 3 ++ bin/router_cider.py | 81 ++++++++++++++++++++++++++++++++----- conf/router_cider.conf.prod | 1 + 3 files changed, 76 insertions(+), 9 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 34b5a86..1894bb6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +v2.6.0 20241031 JP + - Load CiderFeatures model if CIDER_FEATURES_PATH is set + v2.5.0 20241030 JP - Load new CiderGroups model from new API - Use urlparse for source url parsing, unfortunately destination url uses non-standard scheme diff --git a/bin/router_cider.py b/bin/router_cider.py index 33f0c4a..88efe0d 100755 --- a/bin/router_cider.py +++ b/bin/router_cider.py @@ -142,15 +142,14 @@ def Setup(self): if self.srcparsed.scheme not in ['http', 'https'] or self.dest['scheme'] not in ['warehouse']: self.logger.error('Can only daemonize when source=[http|https] and destination=warehouse') sys.exit(1) - # Optional for now -# if self.srcparsed.scheme in ['http', 'https'] and not self.config.get('CIDER_GROUPS_PATH'): -# self.logger.error('Required config missing: CIDER_GROUPS_PATH') -# sys.exit(1) # If path to the CiDeR last update API is set, use it to determine if there are resource updates if self.srcparsed.scheme in ['http', 'https'] and self.config.get('CIDER_LAST_PATH'): self.lasturldisplay = urljoin(urlunparse(self.srcparsed), self.config.get('CIDER_LAST_PATH')) self.lasturl = urlparse(self.lasturldisplay) + if self.srcparsed.scheme in ['http', 'https'] and self.config.get('CIDER_FEATURES_PATH'): + self.featuresurldisplay = urljoin(urlunparse(self.srcparsed), self.config.get('CIDER_FEATURES_PATH')) + self.featuresurl = urlparse(self.featuresurldisplay) if self.srcparsed.scheme in ['http', 'https'] and self.config.get('CIDER_GROUPS_PATH'): self.groupsurldisplay = urljoin(urlunparse(self.srcparsed), self.config.get('CIDER_GROUPS_PATH')) self.groupsurl = urlparse(self.groupsurldisplay) @@ -158,6 +157,7 @@ def Setup(self): self.logger.info('Source: {}'.format(self.srcdisplay)) self.logger.info('Destination: {}'.format(self.destdisplay)) if hasattr(self, 'lasturldisplay'): self.logger.info('Last URL: {}'.format(self.lasturldisplay)) + if hasattr(self, 'featuresurldisplay'): self.logger.info('Features URL: {}'.format(self.featuresurldisplay)) if hasattr(self, 'groupsurldisplay'): self.logger.info('Groups URL: {}'.format(self.groupsurldisplay)) self.logger.info('Config: {}' .format(self.config_file)) self.logger.info('Log Level: {}({})'.format(loglevel_str, loglevel_num)) @@ -168,6 +168,8 @@ def Setup(self): 'resource_descriptive_name', 'resource_description', 'project_affiliation', 'provider_level', 'resource_status', 'current_statuses', 'updated_at'] + self.feature_model_fields = ['id', 'name', 'description', + 'features'] self.group_model_fields = ['info_groupid', 'name', 'description', 'group_logo_url', 'group_types', 'info_resourceids'] @@ -396,6 +398,48 @@ def Warehouse_Resources(self, info_json): self.logger.error('{} deleting ID={}: {}'.format(type(e).__name__, id, str(e))) return(True, '') + def Warehouse_Features(self, info_json): + self.cur = {} # Feature categories currently in database + self.new = {} # New feature categories in document + for item in CiderFeatures.objects.all(): + self.cur[item.id] = item + self.logger.debug('Retrieved from database {}/feature categories'.format(len(self.cur))) + + for p_feat in info_json['feature_categories']: # Iterating over feature groups + id = p_feat['id'] + # All the attributes, then remove the ones that have their own field + other_attributes=p_feat.copy() + for attrib in self.feature_model_fields: + other_attributes.pop(attrib, None) + + try: + model, created = CiderFeatures.objects.update_or_create( + feature_category_id=id, + defaults = { + 'feature_category_name': p_feat['name'], + 'feature_category_description': p_feat['description'], + 'features': p_feat['features'], + 'other_attributes': other_attributes + }) + model.save() + self.logger.debug('FeatureCategory ID={}, created={}'.format(id, created)) + self.new[id]=model + self.FCOUNTERS.update({'Update'}) + except (DataError, IntegrityError) as e: + msg = '{} saving ID={} ({}): {}'.format(type(e).__name__, id, p_feat['feature_category_name'], str(e)) + self.logger.error(msg) + return(False, msg) + + for id in self.cur: + if id not in self.new: + try: + CiderFeatures.objects.filter(feature_category_id=id).delete() + self.FCOUNTERS.update({'Delete'}) + self.logger.info('Deleted ID={}'.format(id)) + except (DataError, IntegrityError) as e: + self.logger.error('{} deleting ID={}: {}'.format(type(e).__name__, id, str(e))) + return(True, '') + def Warehouse_Groups(self, info_json): self.cur = {} # Groups currently in database self.new = {} # New groups in document @@ -500,7 +544,8 @@ def Run(self): while True: loop_start_utc = datetime.now(timezone.utc) self.RCOUNTERS = Counter() # Resource - self.GCOUNTERS = Counter() # Group + self.FCOUNTERS = Counter() # Resource + self.GCOUNTERS = Counter() # Group self.GCOUNTERS = Counter() # Group if self.srcparsed.scheme == 'file': RAW = self.Read_Cache(self.srcparsed.path) @@ -520,13 +565,31 @@ def Run(self): pa_id = '{}:{}:{}'.format(pa_application, pa_function, pa_topic) pa = ProcessingActivity(pa_application, pa_function, pa_id , pa_topic, pa_about) (rc, warehouse_msg) = self.Warehouse_Resources(RAW) - if rc: - if hasattr(self, 'groupsurl'): - RAWGROUPS = self.Retrieve_CiDeR_by_Affiliation(self.groupsurl) - (rc, warehouse_msg) = self.Warehouse_Groups(RAWGROUPS) + if hasattr(self, 'featuresurl'): + RAWFEATURES = self.Retrieve_CiDeR_by_Affiliation(self.featuresurl) + (rc2, warehouse_msg2) = self.Warehouse_Features(RAWFEATURES) + if rc2: + rc = max(rc, rc2) + if warehouse_msg2: + if warehouse_msg: + warehouse_msg = '; '.join(warehouse_msg, warehouse_msg2) + else: + warehouse_msg = warehouse_msg2 + if hasattr(self, 'groupsurl'): + RAWGROUPS = self.Retrieve_CiDeR_by_Affiliation(self.groupsurl) + (rc3, warehouse_msg3) = self.Warehouse_Groups(RAWGROUPS) + if rc3: + rc = max(rc, rc3) + if warehouse_msg3: + if warehouse_msg: + warehouse_msg = '; '.join(warehouse_msg, warehouse_msg3) + else: + warehouse_msg = warehouse_msg3 loop_end_utc = datetime.now(timezone.utc) summary_msg = 'Processed resources in {:.3f}/seconds: {}/updates, {}/deletes, {}/skipped'.format((loop_end_utc - loop_start_utc).total_seconds(), self.RCOUNTERS['Update'], self.RCOUNTERS['Delete'], self.RCOUNTERS['Skip']) + if hasattr(self, 'featuresurl'): + summary_msg = summary_msg + '; {}/feature group updates'.format(self.FCOUNTERS['Update']) if hasattr(self, 'groupsurl'): summary_msg = summary_msg + '; {}/group updates'.format(self.GCOUNTERS['Update']) self.logger.info(summary_msg) diff --git a/conf/router_cider.conf.prod b/conf/router_cider.conf.prod index 5f1eee3..54365ce 100644 --- a/conf/router_cider.conf.prod +++ b/conf/router_cider.conf.prod @@ -2,6 +2,7 @@ "DESTINATION": "warehouse", "CIDER_INFO_URL": "https://cider.access-ci.org/xsede-api/provider/rdr/v1/resources", "CIDER_LAST_PATH": "/xsede-api/provider/rdr/v1/resources/last_update", + "CIDER_FEATURES_PATH": "/xsede-api/provider/rdr/v1/feature_categories", "CIDER_GROUPS_PATH": "/xsede-api/provider/rdr/v1/groups", "AFFILIATIONS": ["XSEDE", "TACC", "ACCESS"], "LOG_FILE": "%APP_HOME%/%APP_NAME%.log",