From 3cd6c6182f77eda064754bb9ba38e3731c3c7b0f Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Sun, 19 Nov 2023 18:22:18 -0500 Subject: [PATCH 01/18] clean up --- src/pmidcite/eutils/cmds/cmdbase.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pmidcite/eutils/cmds/cmdbase.py b/src/pmidcite/eutils/cmds/cmdbase.py index 41ad9a6..8f6d84e 100644 --- a/src/pmidcite/eutils/cmds/cmdbase.py +++ b/src/pmidcite/eutils/cmds/cmdbase.py @@ -8,19 +8,17 @@ from pmidcite.eutils.cmds.base import EntrezUtilities -#### class EntrezCommands(EntrezUtilities): class CommandBase(EntrezUtilities): """Fetch and write text""" - def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100, **kws): - kws_base = {k:v for k, v in kws.items() if k in EntrezUtilities.exp_kws} + def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100): cfg = Cfg() - super(CommandBase, self).__init__( - cfg.get_email(), cfg.get_apikey(), cfg.get_tool(), **kws_base) + super(CommandBase, self).__init__(cfg.get_email(), cfg.get_apikey(), cfg.get_tool()) self.batch_size = batch_size self.retmax = retmax self.rettype = rettype self.retmode = retmode + ## print(f'CommandBase: retmax({retmax}) retmode({retmode}) rettype({rettype})') # Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved. From ee89f07512c7beb326a7518126759179b71ca447 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Sun, 19 Nov 2023 18:36:03 -0500 Subject: [PATCH 02/18] pylint Python3 style super() wo/args --- src/pmidcite/eutils/cmds/cmdbase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pmidcite/eutils/cmds/cmdbase.py b/src/pmidcite/eutils/cmds/cmdbase.py index 8f6d84e..78f9845 100644 --- a/src/pmidcite/eutils/cmds/cmdbase.py +++ b/src/pmidcite/eutils/cmds/cmdbase.py @@ -13,7 +13,7 @@ class CommandBase(EntrezUtilities): def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100): cfg = Cfg() - super(CommandBase, self).__init__(cfg.get_email(), cfg.get_apikey(), cfg.get_tool()) + super().__init__(cfg.get_email(), cfg.get_apikey(), cfg.get_tool()) self.batch_size = batch_size self.retmax = retmax self.rettype = rettype From 96c54138a2cf34e700216f9f15ade318f63e4422 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Sun, 19 Nov 2023 18:42:24 -0500 Subject: [PATCH 03/18] Use Python3 style super() wo/args; use f-strings; internal fnc named starting w/"_" --- src/pmidcite/eutils/cmds/query_ids.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pmidcite/eutils/cmds/query_ids.py b/src/pmidcite/eutils/cmds/query_ids.py index 272c871..af4e449 100644 --- a/src/pmidcite/eutils/cmds/query_ids.py +++ b/src/pmidcite/eutils/cmds/query_ids.py @@ -36,12 +36,12 @@ class QueryIDs(EntrezUtilities): } def __init__(self, email, apikey, tool, prt=sys.stdout): - super(QueryIDs, self).__init__(email, apikey, tool, prt) + super().__init__(email, apikey, tool, prt) def dnld_query_ids(self, query, database, num_ids_p_epost=10): """Searches a NCBI database for a user query, writes resulting entries into one file.""" rsp_dct = self.get_query_rsp(query, database, num_ids_p_epost) - return self.get_ids(rsp_dct, query, database, num_ids_p_epost) + return self._get_ids(rsp_dct, query, database, num_ids_p_epost) def get_query_rsp(self, query, database, num_ids_p_epost=10): """Searches a NCBI database for a user query, writes resulting entries into one file.""" @@ -49,16 +49,15 @@ def get_query_rsp(self, query, database, num_ids_p_epost=10): rsp_dct = self.query(database, query, retmax=num_ids_p_epost) if rsp_dct is None: if self.log: - self.log.write('No {DB} entries found: {Q}\n'.format(DB=database, Q=query)) + self.log.write(f'No {database} entries found: {query}\n') self.log.flush() return [] if rsp_dct and self.log: - self.log.write('{N:6,} IDs FOR {DB} QUERY({Q})\n'.format( - DB=database, N=rsp_dct['count'], Q=query)) + self.log.write(f'{rsp_dct["count"]:6,} IDs FOR {database} QUERY({query})\n') self.log.flush() return rsp_dct - def get_ids(self, rsp_dct, query, database, num_ids_p_epost=10): + def _get_ids(self, rsp_dct, query, database, num_ids_p_epost=10): """Download PMIDs, N (num_ids_p_epost) at a time""" ##print('WWWWWWWWWWWWWWWWWWWWW pmidcite/eutils/cmds/query_ids.py', rsp_dct) if not rsp_dct: @@ -116,7 +115,7 @@ def query(self, database, query, **esearch): if dct is not None and 'idlist' in dct and dct['idlist']: if database in {'pubmed',}: dct['idlist'] = [int(n) for n in dct['idlist']] - for fldname in {'count', 'retmax'}: + for fldname in ['count', 'retmax']: dct[fldname] = int(dct[fldname]) return dct return None From 4c0aacca770633a956805840a57a27486471dd36 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 01:41:14 -0500 Subject: [PATCH 04/18] pylint --- src/pmidcite/eutils/cmds/efetch.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/pmidcite/eutils/cmds/efetch.py b/src/pmidcite/eutils/cmds/efetch.py index e895785..2e6d092 100644 --- a/src/pmidcite/eutils/cmds/efetch.py +++ b/src/pmidcite/eutils/cmds/efetch.py @@ -14,11 +14,9 @@ class EFetch(CommandBase): """Fetch and write text""" # pylint: disable=too-many-arguments - #### def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100, **kws): - def __init__(self, rettype='medline', retmode='text', batch_size=100, **kws): - kws_base = {k:v for k, v in kws.items() if k in CommandBase.exp_kws} - ##print('FFFFFFFFFFFFFFFFFFFF', kws_base) - super(EFetch, self).__init__(**kws_base) + def __init__(self, rettype='medline', retmode='text', batch_size=100): + retmax = 10000 + super().__init__(retmax, rettype, retmode, batch_size) def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): """EFetch records found for PMIIDs, page by page""" @@ -29,7 +27,6 @@ def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): for start in range(0, num_fetches, self.batch_size): ## msg = msg_fmt.format(querykey, database, self.batch_size, start, self.desc) ## sys.stdout.write(msg) - ## print('SSSSSSSSSSSSSSSSSSSSSSSTART:', start) txt = self.efetch_txt(start, self.batch_size, database, webenv, querykey) if txt is not None: @@ -42,14 +39,13 @@ def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): ostrm.flush() # pylint: disable=broad-except except Exception as err: - sys.stdout.write("*FATAL: BAD READ SOCKET HANDLE: {}\n".format(str(err))) + sys.stdout.write(f"*FATAL: BAD READ SOCKET HANDLE: {str(err)}\n") else: sys.stdout.write("*FATAL: NO SOCKET HANDLE TO READ FROM\n") def efetch_txt(self, start, retmax, database, webenv, querykey): """Fetch database text""" try: - # pylint: disable=bad-whitespace txt = self.run_eutilscmd( 'efetch', db = database, @@ -62,15 +58,16 @@ def efetch_txt(self, start, retmax, database, webenv, querykey): #print('FETCH:', dct) return txt except IOError as err: - msg = "\n*FATAL: EFetching FAILED: {}".format(err) + msg = f"\n*FATAL: EFetching FAILED: {err}" sys.stdout.write(msg) - sys.stdout.write(" database: {}\n".format(database)) - sys.stdout.write(" retstart: {}\n".format(start)) - sys.stdout.write(" batch_size: {}\n".format(self.batch_size)) - sys.stdout.write(" rettype: {}\n".format(self.rettype)) - sys.stdout.write(" retmode: {}\n".format(self.retmode)) - sys.stdout.write(" webenv: {}\n".format(webenv)) - sys.stdout.write(" querykey: {}\n".format(querykey)) + sys.stdout.write(f" database: {database}\n") + sys.stdout.write(f" retstart: {start}\n") + sys.stdout.write(f" batch_size: {self.batch_size}\n") + sys.stdout.write(f" rettype: {self.rettype}\n") + sys.stdout.write(f" retmode: {self.retmode}\n") + sys.stdout.write(f" webenv: {webenv}\n") + sys.stdout.write(f" querykey: {querykey}\n") + return None # Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved. From 86d4bd0e5cac4807bffebf69b6eab8bd07c9f86d Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 02:08:14 -0500 Subject: [PATCH 05/18] pylint --- src/pmidcite/eutils/cmds/elink.py | 34 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/pmidcite/eutils/cmds/elink.py b/src/pmidcite/eutils/cmds/elink.py index 213a535..7fa136e 100644 --- a/src/pmidcite/eutils/cmds/elink.py +++ b/src/pmidcite/eutils/cmds/elink.py @@ -1,4 +1,4 @@ -"""Fetch items and write""" +"""ELink""" __author__ = 'DV Klopfenstein, PhD' __copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved." @@ -6,17 +6,16 @@ import sys import re -from pmidcite.eutils.cmds.cmdbase import CommandBase +from pmidcite.eutils.cmds.base import EntrezUtilities -# TBD: -class ELink(CommandBase): - """Fetch and write text""" +class ELink(EntrezUtilities): + """ELink""" # pylint: disable=too-many-arguments - def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100, **kws): - kws_base = {k:v for k, v in kws.items() if k in CommandBase.exp_kws} - super(ELink, self).__init__(**kws_base) + def __init__(self, email, apikey, tool, batch_size=100): + super().__init__(email, apikey, tool) + self.batch_size = batch_size def elink(self, database_from, linkname, webenv, querykey, num_fetches): """EFetch records found for PMIDs, page by page""" @@ -29,7 +28,6 @@ def elink(self, database_from, linkname, webenv, querykey, num_fetches): ## sys.stdout.write(msg) record = None try: - # pylint: disable=bad-whitespace record = self.run_eutilscmd( 'elink', db = database_from, @@ -41,15 +39,15 @@ def elink(self, database_from, linkname, webenv, querykey, num_fetches): query_key = querykey) print('ELINK:', linkname, record) except IOError as err: - msg = "\n*FATAL: EFetching FAILED: {}".format(err) + msg = f"\n*FATAL: EFetching FAILED: {err}" sys.stdout.write(msg) - sys.stdout.write(" database: {}\n".format(database_from)) - sys.stdout.write(" retstart: {}\n".format(start)) - # sys.stdout.write(" retmax: {}\n".format(retmax)) - sys.stdout.write(" batch_size: {}\n".format(self.batch_size)) - sys.stdout.write(" linkname: {}\n".format(linkname)) - sys.stdout.write(" webenv: {}\n".format(webenv)) - sys.stdout.write(" querykey: {}\n".format(querykey)) + sys.stdout.write(f" database: {database_from}\n") + sys.stdout.write(f" retstart: {start}\n") + # sys.stdout.write(f" retmax: {retmax}\n") + sys.stdout.write(f" batch_size: {self.batch_size}\n") + sys.stdout.write(f" linkname: {linkname}\n") + sys.stdout.write(f" webenv: {webenv}\n") + sys.stdout.write(f" querykey: {querykey}\n") if record is not None: try: @@ -61,7 +59,7 @@ def elink(self, database_from, linkname, webenv, querykey, num_fetches): # ostrm.flush() # pylint: disable=broad-except except Exception as err: - sys.stdout.write("*FATAL: BAD READ SOCKET HANDLE: {}\n".format(str(err))) + sys.stdout.write(f"*FATAL: BAD READ SOCKET HANDLE: {str(err)}\n") else: sys.stdout.write("*FATAL: NO SOCKET HANDLE TO READ FROM\n") From 1bafc7a7932c81e60a1004dce74c8b5371111e5d Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 02:10:29 -0500 Subject: [PATCH 06/18] retmax param is last --- src/pmidcite/eutils/cmds/cmdbase.py | 8 +++++--- src/pmidcite/eutils/cmds/efetch.py | 5 ++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pmidcite/eutils/cmds/cmdbase.py b/src/pmidcite/eutils/cmds/cmdbase.py index 78f9845..07c36fd 100644 --- a/src/pmidcite/eutils/cmds/cmdbase.py +++ b/src/pmidcite/eutils/cmds/cmdbase.py @@ -1,4 +1,6 @@ -"""Fetch items and write""" +"""Run a NCBI E-Utils command that requires args, rettype and retmode""" +# pylint: disable=line-too-long +# https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly __author__ = 'DV Klopfenstein, PhD' __copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved." @@ -9,9 +11,9 @@ class CommandBase(EntrezUtilities): - """Fetch and write text""" + """Run a NCBI E-Utils command that requires args, rettype and retmode""" - def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100): + def __init__(self, rettype='medline', retmode='text', batch_size=100, retmax=10000): cfg = Cfg() super().__init__(cfg.get_email(), cfg.get_apikey(), cfg.get_tool()) self.batch_size = batch_size diff --git a/src/pmidcite/eutils/cmds/efetch.py b/src/pmidcite/eutils/cmds/efetch.py index 2e6d092..3187223 100644 --- a/src/pmidcite/eutils/cmds/efetch.py +++ b/src/pmidcite/eutils/cmds/efetch.py @@ -14,9 +14,8 @@ class EFetch(CommandBase): """Fetch and write text""" # pylint: disable=too-many-arguments - def __init__(self, rettype='medline', retmode='text', batch_size=100): - retmax = 10000 - super().__init__(retmax, rettype, retmode, batch_size) + def __init__(self, rettype='medline', retmode='text', batch_size=100, retmax=10000): + super().__init__(rettype, retmode, batch_size, retmax) def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): """EFetch records found for PMIIDs, page by page""" From 60211112d084166dacd514877d04d39e86b87d71 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 10:19:52 -0500 Subject: [PATCH 07/18] cleanup --- src/pmidcite/eutils/cmds/efetch.py | 72 ------------------------------ 1 file changed, 72 deletions(-) delete mode 100644 src/pmidcite/eutils/cmds/efetch.py diff --git a/src/pmidcite/eutils/cmds/efetch.py b/src/pmidcite/eutils/cmds/efetch.py deleted file mode 100644 index 3187223..0000000 --- a/src/pmidcite/eutils/cmds/efetch.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Fetch items and write""" -# https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch - -__author__ = 'DV Klopfenstein, PhD' -__copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved." -__license__ = "GNU AGPLv3" - -import sys -import re -from pmidcite.eutils.cmds.cmdbase import CommandBase - - -class EFetch(CommandBase): - """Fetch and write text""" - - # pylint: disable=too-many-arguments - def __init__(self, rettype='medline', retmode='text', batch_size=100, retmax=10000): - super().__init__(rettype, retmode, batch_size, retmax) - - def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): - """EFetch records found for PMIIDs, page by page""" - ## QueryKey( 1) EFetching(database=pubmed) up to 10 records, starting at 0; ABSTRACT - ## QueryKey( 1) EFetching(database=pubmed) up to 10 records, starting at 10; ABSTRACT - ## msg_fmt = (' QueryKey({:>6}) EFetching(database={}) up to {:5} records, ' - ## 'starting at {}; {}\n') - for start in range(0, num_fetches, self.batch_size): - ## msg = msg_fmt.format(querykey, database, self.batch_size, start, self.desc) - ## sys.stdout.write(msg) - txt = self.efetch_txt(start, self.batch_size, database, webenv, querykey) - - if txt is not None: - try: - # Read the downloaded data from the socket handle - mtch = re.search(r'(ERROR.*\n)', txt) - if mtch: - sys.stdout.write(mtch.group(1)) - ostrm.write(txt) - ostrm.flush() - # pylint: disable=broad-except - except Exception as err: - sys.stdout.write(f"*FATAL: BAD READ SOCKET HANDLE: {str(err)}\n") - else: - sys.stdout.write("*FATAL: NO SOCKET HANDLE TO READ FROM\n") - - def efetch_txt(self, start, retmax, database, webenv, querykey): - """Fetch database text""" - try: - txt = self.run_eutilscmd( - 'efetch', - db = database, - retstart = start, # dflt: 1 - retmax = retmax, # max: 10,000 - rettype = self.rettype, # Ex: medline - retmode = self.retmode, # Ex: text - webenv = webenv, - query_key = querykey) - #print('FETCH:', dct) - return txt - except IOError as err: - msg = f"\n*FATAL: EFetching FAILED: {err}" - sys.stdout.write(msg) - sys.stdout.write(f" database: {database}\n") - sys.stdout.write(f" retstart: {start}\n") - sys.stdout.write(f" batch_size: {self.batch_size}\n") - sys.stdout.write(f" rettype: {self.rettype}\n") - sys.stdout.write(f" retmode: {self.retmode}\n") - sys.stdout.write(f" webenv: {webenv}\n") - sys.stdout.write(f" querykey: {querykey}\n") - return None - - -# Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved. From e63ffe620179367b909d796bfd59a2e85dc8f95b Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 10:20:21 -0500 Subject: [PATCH 08/18] add rm __pycache__ files to clobber target --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index adb5590..67ec976 100644 --- a/makefile +++ b/makefile @@ -129,4 +129,4 @@ clobber_tmp: rm -rf ./src/tests/icite clobber: - make -f makefile clobber_tmp clean_build + make -f makefile clobber_tmp clean_build pyc From 697c35a685d046414fd99d974653274e3fb68a2b Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 10:30:56 -0500 Subject: [PATCH 09/18] cleanup --- src/pmidcite/eutils/cmds/cmdbase.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 src/pmidcite/eutils/cmds/cmdbase.py diff --git a/src/pmidcite/eutils/cmds/cmdbase.py b/src/pmidcite/eutils/cmds/cmdbase.py deleted file mode 100644 index 07c36fd..0000000 --- a/src/pmidcite/eutils/cmds/cmdbase.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Run a NCBI E-Utils command that requires args, rettype and retmode""" -# pylint: disable=line-too-long -# https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly - -__author__ = 'DV Klopfenstein, PhD' -__copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved." -__license__ = "GPL" - -from pmidcite.cfg import Cfg -from pmidcite.eutils.cmds.base import EntrezUtilities - - -class CommandBase(EntrezUtilities): - """Run a NCBI E-Utils command that requires args, rettype and retmode""" - - def __init__(self, rettype='medline', retmode='text', batch_size=100, retmax=10000): - cfg = Cfg() - super().__init__(cfg.get_email(), cfg.get_apikey(), cfg.get_tool()) - self.batch_size = batch_size - self.retmax = retmax - self.rettype = rettype - self.retmode = retmode - ## print(f'CommandBase: retmax({retmax}) retmode({retmode}) rettype({rettype})') - - -# Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved. From 3f41df5316e4e1907e59df4a0618c86f8d7be448 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 11:03:41 -0500 Subject: [PATCH 10/18] some pylint --- src/pmidcite/cfg.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pmidcite/cfg.py b/src/pmidcite/cfg.py index d25ef73..8a44200 100644 --- a/src/pmidcite/cfg.py +++ b/src/pmidcite/cfg.py @@ -185,13 +185,14 @@ def _chk_apikey(self, loaded): """Check to see that user has added a NCBI API key""" try: int(loaded['apikey'], 16) - except ValueError: - msg = ('SET API KEY IN {CFG}\n' + except ValueError as exc: + msg = (f'SET API KEY IN {self.cfgfile}\n' 'Get an NCBI API key to run the E-utilities:\n' 'https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/' 'new-api-keys-for-the-e-utilities\n' - 'To ensure your API key is not made public, add {CFG} to the .gitignore') - raise RuntimeError(msg.format(CFG=self.cfgfile)) + 'To ensure your API key is not made public, ' + f'add {self.cfgfile} to the .gitignore') + raise RuntimeError(msg) from exc def _err_notfound(self): """Report the config file was not found""" From cc00a01f53a6b6709d47f7d04b49cd580e01d5e4 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Mon, 20 Nov 2023 11:16:42 -0500 Subject: [PATCH 11/18] return xml str --- src/pmidcite/eutils/cmds/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pmidcite/eutils/cmds/base.py b/src/pmidcite/eutils/cmds/base.py index 65d75b4..88a0b6c 100755 --- a/src/pmidcite/eutils/cmds/base.py +++ b/src/pmidcite/eutils/cmds/base.py @@ -348,14 +348,13 @@ def _extract_rsp(self, record, retmode): traceback.print_exc() print('\n**FATAL JSONDecodeError:\n{RECORD}'.format(RECORD=record.decode('utf-8'))) - if retmode == 'text': + if retmode in {'text', 'asn.1'}: ## print('RECORD:', str(record)) return record.decode('utf-8') ## print('RETMODE', retmode) ## print('RECORD', record) - ## print(record) # # # Parse XML root = ElementTree.fromstring(record) - ## print('root.tag', root.tag) - assert root.tag in 'ePostResult', root.tag + print(f'ElementTree.fromstring(record).root:\n{root}') + return root + # TODO + print('root.tag', root.tag) + assert root.tag in 'ePostResult', f'ElementTree.fromstring(record).tag: {root.tag}' dct = {r.tag.lower():r.text for r in root} if 'querykey' in dct: dct['querykey'] = int(dct['querykey']) From 45297ca1ecd23fd93fdfcfb819955bb20ee08235 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Tue, 21 Nov 2023 09:56:54 -0500 Subject: [PATCH 12/18] mv console_scripts to var --- setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 395dba4..3721bf3 100755 --- a/setup.py +++ b/setup.py @@ -39,6 +39,10 @@ def get_long_description(): with open(join(dir_cur, 'README.md'), 'rb') as ifstrm: return ifstrm.read().decode("UTF-8") +CONSOLE_SCRIPTS = [ + 'icite=pmidcite.scripts.icite:main', + 'sumpaps=pmidcite.scripts.summarize_papers:main', +] setup( name=NAME, @@ -49,10 +53,7 @@ def get_long_description(): package_dir=PACKAGE_DIRS, scripts=glob('src/bin/*.py'), entry_points={ - 'console_scripts':[ - 'icite=pmidcite.scripts.icite:main', - 'sumpaps=pmidcite.scripts.summarize_papers:main', - ], + 'console_scripts': CONSOLE_SCRIPTS, }, # https://pypi.org/classifiers/ classifiers=[ From acb07f6378a4dfe96f558a70c99f2915e453f01e Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Fri, 24 Nov 2023 10:20:17 -0500 Subject: [PATCH 13/18] Handle response to NCBI E-Utils cmd here when getting webenv querykey for ID list --- src/pmidcite/eutils/cmds/query_ids.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/pmidcite/eutils/cmds/query_ids.py b/src/pmidcite/eutils/cmds/query_ids.py index af4e449..622bf21 100644 --- a/src/pmidcite/eutils/cmds/query_ids.py +++ b/src/pmidcite/eutils/cmds/query_ids.py @@ -112,12 +112,22 @@ def query(self, database, query, **esearch): usehistory="y", # NCBI prefers we use history(QueryKey, WebEnv) for next acess retmode='json', **kws_act) - if dct is not None and 'idlist' in dct and dct['idlist']: - if database in {'pubmed',}: - dct['idlist'] = [int(n) for n in dct['idlist']] + print(f'run_eutilscmd rsp {dct.keys()}') + esearchresult = self._get_esearchresult(dct) + print(f'run_eutilscmd rsp {esearchresult}') + if esearchresult is not None and 'idlist' in esearchresult and esearchresult['idlist']: + if database in {'pubmed','gene'}: + esearchresult['idlist'] = [int(n) for n in esearchresult['idlist']] for fldname in ['count', 'retmax']: - dct[fldname] = int(dct[fldname]) - return dct + esearchresult[fldname] = int(esearchresult[fldname]) + return esearchresult + return None + + @staticmethod + def _get_esearchresult(dct): + if dct is not None: + if 'esearchresult' in dct: + return dct['esearchresult'] return None From aedbc6e0a336bf48c521513fb09eed9a51962b5e Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Fri, 24 Nov 2023 10:57:31 -0500 Subject: [PATCH 14/18] comment out prints --- src/pmidcite/eutils/cmds/query_ids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pmidcite/eutils/cmds/query_ids.py b/src/pmidcite/eutils/cmds/query_ids.py index 622bf21..2881c2d 100644 --- a/src/pmidcite/eutils/cmds/query_ids.py +++ b/src/pmidcite/eutils/cmds/query_ids.py @@ -112,9 +112,9 @@ def query(self, database, query, **esearch): usehistory="y", # NCBI prefers we use history(QueryKey, WebEnv) for next acess retmode='json', **kws_act) - print(f'run_eutilscmd rsp {dct.keys()}') + ## print(f'run_eutilscmd rsp {dct.keys()}') esearchresult = self._get_esearchresult(dct) - print(f'run_eutilscmd rsp {esearchresult}') + ## print(f'run_eutilscmd rsp {esearchresult}') if esearchresult is not None and 'idlist' in esearchresult and esearchresult['idlist']: if database in {'pubmed','gene'}: esearchresult['idlist'] = [int(n) for n in esearchresult['idlist']] From 0303a7878bc24b25803ad658a58f6f3aaccf8bec Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Fri, 24 Nov 2023 11:02:35 -0500 Subject: [PATCH 15/18] simplify response handling for base obj; indiv cmds will handle --- src/pmidcite/eutils/cmds/base.py | 50 ++++++-------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/src/pmidcite/eutils/cmds/base.py b/src/pmidcite/eutils/cmds/base.py index 88a0b6c..ac487f7 100755 --- a/src/pmidcite/eutils/cmds/base.py +++ b/src/pmidcite/eutils/cmds/base.py @@ -211,39 +211,16 @@ def epost(self, database, ids, num_ids_p_epost=10): ret['querykey'] = rsp['querykey'] return ret - @staticmethod - def _return_einforesult(record): - """Return EInfo result""" - einforesult = record['einforesult'] - cmdtype = record['header']['type'] - if 'dblist' in einforesult: - return einforesult['dblist'] - if cmdtype == 'einfo' and 'dbinfo' in einforesult: - assert len(record['einforesult']['dbinfo']) == 1 - ## print('RRRRRRRRRRRRRRR', record.keys()) - ## print('RRRRRRRRRRRRRRR', len(record['einforesult']['dbinfo'])) - ## print('RRRRRRRRRRRRRRR', record) - return record['einforesult']['dbinfo'][0] - raise RuntimeError('IMPLEMENT _return_einforesult') - - @staticmethod - def _return_linksets(record): - """Return ELink result""" - links_all = [] - for dct0 in record['linksets']: - ## print('DCT', dct0) - if 'linksetdbs' in dct0: - for dct1 in dct0['linksetdbs']: - links_all.extend(dct1['links']) - print('{N} LINKED ITEMS'.format(N=len(links_all))) - return links_all - # ------------------------------------------------------------------------------------ def run_eutilscmd(self, cmd, **params): # params=None, post=None, ecitmatch=False): """Run NCBI E-Utilities command""" # params example: db retstart retmax rettype retmode webenv query_key + # print('RUN NCBI EUTILS CMD', cmd) rsp_dct = self.run_req(cmd, **params) # post=None, ecitmatch=False): - ## print('RRRRRRRRRRRRRRRRRRRRRRR', rsp_dct) + # print('RRRRRRRRRRRRRRRRRRRRRRR', rsp_dct.keys()) + # dict_keys(['code', 'msg', 'url', 'headers', 'data']) + # print('RRRRRRRRRRRRRRRRRRRRRRR', rsp_dct['data']) + # print('RRRRRRRRRRRRRRRRRRRRRRR', rsp_dct) if rsp_dct is not None: return self._extract_rsp(rsp_dct['data'], params.get('retmode')) return None @@ -251,8 +228,8 @@ def run_eutilscmd(self, cmd, **params): # params=None, post=None, ecitmatch=Fal def _mk_cgi(self, cmd, **params): """Get Fast Common Gateway Interface (fcgi) string, given E-utils command/parameters""" cgi = self.cgifmt.format(ECMD=cmd) + ##print('PARAMS', params) params = self._construct_params(params) - ## print('PARAMS', params) options = self._encode_options(params) cgi += '?' + options return cgi @@ -333,16 +310,7 @@ def _extract_rsp(self, record, retmode): """Extract the data from a response from running a Entrez Utilities command""" if retmode == 'json': try: - dct = json.loads(record) - if 'esearchresult' in dct: - return dct['esearchresult'] - if 'einforesult' in dct: - return self._return_einforesult(dct) - if 'linksets' in dct: - return self._return_linksets(dct) - print('KEYS:', dct.keys()) - print('DCT:', dct) - raise RuntimeError('UNKNOWN RESULT in _run_req') + return json.loads(record) except json.decoder.JSONDecodeError as errobj: print('JSONDecodeError = {ERR}'.format(ERR=str(errobj))) traceback.print_exc() @@ -365,10 +333,10 @@ def _extract_rsp(self, record, retmode): # # Parse XML root = ElementTree.fromstring(record) - print(f'ElementTree.fromstring(record).root:\n{root}') + #print(f'ElementTree.fromstring(record).root:\n{root}') return root # TODO - print('root.tag', root.tag) + #print('root.tag', root.tag) assert root.tag in 'ePostResult', f'ElementTree.fromstring(record).tag: {root.tag}' dct = {r.tag.lower():r.text for r in root} if 'querykey' in dct: From a198fb3dcbde0991f614190c84eac0947c429191 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Fri, 24 Nov 2023 11:03:08 -0500 Subject: [PATCH 16/18] Not need g target --- makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/makefile b/makefile index 67ec976..585acf7 100644 --- a/makefile +++ b/makefile @@ -18,11 +18,6 @@ p: d: find src -regextype posix-extended -regex "[a-z./]*" -type d -g: - git status -uno - git remote -v - git branch - cli: find src/pmidcite/cli -name \*.py From a65f8fcef4afa15408842140957f00df1acd1a6d Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Fri, 24 Nov 2023 16:10:59 -0500 Subject: [PATCH 17/18] update for QueryIDs from esearch json uilist; fnc: query => get_ids_esearch --- src/pmidcite/eutils/cmds/query_ids.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pmidcite/eutils/cmds/query_ids.py b/src/pmidcite/eutils/cmds/query_ids.py index 2881c2d..8180306 100644 --- a/src/pmidcite/eutils/cmds/query_ids.py +++ b/src/pmidcite/eutils/cmds/query_ids.py @@ -46,7 +46,7 @@ def dnld_query_ids(self, query, database, num_ids_p_epost=10): def get_query_rsp(self, query, database, num_ids_p_epost=10): """Searches a NCBI database for a user query, writes resulting entries into one file.""" # 1) Query PubMed/Protein, PhD/etc. Get first N (num_ids_p_epost) of the total PMIDs - rsp_dct = self.query(database, query, retmax=num_ids_p_epost) + rsp_dct = self.get_ids_esearch(database, query, retmax=num_ids_p_epost) if rsp_dct is None: if self.log: self.log.write(f'No {database} entries found: {query}\n') @@ -72,7 +72,8 @@ def _get_ids(self, rsp_dct, query, database, num_ids_p_epost=10): ##print('WWWWWWWWWWWWWWWWWWWWWWWW', kws_p) for retnum in range(1, self._get_num_querykeys(num_ids_p_epost, tot_ids)): ##print('WWWWWWWWWWWWWWWWWWWWWWWW retnum', retnum) - rsp_dct = self.query(database, query, retstart=num_ids_p_epost*retnum, **kws_p) + # pylint: disable=line-too-long + rsp_dct = self.get_ids_esearch(database, query, retstart=num_ids_p_epost*retnum, **kws_p) if rsp_dct: ##print('WWWWWWWWWWWWWWWWWWWWWWWW idlist', rsp_dct['idlist']) ids.extend(rsp_dct['idlist']) @@ -89,10 +90,10 @@ def _get_num_querykeys(num_ids_p_epost, num_pmids): ## print(f'num_querykeys({num_querykeys})') return num_querykeys - def query(self, database, query, **esearch): - """Text query finds database UIDs for later use in ESummary, EFetch or ELink""" + def get_ids_esearch(self, database, query, **kws): + """Esearch for json uilist finds database UIDs for later use in ESummary, EFetch or ELink""" kws_exp = self.exp_params.difference({'db', 'term', 'rettype', 'usehistory', 'retmode'}) - kws_act = {k:v for k, v in esearch.items() if k in kws_exp} + kws_act = {k:v for k, v in kws.items() if k in kws_exp} # Returns: # count # retmax From d76c2d1fa994a4e9d46b20e3f40a571e3ec70518 Mon Sep 17 00:00:00 2001 From: DVKlopfenstein Date: Sun, 21 Jan 2024 01:39:59 -0500 Subject: [PATCH 18/18] update --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index fbcea2e..bf418db 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # PubMed ID (PMID) Cite [![Tweet](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/intent/tweet?text=Python%20library%20to%20download%20pubmed%20citation%20counts%20and%20data,%20given%20a%20PMID&url=https://github.com/dvklopfenstein/pmidcite&via=dvklopfenstein&hashtags=pubmed,pmid,citations,pubmed2cite,writingtips,scientificwriting) -[![build](https://github.com/dvklopfenstein/pmidcite/actions/workflows/build.yml/badge.svg)](https://github.com/dvklopfenstein/pmidcite/actions/workflows/build.yml) [![CodeQL](https://github.com/dvklopfenstein/pmidcite/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/dvklopfenstein/pmidcite/actions/workflows/codeql-analysis.yml) [![Latest PyPI version](https://img.shields.io/pypi/v/pmidcite.svg)](https://pypi.org/project/pmidcite/) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5172712.svg)](https://doi.org/10.5281/zenodo.5172712)