
Commit 2c9080a

[lint] Applies remaining ruff linter changes to hail/ folder (#14415)
This change applies each currently-ignored `ruff` rule progressively; each commit applies one rule. The changes were applied manually to avoid known issues with the automatic fixes; for example, given the code

```python
return (
    is_container(t)
    or isinstance(t, tstruct)
    or isinstance(t, tunion)
    or isinstance(t, ttuple)
    or isinstance(t, tndarray)
)
```

the automatic fixes produce

```python
return isinstance(t, (tndarray, tstruct, ttuple, tunion))
```

instead of

```python
return is_container(t) or isinstance(t, (tstruct, tunion, ttuple, tndarray))
```

where not only has the call to `is_container` been removed, but also the order of the `isinstance` comparisons has been changed, which has the potential to produce side effects (though in this case, I don’t think it does). Similarly, when eliminating assignments to unused variables, I left the right-hand side of the assignment intact in case of side effects, except where I myself wrote the code in question and know there are no side effects produced by it.

See also #14150 and #14159.

---------

Co-authored-by: Patrick Schultz <[email protected]>
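As a hedged aside (this is not code from the commit), a minimal sketch of the unused-assignment policy described in the message above; `validate_and_log` and `effects` are invented stand-ins:

```python
effects = []


def validate_and_log(config: dict) -> bool:
    """A stand-in function with a visible side effect."""
    effects.append(config)
    return bool(config)


# Before: ruff flags `ok` as assigned-but-unused.
ok = validate_and_log({"region": "us-central1"})

# After the manual fix: only the binding is dropped; the call, and therefore
# its side effect on `effects`, is kept.
validate_and_log({"region": "us-central1"})

assert len(effects) == 2
```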
1 parent 59bbd1c commit 2c9080a


68 files changed: +355 / -447 lines changed

hail/python/hail/backend/backend.py

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 import zipfile
 from dataclasses import dataclass
 from enum import Enum
-from typing import AbstractSet, Any, Dict, List, Mapping, Optional, Tuple, TypeVar, Union
+from typing import AbstractSet, Any, ClassVar, Dict, List, Mapping, Optional, Tuple, TypeVar, Union

 import orjson
 import pkg_resources
@@ -139,7 +139,7 @@ class FromFASTAFilePayload(ActionPayload):

 class Backend(abc.ABC):
     # Must match knownFlags in HailFeatureFlags.scala
-    _flags_env_vars_and_defaults: Dict[str, Tuple[str, Optional[str]]] = {
+    _flags_env_vars_and_defaults: ClassVar[Dict[str, Tuple[str, Optional[str]]]] = {
         "no_whole_stage_codegen": ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN", None),
         "no_ir_logging": ("HAIL_DEV_NO_IR_LOG", None),
         "lower": ("HAIL_DEV_LOWER", None),

hail/python/hail/backend/py4j_backend.py

Lines changed: 0 additions & 1 deletion
@@ -42,7 +42,6 @@ def install_exception_handler():

 def uninstall_exception_handler():
     global _installed
-    global _original
     if _installed:
         _installed = False
         py4j.protocol.get_return_value = _original
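The deleted `global _original` illustrates that `global` is only required when a function assigns to a module-level name; merely reading one needs no declaration. A standalone sketch with illustrative names:

```python
_installed = False
_original = print  # stands in for a saved handler


def uninstall_exception_handler():
    # `global` is needed for `_installed` because the function rebinds it.
    global _installed
    if _installed:
        _installed = False
        # `_original` is only read, so no `global _original` declaration is
        # needed; Python resolves the lookup to module scope anyway.
        return _original
    return None


assert uninstall_exception_handler() is None  # nothing installed in this sketch
```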

hail/python/hail/experimental/datasets.py

Lines changed: 8 additions & 10 deletions
@@ -64,15 +64,15 @@ def load_dataset(
     if region not in valid_regions:
         raise ValueError(
             f'Specify valid region parameter,'
-            f' received: region={repr(region)}.\n'
+            f' received: region={region!r}.\n'
             f'Valid region values are {valid_regions}.'
         )

     valid_clouds = {'gcp', 'aws'}
     if cloud not in valid_clouds:
         raise ValueError(
             f'Specify valid cloud parameter,'
-            f' received: cloud={repr(cloud)}.\n'
+            f' received: cloud={cloud!r}.\n'
             f'Valid cloud platforms are {valid_clouds}.'
         )

@@ -88,29 +88,27 @@ def load_dataset(
     versions = set(dataset['version'] for dataset in datasets[name]['versions'])
     if version not in versions:
         raise ValueError(
-            f'Version {repr(version)} not available for dataset' f' {repr(name)}.\n' f'Available versions: {versions}.'
+            f'Version {version!r} not available for dataset' f' {name!r}.\n' f'Available versions: {versions}.'
         )

     reference_genomes = set(dataset['reference_genome'] for dataset in datasets[name]['versions'])
     if reference_genome not in reference_genomes:
         raise ValueError(
-            f'Reference genome build {repr(reference_genome)} not'
-            f' available for dataset {repr(name)}.\n'
+            f'Reference genome build {reference_genome!r} not'
+            f' available for dataset {name!r}.\n'
             f'Available reference genome builds:'
             f' {reference_genomes}.'
         )

     clouds = set(k for dataset in datasets[name]['versions'] for k in dataset['url'].keys())
     if cloud not in clouds:
-        raise ValueError(
-            f'Cloud platform {repr(cloud)} not available for dataset {name}.\nAvailable platforms: {clouds}.'
-        )
+        raise ValueError(f'Cloud platform {cloud!r} not available for dataset {name}.\nAvailable platforms: {clouds}.')

     regions = set(k for dataset in datasets[name]['versions'] for k in dataset['url'][cloud].keys())
     if region not in regions:
         raise ValueError(
-            f'Region {repr(region)} not available for dataset'
-            f' {repr(name)} on cloud platform {repr(cloud)}.\n'
+            f'Region {region!r} not available for dataset'
+            f' {name!r} on cloud platform {cloud!r}.\n'
             f'Available regions: {regions}.'
         )

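The repeated change in this file swaps `repr(x)` inside f-strings for the `!r` conversion flag, the spelling ruff's explicit-conversion rule (RUF010) prefers; the two forms produce identical strings. A quick check with a made-up value:

```python
region = "us-central1"

old = f"received: region={repr(region)}."
new = f"received: region={region!r}."

# The conversion flag delegates to repr() under the hood, so the output is
# identical; the f-string is just shorter and avoids an explicit call.
assert old == new == "received: region='us-central1'."
```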

hail/python/hail/experimental/db.py

Lines changed: 11 additions & 12 deletions
@@ -1,7 +1,7 @@
 import json
 import os
 import warnings
-from typing import Iterable, List, Optional, Set, Tuple, Union
+from typing import ClassVar, Iterable, List, Optional, Set, Tuple, Union

 import pkg_resources

@@ -303,10 +303,10 @@ class DB:
     >>> db = hl.experimental.DB(region='us-central1', cloud='gcp')
     """

-    _valid_key_properties = {'gene', 'unique'}
-    _valid_regions = {'us', 'us-central1', 'europe-west1'}
-    _valid_clouds = {'gcp', 'aws'}
-    _valid_combinations = {('us', 'aws'), ('us-central1', 'gcp'), ('europe-west1', 'gcp')}
+    _valid_key_properties: ClassVar = {'gene', 'unique'}
+    _valid_regions: ClassVar = {'us', 'us-central1', 'europe-west1'}
+    _valid_clouds: ClassVar = {'gcp', 'aws'}
+    _valid_combinations: ClassVar = {('us', 'aws'), ('us-central1', 'gcp'), ('europe-west1', 'gcp')}

     def __init__(
         self,
@@ -319,19 +319,19 @@ def __init__(
         if region not in DB._valid_regions:
             raise ValueError(
                 f'Specify valid region parameter,'
-                f' received: region={repr(region)}.\n'
+                f' received: region={region!r}.\n'
                 f'Valid regions are {DB._valid_regions}.'
             )
         if cloud not in DB._valid_clouds:
             raise ValueError(
                 f'Specify valid cloud parameter,'
-                f' received: cloud={repr(cloud)}.\n'
+                f' received: cloud={cloud!r}.\n'
                 f'Valid cloud platforms are {DB._valid_clouds}.'
             )
         if (region, cloud) not in DB._valid_combinations:
             raise ValueError(
-                f'The {repr(region)} region is not available for'
-                f' the {repr(cloud)} cloud platform. '
+                f'The {region!r} region is not available for'
+                f' the {cloud!r} cloud platform. '
                 f'Valid region, cloud combinations are'
                 f' {DB._valid_combinations}.'
             )
@@ -350,9 +350,8 @@ def __init__(
             response = retry_response_returning_functions(session.get, url)
             config = response.json()
             assert isinstance(config, dict)
-        else:
-            if not isinstance(config, dict):
-                raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}')
+        elif not isinstance(config, dict):
+            raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}')
         config = {k: v for k, v in config.items() if 'annotation_db' in v}
         self.region = region
         self.cloud = cloud
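The last hunk above collapses an `else:` that wraps a lone `if` into a single `elif`, the "collapsible else-if" pattern linters flag; behaviour is unchanged and one level of nesting disappears. A minimal sketch with an invented config check:

```python
def check_config(config, url=None):
    # Before: an `else:` wrapping a single `if` added a needless indent level.
    #
    #     if config is None:
    #         config = {"source": url}
    #     else:
    #         if not isinstance(config, dict):
    #             raise ValueError(f"expected a dict, but found {config}")
    #
    # After: the same logic as a flat if/elif chain.
    if config is None:
        config = {"source": url}
    elif not isinstance(config, dict):
        raise ValueError(f"expected a dict, but found {config}")
    return config


assert check_config(None, "https://example.org") == {"source": "https://example.org"}
assert check_config({"a": 1}) == {"a": 1}
```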

hail/python/hail/experimental/filtering_allele_frequency.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def filtering_allele_frequency(ac, an, ci) -> Float64Expression:

     The filtering allele frequency is the highest true population allele frequency
     for which the upper bound of the `ci` (confidence interval) of allele count
-    under a Poisson distribution is still less than the variants observed
+    under a Poisson distribution is still less than the variant's observed
     `ac` (allele count) in the reference sample, given an `an` (allele number).

     This function defines a "filtering AF" that represents

hail/python/hail/experimental/interact.py

Lines changed: 1 addition & 1 deletion
@@ -346,7 +346,7 @@ def append_struct_frames(t, frames):
     frames.append(widgets.HTML('<big>Fields:</big>'))
     acc = widgets.Accordion([recursive_build(x) for x in t.values()])
     for i, (name, fd) in enumerate(t.items()):
-        acc.set_title(i, f'{repr(name)} ({summary_type(fd)})')
+        acc.set_title(i, f'{name!r} ({summary_type(fd)})')
     acc.selected_index = None
     frames.append(acc)


hail/python/hail/experimental/ldscsim.py

Lines changed: 16 additions & 17 deletions
@@ -178,7 +178,7 @@ def make_betas(mt, h2, pi=None, annot=None, rg=None):
         h2 = h2 if isinstance(h2, list) else [h2]
         annot_sum = mt.aggregate_rows(hl.agg.sum(annot))
         mt = mt.annotate_rows(beta=hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(annot * x / (annot_sum * M)))))
-    elif len(h2) > 1 and (pi == [None] or pi == [1]):  # multi-trait correlated infinitesimal
+    elif len(h2) > 1 and (pi in ([None], [1])):  # multi-trait correlated infinitesimal
         mt, rg = multitrait_inf(mt=mt, h2=h2, rg=rg)
     elif len(h2) == 2 and len(pi) > 1 and len(rg) == 1:  # two trait correlated spike & slab
         print('multitrait ss')
@@ -552,21 +552,20 @@ def calculate_phenotypes(mt, genotype, beta, h2, popstrat=None, popstrat_var=Non
             y_no_noise=hl.agg.array_agg(lambda beta: hl.agg.sum(beta * mt['norm_gt']), mt['beta_' + uid])
         )
         mt = mt.annotate_cols(y=mt.y_no_noise + hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(1 - x))))
+    elif exact_h2 and min([h2[0], 1 - h2[0]]) != 0:
+        print('exact h2')
+        mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])})
+        y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev)
+        mt = mt.annotate_cols(
+            y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev
+        )  # normalize genetic component of phenotype to have variance of exactly h2
+        mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))})
+        noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev)
+        mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
+        mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
     else:
-        if exact_h2 and min([h2[0], 1 - h2[0]]) != 0:
-            print('exact h2')
-            mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])})
-            y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev)
-            mt = mt.annotate_cols(
-                y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev
-            )  # normalize genetic component of phenotype to have variance of exactly h2
-            mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))})
-            noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev)
-            mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
-            mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
-        else:
-            mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt']))
-            mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0])))
+        mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt']))
+        mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0])))
     if popstrat is not None:
         var_factor = (
             1
@@ -661,7 +660,7 @@ def agg_fields(tb, coef_dict=None, str_expr=None, axis='rows'):
     :class:`.MatrixTable` or :class:`.Table` containing aggregation field.
     """
     assert str_expr is not None or coef_dict is not None, "str_expr and coef_dict cannot both be None"
-    assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'"
+    assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'"
     coef_dict = get_coef_dict(tb=tb, str_expr=str_expr, ref_coef_dict=coef_dict, axis=axis)
     axis_field = 'annot' if axis == 'rows' else 'cov'
     annotate_fn = (
@@ -702,7 +701,7 @@ def get_coef_dict(tb, str_expr=None, ref_coef_dict=None, axis='rows'):
     `coef_dict` value, the row (or col) field name is specified by `coef_dict` key.
     """
     assert str_expr is not None or ref_coef_dict is not None, "str_expr and ref_coef_dict cannot both be None"
-    assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'"
+    assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'"
     fields_to_search = tb.row if axis == 'rows' or isinstance(tb, Table) else tb.col
     # when axis='rows' we're searching for annotations, axis='cols' searching for covariates
     axis_field = 'annotation' if axis == 'rows' else 'covariate'
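Several hunks in this file replace chained equality comparisons (`x == a or x == b`) with a single membership test, the rewrite ruff's repeated-comparison rule suggests. Note the container must be a tuple or list rather than a set when the candidates are unhashable, which is presumably why `pi in ([None], [1])` uses a tuple of lists. A standalone check:

```python
axis = 'rows'
pi = [None]

# Hashable candidates can live in a set literal.
assert (axis == 'rows' or axis == 'cols') == (axis in {'rows', 'cols'})

# Lists are unhashable, so the candidates go in a tuple instead of a set;
# `in` on a tuple still compares with ==, element by element.
assert (pi == [None] or pi == [1]) == (pi in ([None], [1]))
```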

hail/python/hail/experimental/sparse_mt/densify.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ def densify(sparse_mt):
     roughly costing as much as reading a matrix table created by importing a dense
     project VCF.
     """
-    if list(sparse_mt.row_key)[0] != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus):
+    if next(iter(sparse_mt.row_key)) != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus):
         raise ValueError("first row key field must be named 'locus' and have type 'locus'")
     if 'END' not in sparse_mt.entry or sparse_mt.END.dtype != hl.tint32:
         raise ValueError("'densify' requires 'END' entry field of type 'int32'")
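The change here swaps `list(x)[0]` for `next(iter(x))`, the form ruff prefers (RUF015) for taking the first element without materialising the whole iterable. A small equivalence check with a placeholder key tuple:

```python
row_key = ('locus', 'alleles')

# Both expressions yield the first key, but next(iter(...)) stops after one
# element instead of first copying everything into a list.
assert list(row_key)[0] == next(iter(row_key)) == 'locus'
```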

hail/python/hail/experimental/tidyr.py

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def spread(ht, field, value, key=None) -> Table:

     field_vals = list(ht.aggregate(hl.agg.collect_as_set(ht[field])))
     ht = ht.group_by(*key).aggregate(
-        **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set(key + [field, value])},
+        **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set([*key, field, value])},
         **{
             fv: hl.agg.filter(
                 ht[field] == fv,
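Here list concatenation (`key + [field, value]`) becomes iterable unpacking inside the literal, the spelling ruff's collection-concatenation rule (RUF005) prefers; the resulting set is identical. A quick check with placeholder names:

```python
key = ['sample_id']
field, value = 'phenotype', 'measurement'

# Concatenation builds an intermediate list; unpacking writes the elements
# straight into the new literal. Either way the contents are the same.
assert set(key + [field, value]) == set([*key, field, value]) == {
    'sample_id',
    'phenotype',
    'measurement',
}
```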

hail/python/hail/expr/expressions/base_expression.py

Lines changed: 14 additions & 13 deletions
@@ -80,9 +80,10 @@ def _ascii_string(self, depth, prefix):
         for name, v in self.summ_fields.items():
             summary += f'\n{spacing} {name.rjust(max_n_len)}: {self.format(v)}'
         for name, field in self.nested.items():
+            _name = name
             if prefix is not None:
-                name = f'{prefix}{name}'
-            summary += field._ascii_string(depth + 1, prefix=name)
+                _name = f'{prefix}{name}'
+            summary += field._ascii_string(depth + 1, prefix=_name)

         return summary

@@ -102,9 +103,10 @@ def _html_string(self, prefix):
             summary += f'<tr><td>{html.escape(name)}</td><td>{html.escape(self.format(v))}</td></tr>'
         summary += '</tbody></table>'
         for name, field in self.nested.items():
+            _name = name
             if prefix is not None:
-                name = f'{prefix}{name}'
-            summary += '<li>' + field._html_string(prefix=name) + '</li>'
+                _name = f'{prefix}{name}'
+            summary += '<li>' + field._html_string(prefix=_name) + '</li>'
         summary += '</ul>'

         return summary
@@ -533,15 +535,15 @@ def unify_exprs(*exprs: 'Expression') -> Tuple:

     # all types are the same
     if len(types) == 1:
-        return exprs + (True,)
+        return (*exprs, True)

     for t in types:
         c = expressions.coercer_from_dtype(t)
         if all(c.can_coerce(e.dtype) for e in exprs):
-            return tuple([c.coerce(e) for e in exprs]) + (True,)
+            return (*tuple([c.coerce(e) for e in exprs]), True)

     # cannot coerce all types to the same type
-    return exprs + (False,)
+    return (*exprs, False)


 class Expression(object):
@@ -617,7 +619,7 @@ def __nonzero__(self):
         )

     def __iter__(self):
-        raise ExpressionException(f"{repr(self)} object is not iterable")
+        raise ExpressionException(f"{self!r} object is not iterable")

     def _compare_op(self, op, other):
         other = to_expr(other)
@@ -655,7 +657,7 @@ def _promote_numeric(self, typ):
     @staticmethod
     def _div_ret_type_f(t):
         assert is_numeric(t)
-        if t == tint32 or t == tint64:
+        if t in {tint32, tint64}:
             return tfloat64
         else:
             # Float64 or Float32
@@ -1240,11 +1242,10 @@ def summarize(self, handler=None):
         if self in src._fields:
             field_name = src._fields_inverse[self]
             prefix = field_name
+        elif self._ir.is_nested_field:
+            prefix = self._ir.name
         else:
-            if self._ir.is_nested_field:
-                prefix = self._ir.name
-            else:
-                prefix = '<expr>'
+            prefix = '<expr>'

         if handler is None:
             handler = hl.utils.default_handler()
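The first two hunks in this file stop rebinding the loop variable `name` inside the loop body (the redefined-loop-name pattern); a fresh `_name` carries the prefixed value while the loop variable stays untouched for later iterations. The later hunks replace tuple concatenation with unpacking. A compact sketch of both rewrites, with invented data:

```python
nested = {'a': 1, 'b': 2}
prefix = 'x.'

# Rebinding `name` inside the loop would shadow the original key; binding a
# new variable keeps the loop variable intact on every iteration.
prefixed = []
for name, _field in nested.items():
    _name = name
    if prefix is not None:
        _name = f'{prefix}{name}'
    prefixed.append(_name)
assert prefixed == ['x.a', 'x.b']

# Tuple unpacking replaces concatenation when appending a flag to a tuple.
exprs = (1, 2, 3)
assert (*exprs, True) == exprs + (True,)
```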
