
Commit 2c9080a

[lint] Applies remaining ruff linter changes to hail/ folder (#14415)
This change applies each currently-ignored `ruff` rule progressively; each commit applies one rule. The changes were applied manually to avoid known issues with the automatic fixes; for example, given the code

```python
return (
    is_container(t)
    or isinstance(t, tstruct)
    or isinstance(t, tunion)
    or isinstance(t, ttuple)
    or isinstance(t, tndarray)
)
```

the automatic fixes produce

```python
return isinstance(t, (tndarray, tstruct, ttuple, tunion))
```

instead of

```python
return is_container(t) or isinstance(t, (tstruct, tunion, ttuple, tndarray))
```

where not only has the call to `is_container` been removed, but also the order of the `isinstance` comparisons has been changed, which has the potential to produce side effects (though in this case, I don’t think it does). Similarly, when eliminating assignments to unused variables, I left the right-hand side of the assignment intact in case of side effects, except where I myself wrote the code in question and know there are no side effects produced by it.

See also #14150 and #14159.

---------

Co-authored-by: Patrick Schultz <[email protected]>
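As a hedged aside (this is not code from the commit), a minimal sketch of the unused-assignment policy described in the message above; `validate_and_log` and `effects` are invented stand-ins:

```python
effects = []


def validate_and_log(config: dict) -> bool:
    """A stand-in function with a visible side effect."""
    effects.append(config)
    return bool(config)


# Before: ruff flags `ok` as assigned-but-unused.
ok = validate_and_log({"region": "us-central1"})

# After the manual fix: only the binding is dropped; the call, and therefore
# its side effect on `effects`, is kept.
validate_and_log({"region": "us-central1"})

assert len(effects) == 2
```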
1 parent 59bbd1c commit 2c9080a


68 files changed: +355 / -447 lines changed

hail/python/hail/backend/backend.py

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 import zipfile
 from dataclasses import dataclass
 from enum import Enum
-from typing import AbstractSet, Any, Dict, List, Mapping, Optional, Tuple, TypeVar, Union
+from typing import AbstractSet, Any, ClassVar, Dict, List, Mapping, Optional, Tuple, TypeVar, Union

 import orjson
 import pkg_resources
@@ -139,7 +139,7 @@ class FromFASTAFilePayload(ActionPayload):

 class Backend(abc.ABC):
     # Must match knownFlags in HailFeatureFlags.scala
-    _flags_env_vars_and_defaults: Dict[str, Tuple[str, Optional[str]]] = {
+    _flags_env_vars_and_defaults: ClassVar[Dict[str, Tuple[str, Optional[str]]]] = {
         "no_whole_stage_codegen": ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN", None),
         "no_ir_logging": ("HAIL_DEV_NO_IR_LOG", None),
         "lower": ("HAIL_DEV_LOWER", None),

hail/python/hail/backend/py4j_backend.py

Lines changed: 0 additions & 1 deletion
@@ -42,7 +42,6 @@ def install_exception_handler():

 def uninstall_exception_handler():
     global _installed
-    global _original
     if _installed:
         _installed = False
         py4j.protocol.get_return_value = _original
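The deleted `global _original` illustrates that `global` is only required when a function assigns to a module-level name; merely reading one needs no declaration. A standalone sketch with illustrative names:

```python
_installed = False
_original = print  # stands in for a saved handler


def uninstall_exception_handler():
    # `global` is needed for `_installed` because the function rebinds it.
    global _installed
    if _installed:
        _installed = False
        # `_original` is only read, so no `global _original` declaration is
        # needed; Python resolves the lookup to module scope anyway.
        return _original
    return None


assert uninstall_exception_handler() is None  # nothing installed in this sketch
```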

hail/python/hail/experimental/datasets.py

Lines changed: 8 additions & 10 deletions
@@ -64,15 +64,15 @@ def load_dataset(
     if region not in valid_regions:
         raise ValueError(
             f'Specify valid region parameter,'
-            f' received: region={repr(region)}.\n'
+            f' received: region={region!r}.\n'
             f'Valid region values are {valid_regions}.'
         )

     valid_clouds = {'gcp', 'aws'}
     if cloud not in valid_clouds:
         raise ValueError(
             f'Specify valid cloud parameter,'
-            f' received: cloud={repr(cloud)}.\n'
+            f' received: cloud={cloud!r}.\n'
             f'Valid cloud platforms are {valid_clouds}.'
         )

@@ -88,29 +88,27 @@ def load_dataset(
     versions = set(dataset['version'] for dataset in datasets[name]['versions'])
     if version not in versions:
         raise ValueError(
-            f'Version {repr(version)} not available for dataset' f' {repr(name)}.\n' f'Available versions: {versions}.'
+            f'Version {version!r} not available for dataset' f' {name!r}.\n' f'Available versions: {versions}.'
         )

     reference_genomes = set(dataset['reference_genome'] for dataset in datasets[name]['versions'])
     if reference_genome not in reference_genomes:
         raise ValueError(
-            f'Reference genome build {repr(reference_genome)} not'
-            f' available for dataset {repr(name)}.\n'
+            f'Reference genome build {reference_genome!r} not'
+            f' available for dataset {name!r}.\n'
             f'Available reference genome builds:'
             f' {reference_genomes}.'
         )

     clouds = set(k for dataset in datasets[name]['versions'] for k in dataset['url'].keys())
     if cloud not in clouds:
-        raise ValueError(
-            f'Cloud platform {repr(cloud)} not available for dataset {name}.\nAvailable platforms: {clouds}.'
-        )
+        raise ValueError(f'Cloud platform {cloud!r} not available for dataset {name}.\nAvailable platforms: {clouds}.')

     regions = set(k for dataset in datasets[name]['versions'] for k in dataset['url'][cloud].keys())
     if region not in regions:
         raise ValueError(
-            f'Region {repr(region)} not available for dataset'
-            f' {repr(name)} on cloud platform {repr(cloud)}.\n'
+            f'Region {region!r} not available for dataset'
+            f' {name!r} on cloud platform {cloud!r}.\n'
             f'Available regions: {regions}.'
         )

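The repeated change in this file swaps `repr(x)` inside f-strings for the `!r` conversion flag, the spelling ruff's explicit-conversion rule (RUF010) prefers; the two forms produce identical strings. A quick check with a made-up value:

```python
region = "us-central1"

old = f"received: region={repr(region)}."
new = f"received: region={region!r}."

# The conversion flag delegates to repr() under the hood, so the output is
# identical; the f-string is just shorter and avoids an explicit call.
assert old == new == "received: region='us-central1'."
```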

hail/python/hail/experimental/db.py

Lines changed: 11 additions & 12 deletions
@@ -1,7 +1,7 @@
 import json
 import os
 import warnings
-from typing import Iterable, List, Optional, Set, Tuple, Union
+from typing import ClassVar, Iterable, List, Optional, Set, Tuple, Union

 import pkg_resources

@@ -303,10 +303,10 @@ class DB:
     >>> db = hl.experimental.DB(region='us-central1', cloud='gcp')
     """

-    _valid_key_properties = {'gene', 'unique'}
-    _valid_regions = {'us', 'us-central1', 'europe-west1'}
-    _valid_clouds = {'gcp', 'aws'}
-    _valid_combinations = {('us', 'aws'), ('us-central1', 'gcp'), ('europe-west1', 'gcp')}
+    _valid_key_properties: ClassVar = {'gene', 'unique'}
+    _valid_regions: ClassVar = {'us', 'us-central1', 'europe-west1'}
+    _valid_clouds: ClassVar = {'gcp', 'aws'}
+    _valid_combinations: ClassVar = {('us', 'aws'), ('us-central1', 'gcp'), ('europe-west1', 'gcp')}

     def __init__(
         self,
@@ -319,19 +319,19 @@ def __init__(
         if region not in DB._valid_regions:
             raise ValueError(
                 f'Specify valid region parameter,'
-                f' received: region={repr(region)}.\n'
+                f' received: region={region!r}.\n'
                 f'Valid regions are {DB._valid_regions}.'
             )
         if cloud not in DB._valid_clouds:
             raise ValueError(
                 f'Specify valid cloud parameter,'
-                f' received: cloud={repr(cloud)}.\n'
+                f' received: cloud={cloud!r}.\n'
                 f'Valid cloud platforms are {DB._valid_clouds}.'
             )
         if (region, cloud) not in DB._valid_combinations:
             raise ValueError(
-                f'The {repr(region)} region is not available for'
-                f' the {repr(cloud)} cloud platform. '
+                f'The {region!r} region is not available for'
+                f' the {cloud!r} cloud platform. '
                 f'Valid region, cloud combinations are'
                 f' {DB._valid_combinations}.'
             )
@@ -350,9 +350,8 @@ def __init__(
             response = retry_response_returning_functions(session.get, url)
             config = response.json()
             assert isinstance(config, dict)
-        else:
-            if not isinstance(config, dict):
-                raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}')
+        elif not isinstance(config, dict):
+            raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}')
         config = {k: v for k, v in config.items() if 'annotation_db' in v}
         self.region = region
         self.cloud = cloud
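The last hunk above collapses an `else:` that wraps a lone `if` into a single `elif`, the "collapsible else-if" pattern linters flag; behaviour is unchanged and one level of nesting disappears. A minimal sketch with an invented config check:

```python
def check_config(config, url=None):
    # Before: an `else:` wrapping a single `if` added a needless indent level.
    #
    #     if config is None:
    #         config = {"source": url}
    #     else:
    #         if not isinstance(config, dict):
    #             raise ValueError(f"expected a dict, but found {config}")
    #
    # After: the same logic as a flat if/elif chain.
    if config is None:
        config = {"source": url}
    elif not isinstance(config, dict):
        raise ValueError(f"expected a dict, but found {config}")
    return config


assert check_config(None, "https://example.org") == {"source": "https://example.org"}
assert check_config({"a": 1}) == {"a": 1}
```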

hail/python/hail/experimental/filtering_allele_frequency.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def filtering_allele_frequency(ac, an, ci) -> Float64Expression:

     The filtering allele frequency is the highest true population allele frequency
     for which the upper bound of the `ci` (confidence interval) of allele count
-    under a Poisson distribution is still less than the variants observed
+    under a Poisson distribution is still less than the variant's observed
     `ac` (allele count) in the reference sample, given an `an` (allele number).

     This function defines a "filtering AF" that represents

hail/python/hail/experimental/interact.py

Lines changed: 1 addition & 1 deletion
@@ -346,7 +346,7 @@ def append_struct_frames(t, frames):
     frames.append(widgets.HTML('<big>Fields:</big>'))
     acc = widgets.Accordion([recursive_build(x) for x in t.values()])
     for i, (name, fd) in enumerate(t.items()):
-        acc.set_title(i, f'{repr(name)} ({summary_type(fd)})')
+        acc.set_title(i, f'{name!r} ({summary_type(fd)})')
     acc.selected_index = None
     frames.append(acc)


hail/python/hail/experimental/ldscsim.py

Lines changed: 16 additions & 17 deletions
@@ -178,7 +178,7 @@ def make_betas(mt, h2, pi=None, annot=None, rg=None):
         h2 = h2 if isinstance(h2, list) else [h2]
         annot_sum = mt.aggregate_rows(hl.agg.sum(annot))
         mt = mt.annotate_rows(beta=hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(annot * x / (annot_sum * M)))))
-    elif len(h2) > 1 and (pi == [None] or pi == [1]):  # multi-trait correlated infinitesimal
+    elif len(h2) > 1 and (pi in ([None], [1])):  # multi-trait correlated infinitesimal
         mt, rg = multitrait_inf(mt=mt, h2=h2, rg=rg)
     elif len(h2) == 2 and len(pi) > 1 and len(rg) == 1:  # two trait correlated spike & slab
         print('multitrait ss')
@@ -552,21 +552,20 @@ def calculate_phenotypes(mt, genotype, beta, h2, popstrat=None, popstrat_var=Non
             y_no_noise=hl.agg.array_agg(lambda beta: hl.agg.sum(beta * mt['norm_gt']), mt['beta_' + uid])
         )
         mt = mt.annotate_cols(y=mt.y_no_noise + hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(1 - x))))
+    elif exact_h2 and min([h2[0], 1 - h2[0]]) != 0:
+        print('exact h2')
+        mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])})
+        y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev)
+        mt = mt.annotate_cols(
+            y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev
+        )  # normalize genetic component of phenotype to have variance of exactly h2
+        mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))})
+        noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev)
+        mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
+        mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
     else:
-        if exact_h2 and min([h2[0], 1 - h2[0]]) != 0:
-            print('exact h2')
-            mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])})
-            y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev)
-            mt = mt.annotate_cols(
-                y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev
-            )  # normalize genetic component of phenotype to have variance of exactly h2
-            mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))})
-            noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev)
-            mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
-            mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev)
-        else:
-            mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt']))
-            mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0])))
+        mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt']))
+        mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0])))
     if popstrat is not None:
         var_factor = (
             1
@@ -661,7 +660,7 @@ def agg_fields(tb, coef_dict=None, str_expr=None, axis='rows'):
     :class:`.MatrixTable` or :class:`.Table` containing aggregation field.
     """
     assert str_expr is not None or coef_dict is not None, "str_expr and coef_dict cannot both be None"
-    assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'"
+    assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'"
     coef_dict = get_coef_dict(tb=tb, str_expr=str_expr, ref_coef_dict=coef_dict, axis=axis)
     axis_field = 'annot' if axis == 'rows' else 'cov'
     annotate_fn = (
@@ -702,7 +701,7 @@ def get_coef_dict(tb, str_expr=None, ref_coef_dict=None, axis='rows'):
     `coef_dict` value, the row (or col) field name is specified by `coef_dict` key.
     """
     assert str_expr is not None or ref_coef_dict is not None, "str_expr and ref_coef_dict cannot both be None"
-    assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'"
+    assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'"
     fields_to_search = tb.row if axis == 'rows' or isinstance(tb, Table) else tb.col
     # when axis='rows' we're searching for annotations, axis='cols' searching for covariates
     axis_field = 'annotation' if axis == 'rows' else 'covariate'
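Several hunks in this file replace chained equality comparisons (`x == a or x == b`) with a single membership test, the rewrite ruff's repeated-comparison rule suggests. Note the container must be a tuple or list rather than a set when the candidates are unhashable, which is presumably why `pi in ([None], [1])` uses a tuple of lists. A standalone check:

```python
axis = 'rows'
pi = [None]

# Hashable candidates can live in a set literal.
assert (axis == 'rows' or axis == 'cols') == (axis in {'rows', 'cols'})

# Lists are unhashable, so the candidates go in a tuple instead of a set;
# `in` on a tuple still compares with ==, element by element.
assert (pi == [None] or pi == [1]) == (pi in ([None], [1]))
```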

hail/python/hail/experimental/sparse_mt/densify.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ def densify(sparse_mt):
     roughly costing as much as reading a matrix table created by importing a dense
     project VCF.
     """
-    if list(sparse_mt.row_key)[0] != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus):
+    if next(iter(sparse_mt.row_key)) != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus):
         raise ValueError("first row key field must be named 'locus' and have type 'locus'")
     if 'END' not in sparse_mt.entry or sparse_mt.END.dtype != hl.tint32:
         raise ValueError("'densify' requires 'END' entry field of type 'int32'")
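The change here swaps `list(x)[0]` for `next(iter(x))`, the form ruff prefers (RUF015) for taking the first element without materialising the whole iterable. A small equivalence check with a placeholder key tuple:

```python
row_key = ('locus', 'alleles')

# Both expressions yield the first key, but next(iter(...)) stops after one
# element instead of first copying everything into a list.
assert list(row_key)[0] == next(iter(row_key)) == 'locus'
```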

hail/python/hail/experimental/tidyr.py

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def spread(ht, field, value, key=None) -> Table:

     field_vals = list(ht.aggregate(hl.agg.collect_as_set(ht[field])))
     ht = ht.group_by(*key).aggregate(
-        **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set(key + [field, value])},
+        **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set([*key, field, value])},
         **{
             fv: hl.agg.filter(
                 ht[field] == fv,
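Here list concatenation (`key + [field, value]`) becomes iterable unpacking inside the literal, the spelling ruff's collection-concatenation rule (RUF005) prefers; the resulting set is identical. A quick check with placeholder names:

```python
key = ['sample_id']
field, value = 'phenotype', 'measurement'

# Concatenation builds an intermediate list; unpacking writes the elements
# straight into the new literal. Either way the contents are the same.
assert set(key + [field, value]) == set([*key, field, value]) == {
    'sample_id',
    'phenotype',
    'measurement',
}
```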

hail/python/hail/expr/expressions/base_expression.py

Lines changed: 14 additions & 13 deletions
@@ -80,9 +80,10 @@ def _ascii_string(self, depth, prefix):
         for name, v in self.summ_fields.items():
             summary += f'\n{spacing} {name.rjust(max_n_len)}: {self.format(v)}'
         for name, field in self.nested.items():
+            _name = name
             if prefix is not None:
-                name = f'{prefix}{name}'
-            summary += field._ascii_string(depth + 1, prefix=name)
+                _name = f'{prefix}{name}'
+            summary += field._ascii_string(depth + 1, prefix=_name)

         return summary

@@ -102,9 +103,10 @@ def _html_string(self, prefix):
             summary += f'<tr><td>{html.escape(name)}</td><td>{html.escape(self.format(v))}</td></tr>'
         summary += '</tbody></table>'
         for name, field in self.nested.items():
+            _name = name
             if prefix is not None:
-                name = f'{prefix}{name}'
-            summary += '<li>' + field._html_string(prefix=name) + '</li>'
+                _name = f'{prefix}{name}'
+            summary += '<li>' + field._html_string(prefix=_name) + '</li>'
         summary += '</ul>'

         return summary
@@ -533,15 +535,15 @@ def unify_exprs(*exprs: 'Expression') -> Tuple:

     # all types are the same
     if len(types) == 1:
-        return exprs + (True,)
+        return (*exprs, True)

     for t in types:
         c = expressions.coercer_from_dtype(t)
         if all(c.can_coerce(e.dtype) for e in exprs):
-            return tuple([c.coerce(e) for e in exprs]) + (True,)
+            return (*tuple([c.coerce(e) for e in exprs]), True)

     # cannot coerce all types to the same type
-    return exprs + (False,)
+    return (*exprs, False)


 class Expression(object):
@@ -617,7 +619,7 @@ def __nonzero__(self):
         )

     def __iter__(self):
-        raise ExpressionException(f"{repr(self)} object is not iterable")
+        raise ExpressionException(f"{self!r} object is not iterable")

     def _compare_op(self, op, other):
         other = to_expr(other)
@@ -655,7 +657,7 @@ def _promote_numeric(self, typ):
     @staticmethod
     def _div_ret_type_f(t):
         assert is_numeric(t)
-        if t == tint32 or t == tint64:
+        if t in {tint32, tint64}:
             return tfloat64
         else:
             # Float64 or Float32
@@ -1240,11 +1242,10 @@ def summarize(self, handler=None):
         if self in src._fields:
             field_name = src._fields_inverse[self]
             prefix = field_name
+        elif self._ir.is_nested_field:
+            prefix = self._ir.name
         else:
-            if self._ir.is_nested_field:
-                prefix = self._ir.name
-            else:
-                prefix = '<expr>'
+            prefix = '<expr>'

         if handler is None:
             handler = hl.utils.default_handler()
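The first two hunks in this file stop rebinding the loop variable `name` inside the loop body (the redefined-loop-name pattern); a fresh `_name` carries the prefixed value while the loop variable stays untouched for later iterations. The later hunks replace tuple concatenation with unpacking. A compact sketch of both rewrites, with invented data:

```python
nested = {'a': 1, 'b': 2}
prefix = 'x.'

# Rebinding `name` inside the loop would shadow the original key; binding a
# new variable keeps the loop variable intact on every iteration.
prefixed = []
for name, _field in nested.items():
    _name = name
    if prefix is not None:
        _name = f'{prefix}{name}'
    prefixed.append(_name)
assert prefixed == ['x.a', 'x.b']

# Tuple unpacking replaces concatenation when appending a flag to a tuple.
exprs = (1, 2, 3)
assert (*exprs, True) == exprs + (True,)
```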
