Skip to content

Commit

Permalink
Merge pull request #168 from ONSdigital/lack-of-back-data-mishandled
Browse files Browse the repository at this point in the history
lack of back data mishandled
  • Loading branch information
arthompson authored Jun 23, 2023
2 parents 5a77292 + 918bbd5 commit 19d8a7f
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 39 deletions.
3 changes: 0 additions & 3 deletions .flake8

This file was deleted.

4 changes: 2 additions & 2 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ Briefly describe the purpose of the pr.

## Description

Add a more detailed description of the pr including background and ticket
references if necessary.
Add a more detailed description of the pr if necessary (can reference release
notes if included).
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ venv.bak/
.devcontainer/
# Editor backup files
*~
# portray generated docs
site/

# asdf tool versions
.tool-versions

3 changes: 0 additions & 3 deletions .isort.cfg

This file was deleted.

38 changes: 28 additions & 10 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "statistical_methods_library"
version = "13.1.0"
version = "13.1.1"
description = ""
authors = ["Your Name <[email protected]>"]
license = "MIT"
Expand All @@ -18,6 +18,7 @@ coverage = "^7.2"
pytest-cov = "^4.1.0"
pytest-dependency = "^0.5.1"
pytest-tap = "^3.3"
flake8-pyproject = "^1.2.3"

[tool.pytest.ini_options]
junit_suite_name = "statistical_methods_library"
Expand All @@ -31,3 +32,8 @@ addopts = [
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"


[tool.flake8]
max-line-length = 90
exclude = ".venv"
18 changes: 18 additions & 0 deletions release-notes/13.1.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Statistical Methods Library 13.1.1

Release date: 2023-06-23

## Synopsis

This release fixes a crash that occurred when a link filter was passed to
imputation without any back data.

## Changes

Imputation can now handle being given a link filter without back data.
Previously it would incorrectly attempt to use the back data in this case and
crash because `back_data_df` was None.

## Notes

This change has no impact on existing outputs.
21 changes: 12 additions & 9 deletions statistical_methods_library/imputation/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,15 +270,18 @@ def impute(
}

if link_filter:
filtered_refs = (
input_df.unionByName(back_data_df, allowMissingColumns=True).select(
col(reference_col).alias("ref"),
col(period_col).alias("period"),
col(grouping_col).alias("grouping"),
(
expr(link_filter) if isinstance(link_filter, str) else link_filter
).alias("match"),
)
if back_data_df:
filtered_refs = input_df.unionByName(back_data_df, allowMissingColumns=True)
else:
filtered_refs = input_df

filtered_refs = filtered_refs.select(
col(reference_col).alias("ref"),
col(period_col).alias("period"),
col(grouping_col).alias("grouping"),
(expr(link_filter) if isinstance(link_filter, str) else link_filter).alias(
"match"
),
).localCheckpoint(eager=False)

prepared_df = (
Expand Down
10 changes: 2 additions & 8 deletions tests/imputation/test_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,15 @@ def test_calculations(fxt_load_test_csv, ratio_calculator, scenario_type, scenar
back_data_df = scenario_expected_output.filter(
col(fields["period_col"]) < starting_period
)

imputation_kwargs["back_data_df"] = back_data_df
if back_data_df.count() > 0:
imputation_kwargs["back_data_df"] = back_data_df

scenario_input = scenario_input.filter(col(fields["period_col"]) >= starting_period)

scenario_expected_output = scenario_expected_output.filter(
col(fields["period_col"]) >= starting_period
)

# We need to drop our auxiliary column from our output now
# we've potentially set up our back data as this must not come out of
# imputation.
scenario_expected_output = scenario_expected_output.drop(
fields["auxiliary_col"],
)
scenario_actual_output = imputation.impute(
input_df=scenario_input, **imputation_kwargs
)
Expand Down

0 comments on commit 19d8a7f

Please sign in to comment.