Skip to content

Commit

Permalink
Merge pull request #141 from ONSdigital/spp-8508
Browse files Browse the repository at this point in the history
Spp 8508
  • Loading branch information
Piwington authored Feb 6, 2023
2 parents 3236514 + b2ee324 commit a805546
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 274 deletions.
21 changes: 21 additions & 0 deletions docs/release-notes/9.1.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Statistical Methods Library 9.1.0

Release date: 2023-02-06

## Synopsis

This release fixes an issue with how the link filter is applied when back
data is used in the Ratio of Means imputation method.

## Changes

In the Ratio of Means imputation method, link filtering now happens after
back data is validated. If back data is provided, it is added to the input
data before the filter is applied. This fixes an issue where all the back
data was filtered out of link calculations whenever a link filter was used.

## Notes

Any data produced by a prior version of the Ratio of Means method in runs
that used both a link filter and back data should be considered suspect. It
is recommended to repeat such runs with this new version.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "statistical_methods_library"
version = "9.0.1"
version = "9.1.0"
description = ""
authors = ["Your Name <[email protected]>"]
license = "MIT"
Expand Down
11 changes: 6 additions & 5 deletions statistical_methods_library/imputation/ratio_of_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,11 +219,6 @@ def impute(
["target", "forward", "backward", "construction"],
)

if link_filter:
filtered_refs = input_df.filter(link_filter).select(
col(reference_col).alias("ref"), col(period_col).alias("period")
)

# Cache the prepared back data df since we'll need a few differently
# filtered versions
prepared_back_data_df = None
Expand All @@ -232,6 +227,12 @@ def impute(
prepared_back_data_df = validation.validate_dataframe(
back_data_df, back_expected_columns, type_mapping, ["ref", "period"]
)
input_df = input_df.unionByName(back_data_df, allowMissingColumns=True)

if link_filter:
filtered_refs = input_df.filter(link_filter).select(
col(reference_col).alias("ref"), col(period_col).alias("period")
)

# Store the value for the period prior to the start of imputation.
# Stored as a value to avoid a join in output creation.
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
identifier,date,group,other,output,forward,backward,construction,output,marker,count_forward,count_backward,count_construction
10001,202001,1,10,547,1.0,0.992513369,15.46666667,547,R,,2,2
10001,202002,1,10,362,1.007543103,0.843101893,15.58333333,362,R,2,2,2
10001,202003,1,10,895,1.186096257,1.0,18.48333333,895,R,2,,2
10002,202001,1,50,381,1.0,0.992513369,15.46666667,381,R,,2,2
10002,202002,1,50,573,1.007543103,0.843101893,15.58333333,573,R,2,2,2
10002,202003,1,50,214,1.186096257,1.0,18.48333333,214,R,2,,2
10003,202001,2,12,961,1.0,1.693854749,72.19047619,961,R,,2,2
10003,202002,2,12,267,0.590369393,0.852380952,42.61904762,267,R,2,2,2
10003,202003,2,12,314,1.173184358,1.0,50,314,R,2,,2
10004,202001,2,9,555,1.0,1.693854749,72.19047619,555,R,,2,2
10004,202002,2,9,628,0.590369393,0.852380952,42.61904762,628,R,2,2,2
10004,202003,2,9,736,1.173184358,1.0,50,736,R,2,,2
10005,202001,1,18,278.4,1.0,0.992513369,15.46666667,278.4000001,C,,2,2
10005,202002,2,18,767.1429,0.590369393,0.852380952,42.61904762,767.1428572,C,2,2,2
10005,202003,2,18,899.9997,1.173184358,1.0,50,900.00000,FIC,2,,2
10006,202001,1,85,150000,1.0,0.992513369,15.46666667,547,R,,2,2
10006,202002,1,85,180000,1.007543103,0.843101893,15.58333333,362,R,2,2,2
10006,202003,1,85,120000,1.186096257,1.0,18.48333333,895,R,2,,2
10007,202001,1,71,1098.133334,1.0,0.992513369,15.46666667,547,C,,2,2
10007,202002,1,71,1106.416666,1.007543103,0.843101893,15.58333333,362,FIC,2,2,2
10007,202003,1,71,1312.316666,1.186096257,1.0,18.48333333,895,FIC,2,,2
10008,202001,2,85,150000,1.0,1.693854749,72.19047619,961,R,,2,2
10008,202002,2,85,180000,0.590369393,0.852380952,42.61904762,267,R,2,2,2
10008,202003,2,85,120000,1.173184358,1.0,50,314,R,2,,2
10009,202001,2,71,5125.523809,1.0,1.693854749,72.19047619,961,C,,2,2
10009,202002,2,71,3025.952380,0.590369393,0.852380952,42.61904762,267,FIC,2,2,2
10009,202003,2,71,3550.000001,1.173184358,1.0,50,314,FIC,2,,2
identifier,date,group,other,output,forward,backward,construction,marker,count_forward,count_backward,count_construction
10001,202001,1,10,547,1,0.992513369,15.46666667,R,,2,2
10001,202002,1,10,362,1.007543103,0.843101893,15.58333333,R,2,2,2
10001,202003,1,10,895,1.186096257,1,18.48333333,R,2,,2
10002,202001,1,50,381,1,0.992513369,15.46666667,R,,2,2
10002,202002,1,50,573,1.007543103,0.843101893,15.58333333,R,2,2,2
10002,202003,1,50,214,1.186096257,1,18.48333333,R,2,,2
10003,202001,2,12,961,1,1.693854749,72.19047619,R,,2,2
10003,202002,2,12,267,0.590369393,0.852380952,42.61904762,R,2,2,2
10003,202003,2,12,314,1.173184358,1,50,R,2,,2
10004,202001,2,9,555,1,1.693854749,72.19047619,R,,2,2
10004,202002,2,9,628,0.590369393,0.852380952,42.61904762,R,2,2,2
10004,202003,2,9,736,1.173184358,1,50,R,2,,2
10005,202001,1,18,278.4,1,0.992513369,15.46666667,C,,2,2
10005,202002,2,18,767.142857,0.590369393,0.852380952,42.61904762,C,2,2,2
10005,202003,2,18,900.000,1.173184358,1,50,FIC,2,,2
10006,202001,1,85,150000,1,0.992513369,15.46666667,R,,2,2
10006,202002,1,85,180000,1.007543103,0.843101893,15.58333333,R,2,2,2
10006,202003,1,85,120000,1.186096257,1,18.48333333,R,2,,2
10007,202001,1,71,1098.133333,1,0.992513369,15.46666667,C,,2,2
10007,202002,1,71,1106.416666,1.007543103,0.843101893,15.58333333,FIC,2,2,2
10007,202003,1,71,1312.316666,1.186096257,1,18.48333333,FIC,2,,2
10008,202001,2,85,150000,1,1.693854749,72.19047619,R,,2,2
10008,202002,2,85,180000,0.590369393,0.852380952,42.61904762,R,2,2,2
10008,202003,2,85,120000,1.173184358,1,50,R,2,,2
10009,202001,2,71,5125.523810,1,1.693854749,72.19047619,C,,2,2
10009,202002,2,71,3025.952381,0.590369393,0.852380952,42.61904762,FIC,2,2,2
10009,202003,2,71,3550.000000,1.173184358,1,50,FIC,2,,2
Loading

0 comments on commit a805546

Please sign in to comment.