From d01820b6e7a1eae70badc83d9254c45b8982119f Mon Sep 17 00:00:00 2001
From: nicolefindstar
Date: Mon, 29 Jul 2024 14:08:11 +0200
Subject: [PATCH] Harmonize SBP_CRF93_Abseentism in SPSS and added Stata and Python

Compute the percentage of school-registered children who missed school
because of ill health, with equivalent SPSS, Stata and Python scripts.

- Stata/Python: new scripts that rename the exported columns, flag
  ill-health absences and report the percentage among registered children
  (unweighted, plus a weighted variant).
- SPSS: add the same rename/label steps and use
  PChildDayAbsSchoolWhy_IllHealth directly as the binary indicator.
- SPSS banner comments end with a period so that the command on the next
  line is not absorbed into the comment.

---
 .../SBP_CRF93_Abseentism.do                   | 52 +++++++++++++++++++
 .../SBP_CRF93_Abseentism.py                   | 47 +++++++++++++++++
 .../SBP_CRF93_Abseentism.sps                  | 48 ++++++++++++-----
 3 files changed, 134 insertions(+), 13 deletions(-)
 create mode 100644 Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.do
 create mode 100644 Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.py

diff --git a/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.do b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.do
new file mode 100644
index 0000000..fc2061c
--- /dev/null
+++ b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.do
@@ -0,0 +1,52 @@
+*******************************************************************************
+* WFP Standardized Scripts
+* Calculating Percentage of School Absence Due to Ill Health
+*******************************************************************************
+
+* Load Data --------------------------------------------------------------------
+* import delimited "SBPProcessM_module_SchoolAgeChildRoster_submodule_RepeatSchoolAgeChild.csv", clear
+
+* Rename Variables -------------------------------------------------------------
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildRegisterSchool) PChildRegisterSchool
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAttendSchool) PChildDayAttendSchool
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchool) PChildDayAbsSchool
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchoolWhy/1) PChildDayAbsSchoolWhy_IllHealth
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsIllHealth) PChildDayAbsIllHealth
+rename (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildAbsIllHealth) PChildAbsIllHealth
+
+* Assign Variable Labels (\$ keeps the literal question text; an unescaped ${...} is expanded as a global macro)
+label variable PChildRegisterSchool "Is \${PChildName} registered in school?"
+label variable PChildDayAttendSchool "In the last 30 school days, how many days did \${PChildName} go to school?"
+label variable PChildDayAbsSchool "In the last 30 school days, how many days was \${PChildName} absent from school?"
+label variable PChildDayAbsSchoolWhy_IllHealth "What was the reason they missed school: Ill-health/sick"
+label variable PChildDayAbsIllHealth "How many days was your child absent from school because of ill-health"
+label variable PChildAbsIllHealth "Please, specify the type of illness"
+
+* Convert Variables -------------------------------------------------------------
+* Ensure variables are in the correct format (generated names must stay within Stata's 32-character limit)
+recode PChildDayAbsSchoolWhy_IllHealth (0 = 0) (1 = 1), generate(PChildAbsWhy_IllHealth_bool)
+recode PChildRegisterSchool (1/1 = 1), generate(PChildRegisterSchool_num)
+
+* Calculate Percentages (Without Weights) --------------------------------------
+* Only registered children count, in both the denominator and the numerator
+gen Total_Registered = cond(PChildRegisterSchool == 1, 1, 0)
+egen Total_Registered_count = total(Total_Registered)
+
+gen Absent_Due_To_Ill_Health = cond(PChildRegisterSchool == 1 & PChildDayAbsSchoolWhy_IllHealth == 1, 1, 0)
+egen Absent_Due_To_Ill_Health_count = total(Absent_Due_To_Ill_Health)
+
+gen Percentage = (Absent_Due_To_Ill_Health_count / Total_Registered_count) * 100
+list Percentage in 1/10
+
+* Calculate Percentages (With Weights) -----------------------------------------
+* Note: Replace WeightVariable with the actual weight variable name.
+gen Total_Registered_Weighted = cond(PChildRegisterSchool == 1, WeightVariable, 0)
+egen Total_Registered_Weighted_sum = total(Total_Registered_Weighted)
+
+gen Absent_Ill_Health_Wt = cond(PChildRegisterSchool == 1 & PChildDayAbsSchoolWhy_IllHealth == 1, WeightVariable, 0)
+egen Absent_Ill_Health_Wt_sum = total(Absent_Ill_Health_Wt)
+
+gen Percentage_Wt = (Absent_Ill_Health_Wt_sum / Total_Registered_Weighted_sum) * 100
+list Percentage_Wt in 1/10
+
+* End of Scripts
\ No newline at end of file
diff --git a/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.py b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.py
new file mode 100644
index 0000000..7f97709
--- /dev/null
+++ b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.py
@@ -0,0 +1,47 @@
+#------------------------------------------------------------------------------#
+# WFP Standardized Scripts
+# Calculating Percentage of School Absence Due to Ill Health
+#------------------------------------------------------------------------------#
+
+# Load Packages --------------------------------------------------------------#
+import pandas as pd
+
+# Load Sample Data ------------------------------------------------------------#
+# Load the dataset (adjust the file path and name as needed)
+df = pd.read_csv("SBPProcessM_module_SchoolAgeChildRoster_submodule_RepeatSchoolAgeChild.csv")
+
+# Rename variables to match the R script --------------------------------------#
+df.rename(columns={
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildRegisterSchool': 'PChildRegisterSchool',
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAttendSchool': 'PChildDayAttendSchool',
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchool': 'PChildDayAbsSchool',
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchoolWhy/1': 'PChildDayAbsSchoolWhy_IllHealth',
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsIllHealth': 'PChildDayAbsIllHealth',
+    'SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildAbsIllHealth': 'PChildAbsIllHealth'
+}, inplace=True)
+
+# Convert variables to appropriate types ---------------------------------------#
+# Ensure PChildDayAbsSchoolWhy_IllHealth is binary (True/False)
+df['PChildDayAbsSchoolWhy_IllHealth'] = df['PChildDayAbsSchoolWhy_IllHealth'] == 1
+
+# Filter dataset for children registered to attend school -----------------------#
+df_registered = df[df['PChildRegisterSchool'] == 1].copy()
+
+# Calculate the percentage of absences due to ill health ------------------------#
+# Without weights (reported as NaN when no child is registered)
+total_registered = len(df_registered)
+absent_due_to_ill_health = df_registered['PChildDayAbsSchoolWhy_IllHealth'].sum()
+percentage_absent_due_to_ill_health = (absent_due_to_ill_health / total_registered) * 100 if total_registered else float("nan")
+
+print(f"Percentage of children absent due to ill health (without weights): {percentage_absent_due_to_ill_health:.2f}%")
+
+# If you have a weight variable, include it in the calculation ------------------#
+# Uncomment and adjust the following if a weight variable is available
+# df_registered['WeightVariable'] = df_registered['WeightVariable'].fillna(0)  # Replace with actual weight column
+# total_registered_weighted = df_registered['WeightVariable'].sum()
+# absent_due_to_ill_health_weighted = df_registered.loc[df_registered['PChildDayAbsSchoolWhy_IllHealth'], 'WeightVariable'].sum()
+# percentage_absent_due_to_ill_health_wt = (absent_due_to_ill_health_weighted / total_registered_weighted) * 100
+
+# print(f"Percentage of children absent due to ill health (with weights): {percentage_absent_due_to_ill_health_wt:.2f}%")
+
+# End of Scripts ----------------------------------------------------------------#
\ No newline at end of file
diff --git a/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.sps b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.sps
index 013d4eb..8ecfb03 100644
--- a/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.sps
+++ b/Indicators/SchoolBasedProgrammes-CRF93-SchoolAbseentism/SBP_CRF93_Abseentism.sps
@@ -1,27 +1,49 @@
-* Encoding: UTF-8.
+***------------------------------------------------------------------------------*
+*** WFP Standardized Scripts
+*** Calculating Percentage of School Absence Due to Ill Health
+***------------------------------------------------------------------------------*
 
-*this syntax is based on SPSS download version from MoDA
+* Encoding: UTF-8.
 
-* Filter dataset for children registered to attend school.
-SELECT IF (PChildRegisterSchool = 1).
+* Rename Variables -------------------------------------------------------------*.
+RENAME VARIABLES (SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildRegisterSchool = PChildRegisterSchool
+    SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAttendSchool = PChildDayAttendSchool
+    SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchool = PChildDayAbsSchool
+    SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsSchoolWhy/1 = PChildDayAbsSchoolWhy_IllHealth
+    SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildDayAbsIllHealth = PChildDayAbsIllHealth
+    SBPProcessM_module/SchoolAgeChildRoster_submodule/RepeatSchoolAgeChild/PChildAbsIllHealth = PChildAbsIllHealth).
+
+* Assign Variable Labels --------------------------------------------------------*.
+VARIABLE LABELS PChildRegisterSchool "Is ${PChildName} registered in school?"
+    PChildDayAttendSchool "In the last 30 school days, how many days did ${PChildName} go to school?"
+    PChildDayAbsSchool "In the last 30 school days, how many days was ${PChildName} absent from school?"
+    PChildDayAbsSchoolWhy_IllHealth "What was the reason they missed school: Ill-health/sick"
+    PChildDayAbsIllHealth "How many days was your child absent from school because of ill-health"
+    PChildAbsIllHealth "Please, specify the type of illness".
 
-* Compute a variable for absence due to ill health as a binary indicator.
-COMPUTE AbsentDueToIllHealth = (PChildDayAbsSchoolWhy.1 = 1).
+* Convert Variables ------------------------------------------------------------*
+* Ensure the relevant variables are in the correct format.
+COMPUTE PChildDayAbsSchoolWhy_IllHealth = (PChildDayAbsSchoolWhy_IllHealth = 1).
 
-* Aggregate data to calculate total registered and total absent due to ill health.
+* Filter dataset for children registered to attend school -----------------------*.
+SELECT IF (PChildRegisterSchool = 1).
+
+* Aggregate data to calculate total registered and total absent due to ill health.
 DATASET DECLARE AggregatedData.
 AGGREGATE OUTFILE='AggregatedData'
   /BREAK=
   /TotalRegistered = N
-  /TotalAbsentDueToIllHealth = SUM(AbsentDueToIllHealth).
+  /TotalAbsentDueToIllHealth = SUM(PChildDayAbsSchoolWhy_IllHealth).
 
-* Compute the percentage of absences due to ill health.
+* Compute the percentage of absences due to ill health ---------------------------*.
 DATASET ACTIVATE AggregatedData.
 COMPUTE PercentAbsentDueToIllHealth = (TotalAbsentDueToIllHealth / TotalRegistered) * 100.
 
-* Assign a label to the new percentage variable.
+* Assign a label to the new percentage variable --------------------------------*.
 VARIABLE LABELS PercentAbsentDueToIllHealth '% Children Missing School Due to Ill Health'.
 
-* Display the result.
-FORMATS PercentAbsentDueToIllHealth (F2.2).
-LIST.
\ No newline at end of file
+* Display the result -----------------------------------------------------------*.
+FORMATS PercentAbsentDueToIllHealth (F5.2).
+LIST.
+
+* End of Scripts.
\ No newline at end of file