GarrettMerz
diff --git a/‎appendix/TMVABDTStudies.tex‎
Lines changed: 1 addition & 1 deletion b/‎appendix/TMVABDTStudies.tex‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎appendix/couplings_auxplots.tex‎
Lines changed: 3 additions & 5 deletions b/‎appendix/couplings_auxplots.tex‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎appendix/gpr_templates.tex‎
Lines changed: 10 additions & 10 deletions b/‎appendix/gpr_templates.tex‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎appendix/gpr_validation.tex‎
Lines changed: 16 additions & 32 deletions b/‎appendix/gpr_validation.tex‎
Lines changed: 16 additions & 32 deletions
diff --git a/‎sections/couplings_chapter.tex‎
Lines changed: 12 additions & 16 deletions b/‎sections/couplings_chapter.tex‎
Lines changed: 12 additions & 16 deletions
@@ -91,7 +91,7 @@ \subsubsection{Leptonic channel}
 \item The second-smallest $\Delta R$ between a photon and a jet in the event
 \end{itemize} 
 
-The linear correlations between these variables in $ttH$ CP even and CP odd MadGraph5\_aMC@NLO+Pythia8 Monte Carlo are shown in Figure \ref{fig:lepcorr4vec}.  Figures \ref{fig:lep4vecvbls1} - \ref{fig:lep4vecvbls6} compare the distribution of each training variable in $ttH$ CP even and CP odd Monte Carlo.
+The linear correlations between these variables in $ttH$ CP-even and CP-odd MadGraph5\_aMC@NLO+Pythia8 Monte Carlo are shown in Figure~\ref{fig:lepcorr4vec}.  Figures~\ref{fig:lep4vecvbls1}--\ref{fig:lep4vecvbls6} compare the distribution of each training variable in $ttH$ CP-even and CP-odd Monte Carlo.
 
 \begin{figure}[htbp]
   \centering
 
@@ -12,14 +12,12 @@
 %        \end{figure}
 %\end{center}
 
-\begin{landscape}
-\begin{figure}[h]
+\begin{sidewaysfigure}[h]
 \centering
-\includegraphics[width=1.5\textwidth]{figures/couplings_chapter/purity_2D.pdf}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D.pdf}
 \caption{Contribution of STXS truth bins to each analysis category in total event yield. The top row corresponds to the value of $S_{90}/(S_{90} + B_{90})$ in each category, where $S_{90}$ and $B_{90}$ are respectively the total number of signal (including all STXS regions) and background events expected in the smallest $m_{\gamma \gamma}$ range containing 90\% of the signal yield. Other entries correspond to the percentage contribution of a given STXS truth bin to the Higgs signal yield in each analysis category. Entries for the STXS regions targeted by each analysis category are outlined in black if this value is above 15\%. }
 \label{fig:design:yields}
-\end{figure}
-\end{landscape}
+\end{sidewaysfigure}
 
 \begin{figure}[htbp]
   \centering
 
@@ -807,6 +807,16 @@ \subsection{Spurious Signal GPR-smoothed templates}
 			GG2H\_PTH\_300\_450\_\_2                                  & Pow &-3.49&0.851\\
 			GG2H\_PTH\_450\_650\_\_0                                  & Exp* &-0.67&N/A\\
 			GG2H\_PTH\_450\_650\_\_1                                  & Exp* &-0.96&0.0262\\
+			GG2H\_PTH\_GT650\_\_0                                     & Exp* &0.63&N/A\\
+			GG2H\_PTH\_GT650\_\_1                                     & Exp* &-0.36&N/A\\
+			QQ2HQQ\_0J\_\_0                                             & Exp* &-0.68&N/A\\
+			QQ2HQQ\_0J\_\_1                                             & Exp* &-0.33&-0.204\\
+			QQ2HQQ\_1J\_\_0                                             & Exp* &-0.53&N/A\\
+			QQ2HQQ\_1J\_\_1                                             & Exp* &0.44&0.247\\
+			QQ2HQQ\_1J\_\_2                                             & Pow &-1.35&-0.67\\
+			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_0                               & Exp* &0.64&N/A\\
+			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_1                               & Exp* &-0.39&-0.0541\\
+			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_2                               & Exp &-1.51&0.221\\
 			\hline\hline
 		\end{tabular}
 	}
@@ -825,16 +835,6 @@ \subsection{Spurious Signal GPR-smoothed templates}
 			                                                          & Function    & \multicolumn{2}{c}{$\max(S)$} \\ 
 			Event category                                            &       & Nominal  & Smooth temp \\ 
 			\hline\hline
-			GG2H\_PTH\_GT650\_\_0                                     & Exp* &0.63&N/A\\
-			GG2H\_PTH\_GT650\_\_1                                     & Exp* &-0.36&N/A\\
-			QQ2HQQ\_0J\_\_0                                             & Exp* &-0.68&N/A\\
-			QQ2HQQ\_0J\_\_1                                             & Exp* &-0.33&-0.204\\
-			QQ2HQQ\_1J\_\_0                                             & Exp* &-0.53&N/A\\
-			QQ2HQQ\_1J\_\_1                                             & Exp* &0.44&0.247\\
-			QQ2HQQ\_1J\_\_2                                             & Pow &-1.35&-0.67\\
-			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_0                               & Exp* &0.64&N/A\\
-			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_1                               & Exp* &-0.39&-0.0541\\
-			QQ2HQQ\_GE2J\_MJJ\_0\_60\_\_2                               & Exp &-1.51&0.221\\
 			QQ2HQQ\_GE2J\_MJJ\_60\_120\_\_0                             & Exp* &0.66&0.0616\\
 			QQ2HQQ\_GE2J\_MJJ\_60\_120\_\_1                             & Pow &-2.35&0.216\\
 			QQ2HQQ\_GE2J\_MJJ\_120\_350\_\_0                            & Exp* &-0.6&N/A\\
 
@@ -105,8 +105,7 @@ \subsection{Nominal Bias Study}
 
 We report the results of the nominal bias study for all categories in \Tab{\ref{tab:NoSigSS}}.
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -153,8 +152,7 @@ \subsection{Nominal Bias Study}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics.}
 		\label{tab:NoSigSS}
-	\end{table}	
-\end{landscape}
+\end{sidewaystable}	
 
 To determine how to reduce the bias further, we note a further set of tests performed in an earlier iteration of this method \cite{Hyneman}, evaluating the difference in GP fit bias when different functional priors were used as the GP mean. Templates were constructed for several statistics regimes using power law (Fig.~\ref{fig:prior_bias_powerlaw}), ExpPoly2 (Fig.~\ref{fig:prior_bias_exppoly2}), and Bernstein 5 (Fig.~\ref{fig:prior_bias_bern5}) functions as the template basis; the possible choices of GP mean tested for each template were an exponential function, a linear function, and a flat line. In the tested templates with more than 10 effective MC events per bin, the choice of GP mean does not seem to affect the GP fit behavior significantly, though some fitting bias is observed in the lower-statistics templates. The unit of the y-axis in these plots is the percentage disagreement between the smoothed and the unsmoothed template, similar to a ratio plot.
 
@@ -270,8 +268,7 @@ \subsection{Extended Templates}
 
 We report the results of the padded-template bias study for all categories in \Tab{\ref{tab:NoSigSSpadded}}.
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcS
@@ -316,8 +313,7 @@ \subsection{Extended Templates}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics.}
 		\label{tab:NoSigSSpadded}
-	\end{table}	
-\end{landscape}
+\end{sidewaystable}	
 
 \subsection{Extended Templates, Linear Error Kernel}
 
@@ -406,8 +402,7 @@ \subsection{Extended Templates, Linear Error Kernel}
 
 We report the results of the padded-template, linear-error kernel bias study for all categories in \Tab{\ref{tab:NoSigSSlinear}}.
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -454,17 +449,15 @@ \subsection{Extended Templates, Linear Error Kernel}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics.}
 		\label{tab:NoSigSSlinear}
-	\end{table}	
-\end{landscape}
+\end{sidewaystable}	
 
 To further validate the choice of 20 effective events per bin as the cutoff, we investigate some edge cases. Since the templates have 130 bins (due to the 10-bin padding on either side), we note that the 1000-event templates have just over 7 effective events per bin, while the 10000-event templates have just over 76 effective events per bin. We test templates with exactly 10 effective events per bin (1300 total events), slightly more than 10 effective events per bin (1400 total events), exactly 20 effective events per bin (2600 total events), and slightly more than 21 effective events per bin (2800 total events).
 
 We note that, in the 10 event/bin regime, for templates generated with ExpPoly2 and ExpPoly3, we see no bias when fitting with ExpPoly2 and ExpPoly3, but see a bias of roughly 35\% of the statistical uncertainty on the spurious signal when fitting with lower degree-of-freedom templates (i.e., Exponential and Powerlaw). Upon closer examination, we observe that this is due to the presence of more substantial edge effects in the low-mass region of this very low statistics category that cannot be appropriately modelled by the Gaussian Process.
 
 However, by requiring at least 20 effective events per bin, we observe that this bias is reduced to less than or equal to 20\% of the statistical uncertainty on the spurious signal. At the low-statistics end of this range, however, we note that the statistical uncertainty is expected to dominate (that is, spurious signal will not be a significant uncertainty), so we can conclude that the effects of the GPR bias will be minimal. We further note that, as statistics increase past 75 effective events per bin, bias drops off to less than 10\% of the spurious signal uncertainty---in regimes where the spurious signal uncertainty is expected to dominate, the bias is found to be negligible.
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -506,12 +499,10 @@ \subsection{Extended Templates, Linear Error Kernel}
 		}
 		\caption{Spurious signal means and widths for all choices of fit functional-form, using the ``low'' template with the ExpPoly2 generating functional form, for a range of different template statistics.}
 		\label{tab:NoSigSSedges1}
-	\end{table}	
-\end{landscape}
+\end{sidewaystable}	
 
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -559,8 +550,7 @@ \subsection{Extended Templates, Linear Error Kernel}
 		}
 		\caption{Spurious signal means and widths for all choices of fit functional-form, using the ``medium'' template with the ExpPoly3 generating functional form and the ``high'' template with the ExpPoly3 generating functional form, for a range of different template statistics.}
 		\label{tab:NoSigSSedges2}
-	\end{table}	
-\end{landscape}
+\end{sidewaystable}	
 
 
 \begin{figure} 
@@ -734,8 +724,7 @@ \subsection{Feature Injection Study}
 \end{center}
 \end{figure}
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -779,8 +768,7 @@ \subsection{Feature Injection Study}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics, with a signal feature injection that is approximately 3 GeV wide and 1\% of the template integral.}
 		\label{tab:SigSS}
-	\end{table}
-\end{landscape}
+\end{sidewaystable}
 
 The feature-injection bias for a three-sigma feature does not change appreciably with template shape (that is, for a given statistics level, the bias is approximately the same for all three templates). However, at high statistics, the bias / feature size drops off as a function of template statistics. This makes sense, as the presence of true underlying features in high-statistics templates is not compatible with the assumption that our true templates are smoothly falling functions. However, for templates containing greater than 20 effective background MC events per bin prior to feature injection (that is, those in the statistics range in which we conclude that it is safe to use GPR), the measured bias is less than 18\% of the injected feature size.
 
@@ -830,8 +818,7 @@ \subsection{Feature Injection Study}
 \end{center}
 \end{figure}
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -867,8 +854,7 @@ \subsection{Feature Injection Study}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics, with a signal feature injection that is approximately 3 GeV wide. The template statistics are fixed at one million events, and the feature size is varied.}
 		\label{tab:SigSSvarinj}
-	\end{table}
-\end{landscape}
+\end{sidewaystable}
 
 As a final check, we investigate the bias when a Standard-Model-signal-like feature is injected ($\sim$1 GeV wide). As expected, we see that the bias is larger---narrow features are more smoothed by the Gaussian Process fit, but are still present in the templates.
 
@@ -949,8 +935,7 @@ \subsection{Feature Injection Study}
 \end{center}
 \end{figure}
 
-\begin{landscape}
-	\begin{table}
+\begin{sidewaystable}
 		\centering 
 		\resizebox{\linewidth}{!}{
 			\begin{tabular}{lcSS
@@ -994,8 +979,7 @@ \subsection{Feature Injection Study}
 		}
 		\caption{Spurious signal means and widths for the three test functional-form distributions for a range of different template statistics, with a signal feature injection that is approximately 3 GeV wide and 1\% of the template integral.}
 		\label{tab:SigSS1S}
-	\end{table}
-\end{landscape}
+\end{sidewaystable}
 
 From these studies, we conclude that, in the presence of underlying features that we wish to preserve, the bias is dependent on both the size and shape of the expected feature---features are blunted somewhat by the GP, but are still present in the smoothed template; how much they are blunted depends on their shape and size, both absolute and relative to the template as a whole.
 
 
@@ -19,11 +19,10 @@ \section{Categorization} \label{sec:Categorization}
 
 The inputs to all BDTs are kinematic variables for the various objects in an event. In order to avoid sculpting of the shapes used in the statistical analysis, any variable found to be linearly correlated with $m_{\gamma \gamma}$ in the signal or background training samples by 5\% or more is removed from the list of inputs. The list of all variables used as input to both the multiclassifier BDT and the binary BDTs is given in Table \ref{tab:design:trainingvariables}.
 
-\begin{landscape}
 
-\begin{table}[]
+\begin{sidewaystable}[]
 \begin{center} \footnotesize
-\resizebox{1.25\textwidth}{!}{
+\resizebox{\textwidth}{!}{
 \begin{tabular}{|c|c|c|c|c|}
 \hline
 STXS regions & Multi-class BDT & STXS regions  & Binary BDT  \\ \hline
@@ -91,8 +90,7 @@ \section{Categorization} \label{sec:Categorization}
 \label{tab:design:trainingvariables}
 
 \end{center}
-\end{table}
-\end{landscape}
+\end{sidewaystable}
 
 To train the multiclassifier BDT, all signal samples are merged ($ggF$, $VH$, $VBF$, $ttH$, $tH$). A weight is then applied to each event such that all processes have equal yields in the training sample (that is, so processes such as $tH$ with a small cross-section are not underrepresented). The output of the multiclassifier BDT is a 44-dimensional vector discriminant with an index $y_{i}$ for each truth bin; these indices are then converted into class probabilities $z_{i}$ using a softmax function: $z_{i} = e^{y_{i}}/\sum_{j}e^{y_{j}}$. The BDT is trained by minimizing the cross-entropy of the softmax $z_{i}$ using the LightGBM package \cite{LightGBM}.
 
@@ -243,47 +241,45 @@ \section{Categorization} \label{sec:Categorization}
 
 \begin{figure}[h]
 \centering
-\includegraphics[width=1.09\textwidth]{figures/couplings_chapter/purity_2D_subplots_1}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_1}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \ggtoH\ categories and truth bins. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_1}
 \end{figure}
 
 
 \begin{figure}[h]
 \centering
-\includegraphics[width=1.09\textwidth]{figures/couplings_chapter/purity_2D_subplots_2}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_2}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \qqtoHqq\ categories and truth bins. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_2}
 \end{figure}
 
 
 \begin{figure}[h]
 \centering
-\includegraphics[width=1.09\textwidth]{figures/couplings_chapter/purity_2D_subplots_3}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_3}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \qqtoHll\ and \qqtoHln\ categories and truth bins. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_3}
 \end{figure}
 
 
 \begin{figure}[h]
 \centering
-\includegraphics[width=1.09\textwidth]{figures/couplings_chapter/purity_2D_subplots_4}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_4}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \ttH{}, $tWH$, and $tHjb$ categories and truth bins. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_4}
 \end{figure}
 
-\begin{landscape}
-\begin{figure}[h]
+\begin{sidewaysfigure}[h]
 \centering
-\includegraphics[width=1.5\textwidth]{figures/couplings_chapter/purity_2D_subplots_5}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_5}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \qqtoHqq\ STXS truth bins and \ggtoH\ analysis categories. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_5}
-\end{figure}
-\end{landscape}
+\end{sidewaysfigure}
 
 \begin{figure}[h]
 \centering
-\includegraphics[width=1.09\textwidth]{figures/couplings_chapter/purity_2D_subplots_6}
+\includegraphics[width=\textwidth]{figures/couplings_chapter/purity_2D_subplots_6}
 \caption{The correspondence between analysis category and STXS truth bins, in terms of the percentage contribution of a given STXS truth bin (y-axis) to the Higgs signal yield in a given analysis category (x-axis) for \ggtoH\ STXS truth bins and \qqtoHqq\ analysis categories. Entries with a value below $1\%$ are omitted.}
 \label{fig:yields_6}
 \end{figure}
@@ -292,7 +288,7 @@ \section{Signal and Background Modelling} \label{sec:SignalBackground}
 
 As in the CP analysis, a profile likelihood ratio fit is conducted simultaneously in all categories and a signal strength parameter is extracted.
 
-Signal in each category is modelled using a Double-Sided Crystal Ball function, fit to Higgs-signal Monte Carlo. The Higgs mass is fixed to the run-1 measured value of $125.09$ GeV $\pm 0.21 $GeV(stat) $\pm 0.1 $GeV(syst) \cite{Higgsmass}.
+Signal in each category is modelled using a Double-Sided Crystal Ball function, fit to Higgs-signal Monte Carlo. The Higgs mass is fixed to the Run~1 measured value of $125.09 \pm 0.21$ (stat) $\pm 0.1$ (syst) GeV~\cite{Higgsmass}.
 
 Similarly, background is modelled using the spurious signal test. As detailed in Chapter~\ref{chap:sigbkgparam}, in the $ggH$ and $qq \rightarrow Hqq'$ categories, the templates for the spurious signal study are constructed from Sherpa diphoton samples reweighted to model the proportional contributions of $\gamma \gamma$, $\gamma j$ and $jj$ events consisting of both true and fake photons in each category. In the leptonic $VH$ and $ttH+tH$ regions, however, the $\gamma j$ and $jj$ contributions are small enough to be neglected, so $V\gamma\gamma$ and $tt\gamma\gamma$ Monte Carlo respectively are used for the templates. In the low-stat categories, a Wald test is used to select the functional form. The spurious signal values and the choice of function are given in Tables~\ref{tab:spurious_sig} and~\ref{tab:spurious_sig2}.