From e62d40137deaae6a00d0db6143d71014c9653197 Mon Sep 17 00:00:00 2001 From: Charlie Kapsiak Date: Thu, 26 Sep 2024 10:51:43 -0500 Subject: [PATCH] Work on draft --- common/previous_talks.tex | 6 +- statcomm_talk/statcomm_talk.tex | 604 ++++++++++++++++---------------- 2 files changed, 311 insertions(+), 299 deletions(-) diff --git a/common/previous_talks.tex b/common/previous_talks.tex index 3691bc9..965ea9e 100644 --- a/common/previous_talks.tex +++ b/common/previous_talks.tex @@ -1,4 +1,2 @@ -\prevtalk{2023-08-26}{ -\href{https://indico.cern.ch/event/1324384/\#2-rpv-single-top-squark-analys}{2023-10-06: Introduction Talk}} - -\prevtalk{2024-05-01}{\href{https://indico.cern.ch/event/1423342/#6-rpv-single-stop-search-multi}{2024-06-14: Background Estimation}} +\prevtalk{2023-08-26}{\href{https://indico.cern.ch/event/1324384/\#2-rpv-single-top-squark-analys}{2023-10-06: Introduction Talk}} +\prevtalk{2024-05-01}{\href{https://indico.cern.ch/event/1423342/\#6-rpv-single-stop-search-multi}{2024-06-14: Background Estimation}} diff --git a/statcomm_talk/statcomm_talk.tex b/statcomm_talk/statcomm_talk.tex index 34928c3..a4ebc2d 100644 --- a/statcomm_talk/statcomm_talk.tex +++ b/statcomm_talk/statcomm_talk.tex @@ -58,7 +58,7 @@ \institute{\inst{1}University of Minnesota} -\title[Single Stop Update]{RPV Single Stop Search Statistical Inquiry} +\title[Single Stop Statistics]{RPV Single Stop Search Statistical Inquiry} \subtitle{Statistical Considerations for Non-parametric 2D Background Estimation Using Gaussian Processes} \date{2024-09-30} @@ -114,7 +114,11 @@ \item Unexplored region of RPV parameter space \item Large cross section allows us to probe higher masses \end{itemize} - \item For more information see \href{https://indico.cern.ch/event/1324384/\#2-rpv-single-top-squark-analys}{2023-10-06: Introduction Talk} + \item For more information see: %\href{https://indico.cern.ch/event/1324384/\#2-rpv-single-top-squark-analys}{2023-10-06: Introduction Talk} + + \begin{itemize} + \input{\commonfiles{previous_talks.tex}} + \end{itemize} \end{itemize} \begin{center} @@ -129,85 +133,51 @@ \end{frame} -\iflong - \begin{frame}{Current Analysis Status} - \begin{block}{} - The past months have seen substantial progress on several fronts. We summarize below the current major areas of work: - \end{block} - % \begin{columns}[t] - % \begin{column}{0.5\textwidth} - \begin{center} \textbf{Key Analysis Elements} \end{center} - \begin{itemize} - \coloreditem{ready} Control/Signal region definitions. - \coloreditem{working} Mass reconstruction. - \coloreditem{working} Background estimation - \coloreditem{prelim} Statistical analysis procedure. - \coloreditem{prelim} Trigger studies. - \coloreditem{prelim} Central MC production. - \coloreditem{prelim} Early Run3 data. 
- \end{itemize} - % \end{column} - % \begin{column}{0.5\textwidth} - % \begin{center} \textbf{Past Presentations} \end{center} - % \begin{itemize} - % \input{\commonfiles{previous_talks.tex}} - % \end{itemize} - % \end{column} - % \end{columns} - - \begin{center} - \begin{tikzpicture} - \path[fill=early] (0,0) coordinate (A) circle(0.25em); - \node[anchor=left, right=0.1em of A] (A1) {Early Stages} ; - - \path[fill=prelim] ( $ (A1.east) + (0.2,0)$) coordinate (B) circle(0.25em); - \node[anchor=left, right=0.1em of B] (B1) {Preliminary}; - - - \path[fill=working] ( $ (B1.east) + (0.2,0) $ ) coordinate (B) circle(0.25em); - \node[anchor=left, right=0.1em of B] (C1) {Working Version}; - - \path[fill=ready] ( $(C1.east) + (0.2,0) $) coordinate (D) circle(0.25em); - \node[anchor=left, right=0.1em of D] {Analysis Ready}; - \end{tikzpicture} - \end{center} - \end{frame} -\fi - -\newcommand{\specialcell}[2][c]{\begin{tabular}[#1]{@{}c@{}}#2\end{tabular}} -\begin{frame}[label=regions]{General Analysis Features} - \begin{itemize} - \item No leptons. - \item A moderate number of jets, several with high $p_{T}$. - \item Multiple b-jets with large angular separation. - \item Resonances from both the $\stopq$ and the $\chargino$. - \end{itemize} +% \iflong +% \begin{frame}{Current Analysis Status} +% \begin{block}{} +% The past months have seen substantial progress on several fronts. We summarize below the current major areas of work: +% \end{block} +% % \begin{columns}[t] +% % \begin{column}{0.5\textwidth} +% \begin{center} \textbf{Key Analysis Elements} \end{center} +% \begin{itemize} +% \coloreditem{ready} Control/Signal region definitions. +% \coloreditem{working} Mass reconstruction. +% \coloreditem{working} Background estimation +% \coloreditem{prelim} Statistical analysis procedure. +% \coloreditem{prelim} Trigger studies. +% \coloreditem{prelim} Central MC production. +% \coloreditem{prelim} Early Run3 data. 
+% \end{itemize} +% % \end{column} +% % \begin{column}{0.5\textwidth} +% % \begin{center} \textbf{Past Presentations} \end{center} +% % \begin{itemize} +% % \input{\commonfiles{previous_talks.tex}} +% % \end{itemize} +% % \end{column} +% % \end{columns} + +% \begin{center} +% \begin{tikzpicture} +% \path[fill=early] (0,0) coordinate (A) circle(0.25em); +% \node[anchor=left, right=0.1em of A] (A1) {Early Stages} ; + +% \path[fill=prelim] ( $ (A1.east) + (0.2,0)$) coordinate (B) circle(0.25em); +% \node[anchor=left, right=0.1em of B] (B1) {Preliminary}; + + +% \path[fill=working] ( $ (B1.east) + (0.2,0) $ ) coordinate (B) circle(0.25em); +% \node[anchor=left, right=0.1em of B] (C1) {Working Version}; + +% \path[fill=ready] ( $(C1.east) + (0.2,0) $) coordinate (D) circle(0.25em); +% \node[anchor=left, right=0.1em of D] {Analysis Ready}; +% \end{tikzpicture} +% \end{center} +% \end{frame} +% \fi - \begin{center} - \scalebox{0.8}{ - \begin{tabular}{|ccccc|} - \hline - \multicolumn{5}{|c|}{Baseline Selections} \\ \hline - \multicolumn{5}{|c|}{\texttt{HLT\_PFHT* | HLT\_AK8PFJet*\_TrimMass*}} \\ - \multicolumn{5}{|c|}{$4 \leq \mathrm{N_j} \leq 6$ ($p_{\mathrm{T,j}} > 30~\text{GeV}$, $|\eta_{\mathrm{j}}| < 2.4$)} \\ - \multicolumn{5}{|c|}{$p_{\mathrm{T,j_1}} > 300~\text{GeV}$} \\ - \multicolumn{5}{|c|}{$\mathrm{N}_e (\text{tight}), \mathrm{N}_\mu (\text{medium}) = 0$} \\ - \multicolumn{5}{|c|}{\rule[-0.5em]{0em}{0em}$m_4 \equiv m_{\mathrm{j_1,j_2,j_3,j_4}}$} \\ \hline - \rule{0em}{1.4em}\specialcell{$\lambda_{312}''$\\ Uncompressed SR} - & \specialcell{$\lambda_{312}''$\\ Compressed SR} - & \specialcell{$\lambda_{313}''$\\ Uncompressed SR} - & \specialcell{$\lambda_{313}''$\\ Compressed SR} - & \specialcell{Control Region} \\ \hline - $\mathrm{N_{b,M} } \geq 2$ & $\mathrm{N_{b,M} } \geq 2$ & $\mathrm{N_{b,T} } \geq 3$ & $\mathrm{N_{b,M}} \geq 3$ & $\mathrm{N_{b,L}} = 0$ \\ - $\mathrm{N_{b,T} } \geq 1$ & $\mathrm{N_{b,T} } \geq 1$ & & & \\ - $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & \\ - \rule[-0.5em]{0em}{0em}$m_3 \equiv m_{\mathrm{j_2,j_3,j_4}}$ & $m_3 \equiv m_{\mathrm{j_1,j_2,j_3}}$ & $m_3 \equiv m_{\mathrm{j_2,j_3,j_4}}$ & $m_3 \equiv m_{\mathrm{j_1,j_2,j_3}}$ & {} \\ - \hline - \end{tabular} - } - - \end{center} -\end{frame} \begin{frame}{Resonance Reconstruction} @@ -268,29 +238,29 @@ \end{frame} -\begin{frame}{Estimation Strategies} - \begin{splitcol}[0.55] - \begin{col} - \begin{itemize} - \item For all bump hunts, key technique is estimation of the background shape. - The region where signal is expected is blinded, then the fit is used to estimated the background. - \item Traditional bump hunts have used ad-hoc functions \cite{zisopoulos_parametric_2023}, chosen because they approximate the observed shape. - \item However, this can introduce bias from the choice of function, and it has been shown that they scale poorly with increasing luminosity \cite{frate_modeling_2017}. - \item For multidimensional searches, the problem can also be compounded by the complexity of selecting an appropriate 2D function. 
- \end{itemize} - \end{col} - \begin{col} - \begin{center} - \begin{onlyenv}<1> - \includegraphics[width=\textwidth]{figures/higgs} - \end{onlyenv} - \begin{onlyenv}<2> - \graphiccite{figures/fit_table}{1}{zisopoulos_parametric_2023} - \end{onlyenv} - \end{center} - \end{col} - \end{splitcol} -\end{frame} +% \begin{frame}{Estimation Strategies} +% \begin{splitcol}[0.55] +% \begin{col} +% \begin{itemize} +% \item For all bump hunts, key technique is estimation of the background shape. +% The region where signal is expected is blinded, then the fit is used to estimated the background. +% \item Traditional bump hunts have used ad-hoc functions \cite{zisopoulos_parametric_2023}, chosen because they approximate the observed shape. +% \item However, this can introduce bias from the choice of function, and it has been shown that they scale poorly with increasing luminosity \cite{frate_modeling_2017}. +% \item For multidimensional searches, the problem can also be compounded by the complexity of selecting an appropriate 2D function. +% \end{itemize} +% \end{col} +% \begin{col} +% \begin{center} +% \begin{onlyenv}<1> +% \includegraphics[width=\textwidth]{figures/higgs} +% \end{onlyenv} +% \begin{onlyenv}<2> +% \graphiccite{figures/fit_table}{1}{zisopoulos_parametric_2023} +% \end{onlyenv} +% \end{center} +% \end{col} +% \end{splitcol} +% \end{frame} \begin{frame}{Current Strategy} \begin{itemize} @@ -303,160 +273,160 @@ \end{itemize} \end{frame} -\section[Gaussian Process Regression]{Gaussian Process Regression Overview} -\label{sec:gauss-proc-regr} - +% \section[Gaussian Process Regression]{Gaussian Process Regression Overview} +% \label{sec:gauss-proc-regr} -\begin{frame}{Representing Histograms With Gaussians} - \begin{overprint} - \begin{itemize} - \item We seek a way to describe our histogram probabilistically without reference to a specific parametric form. How can this be done? - \item The answer: consider each of the N bins to be a random variable: part of a Multivariate Normal (MVN). - \item<1-> Consider our falling mass distribution. - \item<2-> Imagine for simplicity we rebinned to have just 2 bins. - \item<3-> We can represent the underlying distribution as a 2D Gaussian. - \end{itemize} - \end{overprint} - \begin{overprint} - \begin{center} - \begin{onlyenv}<1> - \scalebox{0.5}{\includestandalone{\commonfiles{gp/histogram}}} - \end{onlyenv} - \end{center} - \def\meanOne{0.8} - \def\meanTwo{0.4} - \def\binOne{0.7} - \def\binOneStd{0.1} - \def\binTwo{0.48} - \def\binTwoStd{0.1} - - \begin{onlyenv}<2> % - \begin{center} - \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}}% - \end{center} - \end{onlyenv} - \begin{onlyenv}<3> - \begin{center} - \scalebox{0.5}{\includestandalone{\commonfiles{gp/independent}}}% - \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}}% - \end{center} - \end{onlyenv}% - \foreach \one/\two [count=\n] in {\meanOne/\meanTwo, 0.6/0.2, 0.9/0.1} { % - \pgfmathtruncatemacro\z{\n+3} % - \only<\z>{ % - \def\binOne{\one} % - \def\binTwo{\two} % - \begin{center} - \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_2d}}} % - \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}} - \end{center} - } % - } - \end{overprint} -\end{frame} +% \begin{frame}{Representing Histograms With Gaussians} +% \begin{overprint} +% \begin{itemize} +% \item We seek a way to describe our histogram probabilistically without reference to a specific parametric form. How can this be done? 
+% \item The answer: consider each of the N bins to be a random variable: part of a Multivariate Normal (MVN). +% \item<1-> Consider our falling mass distribution. +% \item<2-> Imagine for simplicity we rebinned to have just 2 bins. +% \item<3-> We can represent the underlying distribution as a 2D Gaussian. +% \end{itemize} +% \end{overprint} +% \begin{overprint} +% \begin{center} +% \begin{onlyenv}<1> +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/histogram}}} +% \end{onlyenv} +% \end{center} +% \def\meanOne{0.8} +% \def\meanTwo{0.4} +% \def\binOne{0.7} +% \def\binOneStd{0.1} +% \def\binTwo{0.48} +% \def\binTwoStd{0.1} + +% \begin{onlyenv}<2> % +% \begin{center} +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}}% +% \end{center} +% \end{onlyenv} + +% \begin{onlyenv}<3> +% \begin{center} +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/independent}}}% +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}}% +% \end{center} +% \end{onlyenv}% +% \foreach \one/\two [count=\n] in {\meanOne/\meanTwo, 0.6/0.2, 0.9/0.1} { % +% \pgfmathtruncatemacro\z{\n+3} % +% \only<\z>{ % +% \def\binOne{\one} % +% \def\binTwo{\two} % +% \begin{center} +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_2d}}} % +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/sampled_hist}}} +% \end{center} +% } % +% } +% \end{overprint} +% \end{frame} -\begin{frame}{Prediction} - \begin{itemize} - \item The previous slide shows how we can use an MVN to describe a histogram. - \item How can we have actual predictive power? How can we incorporate known data to extrapolate to unknown points? - \item Answer: Condition the gaussian! If $p(b_{1},b_{2}) \sim \mathcal{N}(b_{1},b_{2})$ then $p(b_{1} | b_{2,obs} ) \sim \mathcal{N}(b_{1},b_{2,obs})$ - \end{itemize} - \begin{center} - \def\meanOne{0.8} - \def\meanTwo{0.4} - \def\binOne{0.7} - \def\binTwo{0.48} - \def\binTwoStd{0.1} - \scalebox{0.5}{\includestandalone{\commonfiles{gp/conditioned_2d}}} % - \scalebox{0.5}{\includestandalone{\commonfiles{gp/conditioned_hist}}} % - \end{center} +% \begin{frame}{Prediction} +% \begin{itemize} +% \item The previous slide shows how we can use an MVN to describe a histogram. +% \item How can we have actual predictive power? How can we incorporate known data to extrapolate to unknown points? +% \item Answer: Condition the gaussian! If $p(b_{1},b_{2}) \sim \mathcal{N}(b_{1},b_{2})$ then $p(b_{1} | b_{2,obs} ) \sim \mathcal{N}(b_{1},b_{2,obs})$ +% \end{itemize} +% \begin{center} +% \def\meanOne{0.8} +% \def\meanTwo{0.4} +% \def\binOne{0.7} +% \def\binTwo{0.48} +% \def\binTwoStd{0.1} +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/conditioned_2d}}} % +% \scalebox{0.5}{\includestandalone{\commonfiles{gp/conditioned_hist}}} % +% \end{center} -\end{frame} +% \end{frame} -\begin{frame}{Beyond the Histogram} - \begin{itemize} - \item What if we have more than 2 bins? What if we have infinitely many bins (ie a function)? - \item How should we describe the MVN then? - \end{itemize} -\end{frame} +% \begin{frame}{Beyond the Histogram} +% \begin{itemize} +% \item What if we have more than 2 bins? What if we have infinitely many bins (ie a function)? +% \item How should we describe the MVN then? +% \end{itemize} +% \end{frame} -\begin{frame}{What is a Gaussian Process?} - \begin{definition} - A gaussian process is a possibly infinite series of random variables, any finite subset of which is jointly gaussian. 
- \end{definition} - Generally, the random variables are indexed by real values $x$, since we are generally considering regression over $\mathbb{R}^{n}$. +% \begin{frame}{What is a Gaussian Process?} +% \begin{definition} +% A gaussian process is a possibly infinite series of random variables, any finite subset of which is jointly gaussian. +% \end{definition} +% Generally, the random variables are indexed by real values $x$, since we are generally considering regression over $\mathbb{R}^{n}$. - A gaussian process $f(x)$ is completely defined by its mean and covariance - \begin{equation} - \begin{split} - \markpos{gpmean}{m(x)} &= \mathbb{E} \left[ f(x) \right] \\ - \markpos{gpk}{k(x,x')} &= \mathbb{E} \left[ \left( f(x) - m(x) \right) \left( f(x') - m(x') \right)\right] - \end{split} - \end{equation} +% A gaussian process $f(x)$ is completely defined by its mean and covariance +% \begin{equation} +% \begin{split} +% \markpos{gpmean}{m(x)} &= \mathbb{E} \left[ f(x) \right] \\ +% \markpos{gpk}{k(x,x')} &= \mathbb{E} \left[ \left( f(x) - m(x) \right) \left( f(x') - m(x') \right)\right] +% \end{split} +% \end{equation} - \begin{center} - \includegraphics[width=0.4\textwidth]{figures/two_points_1} - \hspace{1cm} - \includegraphics[width=0.4\textwidth]{figures/two_points_2} - \end{center} +% \begin{center} +% \includegraphics[width=0.4\textwidth]{figures/two_points_1} +% \hspace{1cm} +% \includegraphics[width=0.4\textwidth]{figures/two_points_2} +% \end{center} - \begin{onlyenv}<2> - \posannot[20:3cm]{gpmean}{fill=UMNMaroon!10, draw=UMNMaroon}{The mean value of the MVN == Background Estimate } - \posannot[-30:4cm]{gpk}{fill=UMNMaroon!10, draw=UMNMaroon}{The covariance between any two points. \\ These are the background estimate error bars.} - \end{onlyenv} -\end{frame} +% \begin{onlyenv}<2> +% \posannot[20:3cm]{gpmean}{fill=UMNMaroon!10, draw=UMNMaroon}{The mean value of the MVN == Background Estimate } +% \posannot[-30:4cm]{gpk}{fill=UMNMaroon!10, draw=UMNMaroon}{The covariance between any two points. \\ These are the background estimate error bars.} +% \end{onlyenv} +% \end{frame} -% \begin{frame}{Gaussian Process Regression} +% % \begin{frame}{Gaussian Process Regression} +% % \begin{itemize} +% % \item The ability to define distributions over functions allows us to do inference using Baye's theorem. +% % \item Specifically, given $N$ training points and a Gaussian process prior, we can produce a posterior Gaussian process that provides a means to do regression. +% % \end{itemize} +% % \begin{center} +% % \graphiccite{\includegraphics[width=0.7\textwidth]{figures/prior_and_conditioning}}{\cite{rasmussen_gaussian_2006}} +% % \end{center} +% % \end{frame} + +% \begin{frame}[label=kernelscales]{Kernels and Scales} % \begin{itemize} -% \item The ability to define distributions over functions allows us to do inference using Baye's theorem. -% \item Specifically, given $N$ training points and a Gaussian process prior, we can produce a posterior Gaussian process that provides a means to do regression. +% \item The choice of kernel is the most important aspect of Gaussian processes. +% \item The choice of $k(x,y)$ reflects our understanding of how the points should be correlated, how smooth the functions should be, etc. +% \item The choice of kernel consists of both the selection of the form and the hyperparameters. +% \item The form of the kernel is chosen to reflect prior understanding of how regions of space should be related. 
+% \item One a kernel is chosen, hyperparameters are determined algorithmically, so as to maximize the marginal log likelihood: % \end{itemize} -% \begin{center} -% \graphiccite{\includegraphics[width=0.7\textwidth]{figures/prior_and_conditioning}}{\cite{rasmussen_gaussian_2006}} -% \end{center} +% \begin{equation} +% \log p(\bm{y}|X) = +% \markpos{term1}{-\frac{1}{2}\bm{y}^T(K+\sigma^2_n I)^{-1}\bm{y}} +% - \markpos{term2}{\frac{1}{2}\log|K+\sigma^2_n I|} +% - \frac{n}{2}\log2\pi +% \end{equation} +% \begin{onlyenv}<2> +% \posannot{term1}{fill=UMNMaroon!10, draw=UMNMaroon}{Compatibility of model with data} +% \posannot[210:3cm]{term2}{fill=UMNMaroon!10, draw=UMNMaroon}{Overfitting penalty works against kernels with large determinants.} +% \end{onlyenv} % \end{frame} -\begin{frame}[label=kernelscales]{Kernels and Scales} - \begin{itemize} - \item The choice of kernel is the most important aspect of Gaussian processes. - \item The choice of $k(x,y)$ reflects our understanding of how the points should be correlated, how smooth the functions should be, etc. - \item The choice of kernel consists of both the selection of the form and the hyperparameters. - \item The form of the kernel is chosen to reflect prior understanding of how regions of space should be related. - \item One a kernel is chosen, hyperparameters are determined algorithmically, so as to maximize the marginal log likelihood: - \end{itemize} - \begin{equation} - \log p(\bm{y}|X) = - \markpos{term1}{-\frac{1}{2}\bm{y}^T(K+\sigma^2_n I)^{-1}\bm{y}} - - \markpos{term2}{\frac{1}{2}\log|K+\sigma^2_n I|} - - \frac{n}{2}\log2\pi - \end{equation} - \begin{onlyenv}<2> - \posannot{term1}{fill=UMNMaroon!10, draw=UMNMaroon}{Compatibility of model with data} - \posannot[210:3cm]{term2}{fill=UMNMaroon!10, draw=UMNMaroon}{Overfitting penalty works against kernels with large determinants.} - \end{onlyenv} -\end{frame} - -\begin{frame}{Kernels and Scales Continued} - \begin{itemize} - \item The most common kernel is the RBF kernel - \begin{equation} - k_{\text{RBF}}(\mathbf{x_1}, \mathbf{x_2}) = \exp \left( -\frac{1}{2} \frac{(\mathbf{x_1} - \mathbf{x_2})^2}{ \ell^{2}} \right) - \end{equation} - \item Let us see how this kernel lets us extrapolate $m_{\stopq}$ in to the blinded window indicated by the red bars, for different length scales $\ell$, using QCD Simulation on $m_{\stopq}$. - \end{itemize} +% \begin{frame}{Kernels and Scales Continued} +% \begin{itemize} +% \item The most common kernel is the RBF kernel +% \begin{equation} +% k_{\text{RBF}}(\mathbf{x_1}, \mathbf{x_2}) = \exp \left( -\frac{1}{2} \frac{(\mathbf{x_1} - \mathbf{x_2})^2}{ \ell^{2}} \right) +% \end{equation} +% \item Let us see how this kernel lets us extrapolate $m_{\stopq}$ in to the blinded window indicated by the red bars, for different length scales $\ell$, using QCD Simulation on $m_{\stopq}$. 
+% \end{itemize} - \begin{center} - \blockcite{\markimage{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_100p0__m1500p0_s90p0__w_1350p0_1650p0}}{0.4,0.6}{ls1}}{Scale = 100GeV} - \blockcite{{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_600p0__m1500p0_s90p0__w_1350p0_1650p0}}{}{}}{Scale = 600GeV} - \blockcite{\markimage{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_1350p0__m1500p0_s90p0__w_1350p0_1650p0}}{0.3,0.7}{ls2}}{Scale = 1350GeV} - \end{center} - \begin{onlyenv}<2> - \posannot[30:3cm]{ls1}{fill=UMNMaroon!10, draw=UMNMaroon}{Small length scales have have large \\uncertainties in their regressions.} - \posannot[210:3cm]{ls2}{fill=UMNMaroon!10, draw=UMNMaroon}{Long length scales can't accomodate local variations.} - \end{onlyenv} -\end{frame} +% \begin{center} +% \blockcite{\markimage{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_100p0__m1500p0_s90p0__w_1350p0_1650p0}}{0.4,0.6}{ls1}}{Scale = 100GeV} +% \blockcite{{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_600p0__m1500p0_s90p0__w_1350p0_1650p0}}{}{}}{Scale = 600GeV} +% \blockcite{\markimage{\includegraphics[width=0.31\textwidth]{figures/fitplots/pull_sr_inj_signal_312_1500_1400__lb1050__r15p0__fs_1350p0__m1500p0_s90p0__w_1350p0_1650p0}}{0.3,0.7}{ls2}}{Scale = 1350GeV} +% \end{center} +% \begin{onlyenv}<2> +% \posannot[30:3cm]{ls1}{fill=UMNMaroon!10, draw=UMNMaroon}{Small length scales have have large \\uncertainties in their regressions.} +% \posannot[210:3cm]{ls2}{fill=UMNMaroon!10, draw=UMNMaroon}{Long length scales can't accomodate local variations.} +% \end{onlyenv} +% \end{frame} \section[Regression Results]{2D Background Estimation With Gaussian Processes} \label{sec:2d-gauss-proc} @@ -630,33 +600,46 @@ \end{frame} -\section[Statistical Considerations]{Preliminary Statistical Strategy} +\section[Statistical Considerations]{Preliminary Statistical Strategy and Inquiries} -\begin{frame}{Strategy Overview} - \begin{block}{} - We see that GPR can provide a quality estimate of the background over a wide range of blinding windows. How can we use this to extract signal and set limits? - \end{block} - \begin{enumerate} - \item Determine appropriate kernel form using MC and CR Data. We will derive a systematic to account for differences between regions for the kernel choice. - \item For each signal, use MC to determine blinding window. - \item Run GPR on the blinded SR to determine the kernel hyperparameters, and to estimate the background in the window. - \item Test fit quality of optimized kernel in CR and MC to validate, derive systematic related to deviation. - \item\label{item:5} Use background estimate provided by GPR in our statistical model. - \end{enumerate} -\end{frame} -\begin{frame}{Notes on Statistical Procedure} - \begin{itemize} - \item Gaussian process regression provides a complete posterior distribution describing the background. - \item Therefore, a proper statistical treatment requires considering not just the posterior mean, but the complete distribution. - \begin{itemize} - \item We are working on an implementation in Combine, using the eigenvectors of the posterior covariance as nuisance parameter templates. 
- \item We also have a working implementation of MCMC/Variational Inference using a well established probablistic programming language, Pyro. - \end{itemize} - \iflong \item {\bfseries This is is an area of active work. } However, we have made steady progress and have an ``alpha'' implementation. Hope to present statistical framework in next 1-2 months. \fi - \end{itemize} +\begin{frame}{Overview} + \begin{onlyenv}<1> + \begin{block}{} + We see that GPR can provide a good estimate of the background over a wide range of blinding windows. How can we use this to extract signal and set limits? + \end{block} + \end{onlyenv} + \begin{onlyenv}<1-2> + \begin{enumerate} + \item Determine appropriate kernel form using MC and CR Data. We will derive a systematic to account for differences between regions for the kernel choice. + \item For each signal, use MC to determine blinding window. + \item Run GPR on the blinded SR to determine the kernel hyperparameters, and to estimate the background in the window. + \item\label{item:5} Use background estimate provided by GPR in our statistical model. + \end{enumerate} + \end{onlyenv} + \begin{onlyenv}<2> + \begin{block}{ + } We are hoping to finalize the procedure to move forward towards pre-approval, but we have several questions for which we were hoping for expert input. + \begin{itemize} + + \item The necessity and methodology of hyperparameter uncertainties, effectively for the purpose of developing model selection systematics%The gaussian process furnishes an ``inherent'' uncertainty on its prediction + + \item Somewhat related, whether it is reasonable to use HiggsCombine to perform the final fit with a pre-determined regression. + \end{itemize} + \end{block} + \end{onlyenv} \end{frame} + +% \begin{frame}{Notes on Statistical Procedure} +% \begin{itemize} +% \item Gaussian process regression provides a complete posterior distribution describing the background. +% \item Therefore, a proper statistical treatment requires considering not just the posterior mean, but the complete distribution. +% \begin{itemize} +% \item We are working on an implementation in Combine, using the eigenvectors of the posterior covariance as nuisance parameter templates. +% \item We also have a working implementation of MCMC/Variational Inference using a well established probablistic programming language, Pyro. +% \end{frame} + \tikzset{onslide/.code args={<#1>#2}{% \only<#1>{\pgfkeysalso{#2}} }} @@ -688,16 +671,16 @@ \begin{itemize} \item We diagonalize the MVN to produce ``eigen-variations'', compatible with combine's statistical model. \item The MVN mean is used as the nominal background estimate. - \item Key transformation used is this: - \begin{equation} - z \sim \mathcal{N} \left( \mu,\Sigma \right) \implies A z + b \sim \mathcal{N} \left( A \mu + b , A \Sigma A^{T} \right) - \end{equation} - % \item Since $\Sigma$ is PSD, we can write $\Sigma = Q \Lambda Q^{T} = Q \Lambda^{1/2} \Lambda^{1/2} Q^{T}$ - - % \item Suppose that our posterior MVN is given by $\mathcal{N} \left( \mu,\Sigma \right)$ of dimension N. 
Then if $z_{n} \sim \mathcal{N} \left( 0,1 \right) $ it follows that - % \begin{equation} - % Q \Lambda^{1/2} z + \mu \sim \mathcal{N} \left( \mu , \Sigma \right) - % \end{equation} + % \item Key transformation used is this: + % \begin{equation} + % z \sim \mathcal{N} \left( \mu,\Sigma \right) \implies A z + b \sim \mathcal{N} \left( A \mu + b , A \Sigma A^{T} \right) + % \end{equation} + % \item Since $\Sigma$ is PSD, we can write $\Sigma = Q \Lambda Q^{T} = Q \Lambda^{1/2} \Lambda^{1/2} Q^{T}$ + + % \item Suppose that our posterior MVN is given by $\mathcal{N} \left( \mu,\Sigma \right)$ of dimension N. Then if $z_{n} \sim \mathcal{N} \left( 0,1 \right) $ it follows that + % \begin{equation} + % Q \Lambda^{1/2} z + \mu \sim \mathcal{N} \left( \mu , \Sigma \right) + % \end{equation} \end{itemize} \end{onlyenv} @@ -746,35 +729,31 @@ \end{overlayarea} \end{frame} -% \begin{frame}{Systematic Uncertainties and Validation} -% \begin{itemize} -% \item We envision a validation strategy where the kernel hyperparameters are trained on SR data, then the regression is validated by examining the fit quality in simulation and CR data. -% \item More work needed to determine exactly what systematics should be considered, and how they should be implemented. -% \end{itemize} -% -% -% SHOW COMPARISON OF CR AND MC -% \end{frame} - - -\section{Conclusion} -\label{sec:conclusion} +\begin{frame}{Model Selection Systematics} + \begin{itemize} + \item The gaussian process furnishes an uncertainty on its regression output. + \item However this does not take in to account model-selection, both in kernel form and hyperparameter optimization. + \item We haven't settled on a good method for handling this. + \item A ``hacky'' method is to draw toys from the poisson distribution of our input, do the training, and then determine if the varied posterior mean lies within the nominal posterior's uncertainty + \item In this case though it is not so clear how to turn potential devations in to a reasonable systematic. + \end{itemize} +\end{frame} -\begin{frame}{Conclusion} +\begin{frame}{Points of Concern} + We are hoping for your expert input on several aspects of the procedure. \begin{itemize} - \item The past months have seen substantial progress on the background estimation and statistical analysis procedure. - \item Framework can now produce good estimates for 2D backgrounds over a range of locations and masking windows. Ongoing work on improving and unifying estimation. - \iflong \item We hope to present on further topics in the coming weeks/months, including trigger studies and data/mc comparisons for Run2 and Run3, and out finalized statitiscal procedure. \fi - \item We hope to hear any feedback from experts regarding the methodology, or any general comments or suggestions! + \item Our most pressing question for proceeding with the analysis is the handling of model selection systematics + \item This process does not take in to account systematics related to model selection, ie the choice of kernel (including its hyperparameters)? + \item One solution is to take a fully bayesian approach, however it is less clear in the case of deep kernels how to handle this? + \item It would be nice to be able to use Combine for the final fit, does the proposed method of using the ``eigen-variations'' seem valid as combine inputs? 
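        % A concrete version of the toy-based check described on the ``Model Selection Systematics'' slide;
        % kept commented out as a sketch, with placeholder notation (nominal posterior $\mathcal{N} \left( \mu, \Sigma \right)$, observed bin counts $y_{i}$):
        % \begin{frame}{Toy-Based Model-Selection Check (Sketch)}
        %     \begin{enumerate}
        %         \item For each toy $t$, resample every input bin: $y_{i}^{(t)} \sim \mathrm{Poisson} \left( y_{i} \right)$.
        %         \item Re-run the training on $y^{(t)}$ (same kernel form, hyperparameters re-optimized) to obtain a varied posterior mean $\mu^{(t)}$.
        %         \item Form per-bin pulls $\left( \mu_{i}^{(t)} - \mu_{i} \right) / \sqrt{\Sigma_{ii}}$ against the nominal posterior.
        %         \item If the pull distribution is significantly wider than unit width, the excess spread would need to be encoded as an additional uncertainty, e.g.\ one more nuisance template.
        %     \end{enumerate}
        % \end{frame}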
\end{itemize}
-    \vspace{1cm}
    \begin{center}
-        {\Large Thank you!}
        {\Large Thank you for your time and advice!}
    \end{center}
\end{frame}


\begin{frame}[allowframebreaks]{Bibliography}
    % \bibliographystyle{plain}
    % \bibliographystyle{amsalpha}
@@ -879,6 +858,41 @@ \section{Appendix}
    \includegraphics[width=\textwidth]{figures/CombineLikelihoodEqns.png}
\end{frame}

\newcommand{\specialcell}[2][c]{\begin{tabular}[#1]{@{}c@{}}#2\end{tabular}}
\begin{frame}[label=regions]{General Analysis Features}
    \begin{itemize}
        \item No leptons.
        \item A moderate number of jets, several with high $p_{T}$.
        \item Multiple b-jets with large angular separation.
        \item Resonances from both the $\stopq$ and the $\chargino$.
    \end{itemize}

    \begin{center}
        \scalebox{0.8}{
            \begin{tabular}{|ccccc|}
                \hline
                \multicolumn{5}{|c|}{Baseline Selections} \\ \hline
                \multicolumn{5}{|c|}{\texttt{HLT\_PFHT* | HLT\_AK8PFJet*\_TrimMass*}} \\
                \multicolumn{5}{|c|}{$4 \leq \mathrm{N_j} \leq 6$ ($p_{\mathrm{T,j}} > 30~\text{GeV}$, $|\eta_{\mathrm{j}}| < 2.4$)} \\
                \multicolumn{5}{|c|}{$p_{\mathrm{T,j_1}} > 300~\text{GeV}$} \\
                \multicolumn{5}{|c|}{$\mathrm{N}_e (\text{tight}), \mathrm{N}_\mu (\text{medium}) = 0$} \\
                \multicolumn{5}{|c|}{\rule[-0.5em]{0em}{0em}$m_4 \equiv m_{\mathrm{j_1,j_2,j_3,j_4}}$} \\ \hline
                \rule{0em}{1.4em}\specialcell{$\lambda_{312}''$\\ Uncompressed SR}
                 & \specialcell{$\lambda_{312}''$\\ Compressed SR}
                 & \specialcell{$\lambda_{313}''$\\ Uncompressed SR}
                 & \specialcell{$\lambda_{313}''$\\ Compressed SR}
                 & \specialcell{Control Region} \\ \hline
                $\mathrm{N_{b,M} } \geq 2$ & $\mathrm{N_{b,M} } \geq 2$ & $\mathrm{N_{b,T} } \geq 3$ & $\mathrm{N_{b,M}} \geq 3$ & $\mathrm{N_{b,L}} = 0$ \\
                $\mathrm{N_{b,T} } \geq 1$ & $\mathrm{N_{b,T} } \geq 1$ & & & \\
                $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & $\Delta R_{b_{1},b_{2}} > 1$ & \\
                \rule[-0.5em]{0em}{0em}$m_3 \equiv m_{\mathrm{j_2,j_3,j_4}}$ & $m_3 \equiv m_{\mathrm{j_1,j_2,j_3}}$ & $m_3 \equiv m_{\mathrm{j_2,j_3,j_4}}$ & $m_3 \equiv m_{\mathrm{j_1,j_2,j_3}}$ & {} \\
                \hline
            \end{tabular}
        }

    \end{center}
\end{frame}

\end{document}