From 6315c2af31abeff1088e891539e105c606f70dc7 Mon Sep 17 00:00:00 2001
From: moralejo
Date: Fri, 9 Feb 2024 09:55:43 +0000
Subject: [PATCH] New notebook for producing theta2 plots from a set of DL2 files

---
 notebooks/explore_DL2.ipynb | 338 ++++++++++++++++++++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 notebooks/explore_DL2.ipynb

diff --git a/notebooks/explore_DL2.ipynb b/notebooks/explore_DL2.ipynb
new file mode 100644
index 0000000000..c5b8b2f1bd
--- /dev/null
+++ b/notebooks/explore_DL2.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "italian-enforcement",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import glob\n",
+    "import astropy.units as u\n",
+    "from astropy.coordinates.erfa_astrom import ErfaAstromInterpolator, erfa_astrom\n",
+    "from lstchain.reco.utils import get_effective_time, extract_source_position, compute_theta2\n",
+    "from ctapipe.containers import EventType\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "afa5037c",
+   "metadata": {},
+   "source": [
+    "## DL2 exploration notebook\n",
+    "\n",
+    "This notebook opens a set of LST-1 DL2 files, divides the data into two subsets according to reconstructed energy, and computes (for certain gammaness cuts) the theta2 plots with respect to a direction specified by the user (the candidate source).\n",
+    "\n",
+    "The cuts (gammaness & theta2) used for computing significances are \"reasonable\" for a first attempt at source detection. The ones for the high-E subset are close to optimal for sensitivity (at low zenith angles, say below 40 deg, for good observation conditions and a Crab-like spectrum).\n",
+    "For low energies it is hard to say what \"optimal cuts\" would be - that depends strongly on the source energy spectrum, and is also more sensitive to the zenith angle (via the energy threshold). Do **not** play with the cuts on a yet-undetected source! If custom optimization of the cuts is necessary, it can be done on simulations (for an assumed spectrum) or on a confirmed and bright source."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f88630c7",
+   "metadata": {},
+   "source": [
+    "## USER INPUT: dataset and source name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "turkish-psychology",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = glob.glob(\"/fefs/aswg/workspace/abelardo.moralejo/Crab_winter_22to23/TMP/DL2/dl2_*.h5\")\n",
+    "source_name = \"Crab\"\n",
+    "# theta2 plots will be calculated w.r.t. this source (the name must be known to astropy,\n",
+    "# and the source must be in the FoV of the selected dataset)"
+   ]
+  },
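+  {
+   "cell_type": "markdown",
+   "id": "source-name-check-note",
+   "metadata": {},
+   "source": [
+    "Optional quick check (a minimal sketch, not needed by the rest of the notebook): verify that astropy can resolve `source_name` before looping over the whole dataset. `SkyCoord.from_name` queries an online name resolver, so this cell needs internet access; skip it when working offline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "source-name-check",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: resolve the source name with astropy (requires internet access)\n",
+    "from astropy.coordinates import SkyCoord\n",
+    "\n",
+    "source_coordinates = SkyCoord.from_name(source_name)\n",
+    "print(f'{source_name}: RA = {source_coordinates.ra.deg:.4f} deg, '\n",
+    "      f'Dec = {source_coordinates.dec.deg:.4f} deg')"
+   ]
+  },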
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fuzzy-skill",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.sort()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "154f2041",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tablename = \"/dl2/event/telescope/parameters/LST_LSTCam\"\n",
+    "dummy = []\n",
+    "\n",
+    "t_eff = 0\n",
+    "t_elapsed = 0\n",
+    "\n",
+    "for file in dataset:\n",
+    "    print(file)\n",
+    "    tb = pd.read_hdf(file, tablename)\n",
+    "    lt, et = get_effective_time(tb)\n",
+    "    t_eff += lt\n",
+    "    t_elapsed += et\n",
+    "    dummy.append(tb)\n",
+    "\n",
+    "table = pd.concat(dummy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78608dc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24313927",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dummy = None\n",
+    "gc.collect()  # clean up memory (in case of a long table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "orange-isolation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fifteen-chase",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(table.columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "twenty-treat",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Effective time: {t_eff.to(u.h):.3f}; Elapsed time: {t_elapsed.to(u.h):.3f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "589e8317",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the cuts here.\n",
+    "# We make two subsets: Ereco < 0.2 TeV and Ereco > 0.2 TeV\n",
+    "\n",
+    "min_gammaness_cut = [0.5, 0.95]\n",
+    "min_intensity_cut = [50, 50] # p.e.\n",
+    "min_energy_cut = [0., 0.2] # TeV\n",
+    "max_energy_cut = [0.2, 1e6] # TeV\n",
+    "\n",
+    "theta2_cut = [0.04, 0.02] # deg2 - applied later, by summing the theta2 histogram bins below the cut\n",
+    "\n",
+    "event_selection = [] # Index 0 will contain the low-E cuts, index 1 the high-E cuts\n",
+    "for k in range(2):\n",
+    "    event_selection.append((table.gammaness > min_gammaness_cut[k]) &\n",
+    "                           (table.intensity > min_intensity_cut[k]) &\n",
+    "                           (table.reco_energy > min_energy_cut[k]) &\n",
+    "                           (table.reco_energy < max_energy_cut[k]) &\n",
+    "                           (table.event_type == EventType.SUBARRAY.value))\n",
+    "# SUBARRAY is the event type for \"cosmics\" (i.e. \"physics trigger\", showers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bed359f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gamma_candidates = []\n",
+    "for k in range(2):\n",
+    "    gamma_candidates.append(table[event_selection[k]])"
+   ]
+  },
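+  {
+   "cell_type": "markdown",
+   "id": "selection-summary-note",
+   "metadata": {},
+   "source": [
+    "Optional sanity check (a minimal sketch using only variables defined above): print how many events survive the selection in each energy subset, to make sure the cuts are not removing essentially all events before the slower theta2 computation below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "selection-summary",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Events surviving the gammaness / intensity / energy cuts in each subset\n",
+    "for k, label in enumerate(['low E', 'high E']):\n",
+    "    print(f'{label} ({min_energy_cut[k]:.1f} - {max_energy_cut[k]:.1f} TeV): '\n",
+    "          f'{len(gamma_candidates[k])} of {len(table)} events pass the cuts')"
+   ]
+  },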
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c1e77c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "focal = 29.30565 * u.m # EFFECTIVE focal length (i.e. accounts for coma aberration)\n",
+    "\n",
+    "source_position = []\n",
+    "# Beware: this can be quite slow for long datasets (and the more so the softer the event selection cuts!)\n",
+    "for k in range(2):\n",
+    "    with erfa_astrom.set(ErfaAstromInterpolator(5 * u.min)):\n",
+    "        source_position.append(extract_source_position(gamma_candidates[k],\n",
+    "                                                        source_name,\n",
+    "                                                        equivalent_focal_length=focal))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e87ca2cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nbins = [50, 100] # number of bins of theta2 plot for low E and high E\n",
+    "number_of_offs = [1, 3] # number of off regions (just one at low E, because of worse angular resolution)\n",
+    "\n",
+    "for k in range(2): # index 0 is low E (defined above); index 1 is high E\n",
+    "    print(\"\\n\\n\\n\")\n",
+    "\n",
+    "    # Off positions: the one opposite to the source w.r.t. the camera center, plus two more\n",
+    "    # at the same distance from the center, \"at right angles\" w.r.t. the line source - camera center:\n",
+    "    off_180 = [-source_position[k][0], -source_position[k][1]]\n",
+    "    off_90 = [-source_position[k][1], source_position[k][0]]\n",
+    "    off_270 = [source_position[k][1], -source_position[k][0]]\n",
+    "\n",
+    "    theta2_on = np.array(compute_theta2(gamma_candidates[k], source_position[k]))\n",
+    "    theta2_off_180 = np.array(compute_theta2(gamma_candidates[k], off_180))\n",
+    "    theta2_off_90 = np.array(compute_theta2(gamma_candidates[k], off_90))\n",
+    "    theta2_off_270 = np.array(compute_theta2(gamma_candidates[k], off_270))\n",
+    "\n",
+    "    theta_range = (0, 0.5)\n",
+    "\n",
+    "    nbinscut = int(np.round(theta2_cut[k] / ((theta_range[1] - theta_range[0]) / nbins[k])))\n",
+    "\n",
+    "    fig = plt.figure(figsize=(16,6))\n",
+    "\n",
+    "    counts_on, bins = np.histogram(theta2_on, bins=nbins[k], range=theta_range)\n",
+    "    counts_off, _ = np.histogram(theta2_off_180, bins=bins)\n",
+    "\n",
+    "    if number_of_offs[k] == 3:\n",
+    "        counts_off_90, _ = np.histogram(theta2_off_90, bins=bins)\n",
+    "        counts_off_270, _ = np.histogram(theta2_off_270, bins=bins)\n",
+    "        counts_off += counts_off_90 + counts_off_270\n",
+    "\n",
+    "    alpha = 1/number_of_offs[k]\n",
+    "\n",
+    "    fig.add_subplot(1, 2, 1)\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), counts_on, yerr=counts_on**0.5,\n",
+    "                 fmt='o', ms=3, label='ON-source')\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), alpha*counts_off, yerr=alpha*(counts_off**0.5),\n",
+    "                 fmt='o', ms=3, label='OFF-source')\n",
+    "\n",
+    "    plt.plot([theta2_cut[k], theta2_cut[k]], [0, counts_on.max()], linestyle='dashed',\n",
+    "             color='tab:green', label='$\\\\theta^2$ cut')\n",
+    "\n",
+    "    plt.xlabel('$\\\\theta^2 (deg^2)$', fontsize=14)\n",
+    "    plt.ylabel('Events', fontsize=14)\n",
+    "    plt.xticks(fontsize=14)\n",
+    "    plt.yticks(fontsize=14)\n",
+    "    plt.ylim(0, counts_on.max()*1.15)\n",
+    "    plt.legend(fontsize=14)\n",
+    "    plt.grid()\n",
+    "\n",
+    "    excess = counts_on - alpha*counts_off\n",
+    "    err = (counts_on + alpha*alpha*counts_off)**0.5\n",
+    "\n",
+    "    fig.add_subplot(1, 2, 2)\n",
+    "\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), excess, yerr=err, fmt='o', ms=3)\n",
+    "    plt.plot([theta2_cut[k], theta2_cut[k]], [0, excess.max()], linestyle='dashed',\n",
+    "             color='tab:green', label='$\\\\theta^2$ cut')\n",
+    "\n",
+    "    plt.xlabel('$\\\\theta^2 (deg^2)$', fontsize=14)\n",
+    "    plt.ylabel('Excess', fontsize=14)\n",
+    "    plt.xticks(fontsize=14)\n",
+    "    plt.yticks(fontsize=14)\n",
+    "    plt.grid()\n",
+    "\n",
+    "    plt.show()\n",
+    "\n",
+    "    non = counts_on[:nbinscut].sum()\n",
+    "    noff = counts_off[:nbinscut].sum()\n",
+    "\n",
+    "    print(f'Energy range: {min_energy_cut[k]:.1f} - {max_energy_cut[k]:.1f} TeV')\n",
+    "    print(f'Excess: {non-alpha*noff:.3f}; Off: {noff}')\n",
+    "    print(f'Gamma rate: {(non-alpha*noff)/t_eff.to_value(u.min):.3f} events / minute')\n",
+    "    print(f'Off rate: {noff/t_eff.to_value(u.min):.3f} events / minute')\n",
+    "    print(f'alpha (backg normalization): {alpha:.3f}')\n",
+    "\n",
+    "    from pyirf.statistics import li_ma_significance\n",
+    "    print(f'Li & Ma Significance: {li_ma_significance(non, noff, alpha):.2f} standard deviations')\n",
+    "\n",
+    "    # If source is Crab, check what significance one would get in 50 h for a weak source:\n",
+    "\n",
+    "    if source_name.find(\"Crab\") >= 0:\n",
+    "        non_50h = 50. / t_eff.to_value(u.h) * non\n",
+    "        noff_50h = 50. / t_eff.to_value(u.h) * noff\n",
+    "        fraction = 0.01\n",
+    "        print()\n",
+    "        print(f'Li & Ma Significance for {int(fraction*100)}% of excess in 50 h (useful if source is Crab): '\n",
+    "              f'{li_ma_significance((non_50h-alpha*noff_50h)*fraction+alpha*noff_50h, noff_50h, alpha):.2f} '\n",
+    "              'standard deviations')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "52bdec06",
+   "metadata": {},
+   "source": [
+    "## For good-quality, low-zenith Crab runs, the high-E significance for 1% of Crab in 50 h should be around 5 sigma"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0d9a0fb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}