From 6315c2af31abeff1088e891539e105c606f70dc7 Mon Sep 17 00:00:00 2001
From: moralejo
Date: Fri, 9 Feb 2024 09:55:43 +0000
Subject: [PATCH] New notebook for producing theta2 plots from a set of DL2 files

---
 notebooks/explore_DL2.ipynb | 338 ++++++++++++++++++++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 notebooks/explore_DL2.ipynb

diff --git a/notebooks/explore_DL2.ipynb b/notebooks/explore_DL2.ipynb
new file mode 100644
index 0000000000..c5b8b2f1bd
--- /dev/null
+++ b/notebooks/explore_DL2.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "italian-enforcement",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import glob\n",
+    "import astropy.units as u\n",
+    "from astropy.coordinates.erfa_astrom import ErfaAstromInterpolator, erfa_astrom\n",
+    "from lstchain.reco.utils import get_effective_time, extract_source_position, compute_theta2\n",
+    "from ctapipe.containers import EventType\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "afa5037c",
+   "metadata": {},
+   "source": [
+    "## DL2 exploration notebook\n",
+    "\n",
+    "This notebook opens a set of LST-1 DL2 files, divides the data into two subsets according to reconstructed energy, and computes (for certain gammaness cuts) the theta2 plots with respect to a direction specified by the user (the candidate source).\n",
+    "\n",
+    "The cuts (gammaness & theta2) used for computing significances are \"reasonable\" for a first attempt at source detection. The ones for the high-E subset are close to optimal for sensitivity (at low zenith angles, say below 40 deg, for good observation conditions and a Crab-like spectrum).\n",
+    "For low energies it is hard to say what \"optimal cuts\" would be - that depends strongly on the source energy spectrum, and is also more sensitive to the zenith angle (via the energy threshold). Do **not** play with the cuts on a yet-undetected source! If custom optimization of the cuts is necessary, it can be done on simulations (for an assumed spectrum) or on a confirmed and bright source."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f88630c7",
+   "metadata": {},
+   "source": [
+    "## USER INPUT: dataset and source name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "turkish-psychology",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = glob.glob(\"/fefs/aswg/workspace/abelardo.moralejo/Crab_winter_22to23/TMP/DL2/dl2_*.h5\")\n",
+    "source_name = \"Crab\"\n",
+    "# theta2 plots will be calculated w.r.t. this source (the name must be known to astropy,\n",
+    "# and the source must be in the FoV of the selected dataset)"
+   ]
+  },
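+  {
+   "cell_type": "markdown",
+   "id": "source-name-check-note",
+   "metadata": {},
+   "source": [
+    "Optional quick check (a minimal sketch, not needed by the rest of the notebook): verify that astropy can resolve `source_name` before looping over the whole dataset. `SkyCoord.from_name` queries an online name resolver, so this cell needs internet access; skip it when working offline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "source-name-check",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: resolve the source name with astropy (requires internet access)\n",
+    "from astropy.coordinates import SkyCoord\n",
+    "\n",
+    "source_coordinates = SkyCoord.from_name(source_name)\n",
+    "print(f'{source_name}: RA = {source_coordinates.ra.deg:.4f} deg, '\n",
+    "      f'Dec = {source_coordinates.dec.deg:.4f} deg')"
+   ]
+  },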
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fuzzy-skill",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.sort()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "154f2041",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tablename = \"/dl2/event/telescope/parameters/LST_LSTCam\"\n",
+    "dummy = []\n",
+    "\n",
+    "t_eff = 0\n",
+    "t_elapsed = 0\n",
+    "\n",
+    "for file in dataset:\n",
+    "    print(file)\n",
+    "    tb = pd.read_hdf(file, tablename)\n",
+    "    lt, et = get_effective_time(tb)\n",
+    "    t_eff += lt\n",
+    "    t_elapsed += et\n",
+    "    dummy.append(tb)\n",
+    "\n",
+    "table = pd.concat(dummy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78608dc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24313927",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dummy = None\n",
+    "gc.collect()  # clean up memory (in case of a long table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "orange-isolation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fifteen-chase",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(table.columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "twenty-treat",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Effective time: {t_eff.to(u.h):.3f}; Elapsed time: {t_elapsed.to(u.h):.3f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "589e8317",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the cuts here.\n",
+    "# We make two subsets: Ereco < 0.2 TeV and Ereco > 0.2 TeV\n",
+    "\n",
+    "min_gammaness_cut = [0.5, 0.95]\n",
+    "min_intensity_cut = [50, 50] # p.e.\n",
+    "min_energy_cut = [0., 0.2] # TeV\n",
+    "max_energy_cut = [0.2, 1e6] # TeV\n",
+    "\n",
+    "theta2_cut = [0.04, 0.02] # deg2 - applied later, by summing the theta2 histogram bins below the cut\n",
+    "\n",
+    "event_selection = [] # Index 0 will contain the low-E cuts, index 1 the high-E cuts\n",
+    "for k in range(2):\n",
+    "    event_selection.append((table.gammaness > min_gammaness_cut[k]) &\n",
+    "                           (table.intensity > min_intensity_cut[k]) &\n",
+    "                           (table.reco_energy > min_energy_cut[k]) &\n",
+    "                           (table.reco_energy < max_energy_cut[k]) &\n",
+    "                           (table.event_type == EventType.SUBARRAY.value))\n",
+    "# SUBARRAY is the event type for \"cosmics\" (i.e. \"physics trigger\", showers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bed359f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gamma_candidates = []\n",
+    "for k in range(2):\n",
+    "    gamma_candidates.append(table[event_selection[k]])"
+   ]
+  },
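+  {
+   "cell_type": "markdown",
+   "id": "selection-summary-note",
+   "metadata": {},
+   "source": [
+    "Optional sanity check (a minimal sketch using only variables defined above): print how many events survive the selection in each energy subset, to make sure the cuts are not removing essentially all events before the slower theta2 computation below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "selection-summary",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Events surviving the gammaness / intensity / energy cuts in each subset\n",
+    "for k, label in enumerate(['low E', 'high E']):\n",
+    "    print(f'{label} ({min_energy_cut[k]:.1f} - {max_energy_cut[k]:.1f} TeV): '\n",
+    "          f'{len(gamma_candidates[k])} of {len(table)} events pass the cuts')"
+   ]
+  },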
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c1e77c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "focal = 29.30565 * u.m # EFFECTIVE focal length (i.e. accounts for coma aberration)\n",
+    "\n",
+    "source_position = []\n",
+    "# Beware: this can be quite slow for long datasets (and the more so the softer the event selection cuts!)\n",
+    "for k in range(2):\n",
+    "    with erfa_astrom.set(ErfaAstromInterpolator(5 * u.min)):\n",
+    "        source_position.append(extract_source_position(gamma_candidates[k],\n",
+    "                                                        source_name,\n",
+    "                                                        equivalent_focal_length=focal))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e87ca2cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nbins = [50, 100] # number of bins of theta2 plot for low E and high E\n",
+    "number_of_offs = [1, 3] # number of off regions (just one at low E, because of worse angular resolution)\n",
+    "\n",
+    "for k in range(2): # index 0 is low E (defined above); index 1 is high E\n",
+    "    print(\"\\n\\n\\n\")\n",
+    "\n",
+    "    # Off positions: the one opposite to the source w.r.t. the camera center, plus two more\n",
+    "    # at the same distance from the center, \"at right angles\" w.r.t. the line source - camera center:\n",
+    "    off_180 = [-source_position[k][0], -source_position[k][1]]\n",
+    "    off_90 = [-source_position[k][1], source_position[k][0]]\n",
+    "    off_270 = [source_position[k][1], -source_position[k][0]]\n",
+    "\n",
+    "    theta2_on = np.array(compute_theta2(gamma_candidates[k], source_position[k]))\n",
+    "    theta2_off_180 = np.array(compute_theta2(gamma_candidates[k], off_180))\n",
+    "    theta2_off_90 = np.array(compute_theta2(gamma_candidates[k], off_90))\n",
+    "    theta2_off_270 = np.array(compute_theta2(gamma_candidates[k], off_270))\n",
+    "\n",
+    "    theta_range = (0, 0.5)\n",
+    "\n",
+    "    nbinscut = int(np.round(theta2_cut[k] / ((theta_range[1] - theta_range[0]) / nbins[k])))\n",
+    "\n",
+    "    fig = plt.figure(figsize=(16,6))\n",
+    "\n",
+    "    counts_on, bins = np.histogram(theta2_on, bins=nbins[k], range=theta_range)\n",
+    "    counts_off, _ = np.histogram(theta2_off_180, bins=bins)\n",
+    "\n",
+    "    if number_of_offs[k] == 3:\n",
+    "        counts_off_90, _ = np.histogram(theta2_off_90, bins=bins)\n",
+    "        counts_off_270, _ = np.histogram(theta2_off_270, bins=bins)\n",
+    "        counts_off += counts_off_90 + counts_off_270\n",
+    "\n",
+    "    alpha = 1/number_of_offs[k]\n",
+    "\n",
+    "    fig.add_subplot(1, 2, 1)\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), counts_on, yerr=counts_on**0.5,\n",
+    "                 fmt='o', ms=3, label='ON-source')\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), alpha*counts_off, yerr=alpha*(counts_off**0.5),\n",
+    "                 fmt='o', ms=3, label='OFF-source')\n",
+    "\n",
+    "    plt.plot([theta2_cut[k], theta2_cut[k]], [0, counts_on.max()], linestyle='dashed',\n",
+    "             color='tab:green', label='$\\\\theta^2$ cut')\n",
+    "\n",
+    "    plt.xlabel('$\\\\theta^2 (deg^2)$', fontsize=14)\n",
+    "    plt.ylabel('Events', fontsize=14)\n",
+    "    plt.xticks(fontsize=14)\n",
+    "    plt.yticks(fontsize=14)\n",
+    "    plt.ylim(0, counts_on.max()*1.15)\n",
+    "    plt.legend(fontsize=14)\n",
+    "    plt.grid()\n",
+    "\n",
+    "    excess = counts_on - alpha*counts_off\n",
+    "    err = (counts_on + alpha*alpha*counts_off)**0.5\n",
+    "\n",
+    "    fig.add_subplot(1, 2, 2)\n",
+    "\n",
+    "    plt.errorbar(0.5*(bins[1:]+bins[:-1]), excess, yerr=err, fmt='o', ms=3)\n",
+    "    plt.plot([theta2_cut[k], theta2_cut[k]], [0, excess.max()], linestyle='dashed',\n",
+    "             color='tab:green', label='$\\\\theta^2$ cut')\n",
+    "\n",
+    "    plt.xlabel('$\\\\theta^2 (deg^2)$', fontsize=14)\n",
+    "    plt.ylabel('Excess', fontsize=14)\n",
+    "    plt.xticks(fontsize=14)\n",
+    "    plt.yticks(fontsize=14)\n",
+    "    plt.grid()\n",
+    "\n",
+    "    plt.show()\n",
+    "\n",
+    "    non = counts_on[:nbinscut].sum()\n",
+    "    noff = counts_off[:nbinscut].sum()\n",
+    "\n",
+    "    print(f'Energy range: {min_energy_cut[k]:.1f} - {max_energy_cut[k]:.1f} TeV')\n",
+    "    print(f'Excess: {non-alpha*noff:.3f}; Off: {noff}')\n",
+    "    print(f'Gamma rate: {(non-alpha*noff)/t_eff.to_value(u.min):.3f} events / minute')\n",
+    "    print(f'Off rate: {noff/t_eff.to_value(u.min):.3f} events / minute')\n",
+    "    print(f'alpha (backg normalization): {alpha:.3f}')\n",
+    "\n",
+    "    from pyirf.statistics import li_ma_significance\n",
+    "    print(f'Li & Ma Significance: {li_ma_significance(non, noff, alpha):.2f} standard deviations')\n",
+    "\n",
+    "    # If source is Crab, check what significance one would get in 50 h for a weak source:\n",
+    "\n",
+    "    if source_name.find(\"Crab\") >= 0:\n",
+    "        non_50h = 50. / t_eff.to_value(u.h) * non\n",
+    "        noff_50h = 50. / t_eff.to_value(u.h) * noff\n",
+    "        fraction = 0.01\n",
+    "        print()\n",
+    "        print(f'Li & Ma Significance for {int(fraction*100)}% of excess in 50 h (useful if source is Crab): '\n",
+    "              f'{li_ma_significance((non_50h-alpha*noff_50h)*fraction+alpha*noff_50h, noff_50h, alpha):.2f} '\n",
+    "              'standard deviations')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "52bdec06",
+   "metadata": {},
+   "source": [
+    "## For good-quality, low-zenith Crab runs, the high-E significance for 1% of Crab in 50 h should be around 5 sigma"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0d9a0fb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}