From 9c8769c2683f6ee36e14e0a2d43622a43ff619aa Mon Sep 17 00:00:00 2001
From: Aditi Bishnoi <adi.bish@outlook.com>
Date: Sun, 7 Jul 2024 14:44:42 +0530
Subject: [PATCH] Update load_Allen_Visual_Behavior_from_SDK.ipynb

temporary fix to obtain event triggered response for all stimuli
---
 .../load_Allen_Visual_Behavior_from_SDK.ipynb | 377 +++++++++++++++++-
 1 file changed, 371 insertions(+), 6 deletions(-)

diff --git a/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb b/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb
index f9e5f8677..1e4627ee1 100644
--- a/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb
+++ b/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb
@@ -4725,6 +4725,370 @@
     "Instead of just omissions, let's now look at the responses to each of the stimuli in this session, which consists of 8 unique images, plus the omitted stimuli (which we characterize as a unique stimulus type). First, we will calculate an event triggered response to each stimulus start time in the stimulus table."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title Some edits to utilities to obtain event triggered response\n",
+    "def get_time_array(t_start, t_end, sampling_rate=None, step_size=None, include_endpoint=True):  # NOQA E501\n",
+    "    '''\n",
+    "    A function to get a time array between two specified timepoints at a defined sampling rate  # NOQA E501\n",
+    "    Deals with possibility of time range not being evenly divisible by desired sampling rate  # NOQA E501\n",
+    "    Uses np.linspace instead of np.arange given decimal precision issues with np.arange (see np.arange documentation for details)  # NOQA E501\n",
+    "\n",
+    "    Parameters:\n",
+    "    -----------\n",
+    "    t_start : float\n",
+    "        start time for array\n",
+    "    t_end : float\n",
+    "        end time for array\n",
+    "    sampling_rate : float\n",
+    "        desired sampling of array\n",
+    "        Note: user must specify either sampling_rate or step_size, not both\n",
+    "    step_size : float\n",
+    "        desired step size of array\n",
+    "        Note: user must specify either sampling_rate or step_size, not both\n",
+    "    include_endpoint : Boolean\n",
+    "        Passed to np.linspace to calculate relative time\n",
+    "        If True, stop is the last sample. Otherwise, it is not included.\n",
+    "            Default is True\n",
+    "\n",
+    "    Returns:\n",
+    "    --------\n",
+    "    numpy.array\n",
+    "        an array of timepoints at the desired sampling rate\n",
+    "\n",
+    "    Examples:\n",
+    "    ---------\n",
+    "    get a time array exclusive of the endpoint\n",
+    "    >>> t_array = get_time_array(\n",
+    "        t_start=-1,\n",
+    "        t_end=1,\n",
+    "        step_size=0.5,\n",
+    "        include_endpoint=False\n",
+    "    )\n",
+    "\n",
+    "    np.array([-1., -0.5,  0.,  0.5])\n",
+    "\n",
+    "\n",
+    "    get a time array inclusive of the endpoint\n",
+    "    >>> t_array = get_time_array(\n",
+    "        t_start=-1,\n",
+    "        t_end=1,\n",
+    "        step_size=0.5,\n",
+    "        include_endpoint=False\n",
+    "    )\n",
+    "\n",
+    "    np.array([-1., -0.5,  0.,  0.5, 1.0])\n",
+    "\n",
+    "\n",
+    "    get a time array where the range can't be evenly divided by the desired step_size\n",
+    "    in this case, the time array includes the last timepoint before the desired endpoint\n",
+    "    >>> t_array = get_time_array(\n",
+    "        t_start=-1,\n",
+    "        t_end=0.75,\n",
+    "        step_size=0.5,\n",
+    "        include_endpoint=False\n",
+    "    )\n",
+    "\n",
+    "    np.array([-1., -0.5,  0.,  0.5])\n",
+    "\n",
+    "\n",
+    "    Instead of passing the step_size, we can pass the sampling rate\n",
+    "    >>> t_array = get_time_array(\n",
+    "        t_start=-1,\n",
+    "        t_end=1,\n",
+    "        sampling_rate=2,\n",
+    "        include_endpoint=False\n",
+    "    )\n",
+    "\n",
+    "    np.array([-1., -0.5,  0.,  0.5])\n",
+    "    '''\n",
+    "    assert sampling_rate is not None or step_size is not None, 'must specify either sampling_rate or step_size'  # NOQA E501\n",
+    "    assert sampling_rate is None or step_size is None, 'cannot specify both sampling_rate and step_size'  # NOQA E501\n",
+    "\n",
+    "    # value as a linearly spaced time array\n",
+    "    if not step_size:\n",
+    "        step_size = 1 / sampling_rate\n",
+    "    # define a time array\n",
+    "    n_steps = (t_end - t_start) / step_size\n",
+    "    if n_steps != int(n_steps):\n",
+    "        # if the number of steps isn't an int, that means it isn't possible\n",
+    "        # to end on the desired t_after using the defined sampling rate\n",
+    "        # we need to round down and include the endpoint\n",
+    "        n_steps = int(n_steps)\n",
+    "        t_end_adjusted = t_start + n_steps * step_size\n",
+    "        include_endpoint = True\n",
+    "    else:\n",
+    "        t_end_adjusted = t_end\n",
+    "\n",
+    "    if include_endpoint:\n",
+    "        # add an extra step if including endpoint\n",
+    "        n_steps += 1\n",
+    "\n",
+    "    t_array = np.linspace(\n",
+    "        t_start,\n",
+    "        t_end_adjusted,\n",
+    "        int(n_steps),\n",
+    "        endpoint=include_endpoint\n",
+    "    )\n",
+    "\n",
+    "    return t_array\n",
+    "\n",
+    "def event_triggered_response(data, t, y, event_times, t_start=None, t_end=None, t_before=None, t_after=None,\n",
+    "                             output_sampling_rate=None, include_endpoint=True, output_format='tidy', interpolate=True):  # NOQA E501\n",
+    "    '''\n",
+    "    Slices a timeseries relative to a given set of event times\n",
+    "    to build an event-triggered response.\n",
+    "\n",
+    "    For example, If we have data such as a measurement of neural activity\n",
+    "    over time and specific events in time that we want to align\n",
+    "    the neural activity to, this function will extract segments of the neural\n",
+    "    timeseries in a specified time window around each event.\n",
+    "\n",
+    "    The times of the events need not align with the measured\n",
+    "    times of the neural data.\n",
+    "    Relative times will be calculated by linear interpolation.\n",
+    "\n",
+    "    Parameters:\n",
+    "    -----------\n",
+    "    data: Pandas.DataFrame\n",
+    "        Input dataframe in tidy format\n",
+    "        Each row should be one observation\n",
+    "        Must contains columns representing `t` and `y` (see below)\n",
+    "    t : string\n",
+    "        Name of column in data to use as time data\n",
+    "    y : string\n",
+    "        Name of column to use as y data\n",
+    "    event_times: Panda.Series, numpy array or list of floats\n",
+    "        Times of events of interest. If pd.Series, the original index and index name will be preserved in the output\n",
+    "        Values in column specified by `y` will be sliced and interpolated\n",
+    "            relative to these times\n",
+    "    t_start : float\n",
+    "        start time relative to each event for desired time window\n",
+    "        e.g.:   t_start = -1 would start the window 1 second before each\n",
+    "                t_start = 1 would start the window 1 second after each event\n",
+    "        Note: cannot pass both t_start and t_before\n",
+    "    t_before : float\n",
+    "        time before each of event of interest to include in each slice\n",
+    "        e.g.:   t_before = 1 would start the window 1 second before each event\n",
+    "                t_before = -1 would start the window 1 second after each event\n",
+    "        Note: cannot pass both t_start and t_before\n",
+    "    t_end : float\n",
+    "        end time relative to each event for desired time window\n",
+    "        e.g.:   t_end = 1 would end the window 1 second after each event\n",
+    "                t_end = -1 would end the window 1 second before each event\n",
+    "        Note: cannot pass both t_end and t_after\n",
+    "    t_after : float\n",
+    "        time after each event of interest to include in each slice\n",
+    "        e.g.:   t_after = 1 would start the window 1 second after each event\n",
+    "                t_after = -1 would start the window 1 second before each event\n",
+    "        Note: cannot pass both t_end and t_after\n",
+    "    output_sampling_rate : float\n",
+    "        Desired sampling of output.\n",
+    "        Input data will be interpolated to this sampling rate if interpolate = True (default). # NOQA E501\n",
+    "        If passing interpolate = False, the sampling rate of the input timeseries will # NOQA E501\n",
+    "        be used and output_sampling_rate should not be specified.\n",
+    "    include_endpoint : Boolean\n",
+    "        Passed to np.linspace to calculate relative time\n",
+    "        If True, stop is the last sample. Otherwise, it is not included.\n",
+    "            Default is True\n",
+    "    output_format : string\n",
+    "        'wide' or 'tidy' (default = 'tidy')\n",
+    "        if 'tidy'\n",
+    "            One column representing time\n",
+    "            One column representing event_number\n",
+    "            One column representing event_time\n",
+    "            One row per observation (# rows = len(time) x len(event_times))\n",
+    "        if 'wide', output format will be:\n",
+    "            time as indices\n",
+    "            One row per interpolated timepoint\n",
+    "            One column per event,\n",
+    "                with column names titled event_{EVENT NUMBER}_t={EVENT TIME}\n",
+    "    interpolate : Boolean\n",
+    "        if True (default), interpolates each response onto a common timebase\n",
+    "        if False, shifts each response to align indices to a common timebase\n",
+    "\n",
+    "    Returns:\n",
+    "    --------\n",
+    "    Pandas.DataFrame\n",
+    "        See description in `output_format` section above\n",
+    "\n",
+    "    Examples:\n",
+    "    ---------\n",
+    "    An example use case, recover a sinousoid from noise:\n",
+    "\n",
+    "    First, define a time vector\n",
+    "    >>> t = np.arange(-10,110,0.001)\n",
+    "\n",
+    "    Now build a dataframe with one column for time,\n",
+    "    and another column that is a noise-corrupted sinuosoid with period of 1\n",
+    "    >>> data = pd.DataFrame({\n",
+    "            'time': t,\n",
+    "            'noisy_sinusoid': np.sin(2*np.pi*t) + np.random.randn(len(t))*3\n",
+    "        })\n",
+    "\n",
+    "    Now use the event_triggered_response function to get a tidy\n",
+    "    dataframe of the signal around every event\n",
+    "\n",
+    "    Events will simply be generated as every 1 second interval\n",
+    "    starting at 0, since our period here is 1\n",
+    "    >>> etr = event_triggered_response(\n",
+    "            data,\n",
+    "            x = 'time',\n",
+    "            y = 'noisy_sinusoid',\n",
+    "            event_times = np.arange(100),\n",
+    "            t_start = -1,\n",
+    "            t_end = 1,\n",
+    "            output_sampling_rate = 100\n",
+    "        )\n",
+    "    Then use seaborn to view the result\n",
+    "    We're able to recover the sinusoid through averaging\n",
+    "    >>> import matplotlib.pyplot as plt\n",
+    "    >>> import seaborn as sns\n",
+    "    >>> fig, ax = plt.subplots()\n",
+    "    >>> sns.lineplot(\n",
+    "            data = etr,\n",
+    "            x='time',\n",
+    "            y='noisy_sinusoid',\n",
+    "            ax=ax\n",
+    "        )\n",
+    "    '''\n",
+    "    # ensure that non-conflicting time values are passed\n",
+    "    assert t_before is not None or t_start is not None, 'must pass either t_start or t_before'  # noqa: E501\n",
+    "    assert t_after is not None or t_end is not None, 'must pass either t_start or t_before'  # noqa: E501\n",
+    "\n",
+    "    assert t_before is None or t_start is None, 'cannot pass both t_start and t_before'  # noqa: E501\n",
+    "    assert t_after is None or t_end is None, 'cannot pass both t_after and t_end'  # noqa: E501\n",
+    "\n",
+    "    if interpolate is False:\n",
+    "        output_sampling_rate = None\n",
+    "        # MG - commenting this out because it crashes my code when interpolate = False, even if output_sampling_rate = None\n",
+    "        # assert output_sampling_rate is None, 'if interpolation = False, the sampling rate of the input timeseries will be used. Do not specify output_sampling_rate'  # NOQA E501\n",
+    "\n",
+    "    # assign time values to t_start and t_end\n",
+    "    if t_start is None:\n",
+    "        t_start = -1 * t_before\n",
+    "    if t_end is None:\n",
+    "        t_end = t_after\n",
+    "\n",
+    "    # get original stimulus_presentation_ids, preserve the column for .join() method\n",
+    "    if type(event_times) is pd.Series:  # if pd.Series, preserve the name of index column\n",
+    "        original_index = event_times.index.values\n",
+    "        if type(event_times.index.name) is str:\n",
+    "            original_index_column_name = event_times.index.name\n",
+    "        else:  # if index column does not have a name, name is original_index\n",
+    "            event_times.index.name = 'original_index'\n",
+    "            original_index_column_name = event_times.index.name\n",
+    "    # is list or array, turn into pd.Series\n",
+    "    elif type(event_times) is list or type(event_times) is np.ndarray:\n",
+    "        event_times = pd.Series(data=event_times,\n",
+    "                                name='event_times'\n",
+    "                                )\n",
+    "        # name the index column \"original_index\"\n",
+    "        event_times.index.name = 'original_index'\n",
+    "        original_index_column_name = event_times.index.name\n",
+    "        original_index = event_times.index.values\n",
+    "\n",
+    "    # ensure that t_end is greater than t_start\n",
+    "    assert t_end > t_start, 'must define t_end to be greater than t_start'\n",
+    "\n",
+    "    if output_sampling_rate is None:\n",
+    "        # if sampling rate is None,\n",
+    "        # set it to be the mean sampling rate of the input data\n",
+    "        output_sampling_rate = 1 / np.diff(data[t]).mean()\n",
+    "\n",
+    "    # if interpolate is set to True,\n",
+    "    # we will calculate a common timebase and\n",
+    "    # interpolate every response onto that timebase\n",
+    "    if interpolate:\n",
+    "        # set up a dictionary with key 'time' and\n",
+    "        t_array = get_time_array(\n",
+    "            t_start=t_start,\n",
+    "            t_end=t_end,\n",
+    "            sampling_rate=output_sampling_rate,\n",
+    "            include_endpoint=include_endpoint,\n",
+    "        )\n",
+    "        data_dict = {'time': t_array}\n",
+    "\n",
+    "        # iterate over all event times\n",
+    "        data_reindexed = data.set_index(t, inplace=False)\n",
+    "\n",
+    "        for event_number, event_time in enumerate(np.array(event_times)):\n",
+    "\n",
+    "            # get a slice of the input data surrounding each event time\n",
+    "            data_slice = data_reindexed[y].loc[event_time + t_start: event_time + t_end]  # noqa: E501\n",
+    "\n",
+    "            # update our dictionary to have a new key defined as\n",
+    "            # 'event_{EVENT NUMBER}_t={EVENT TIME}' and\n",
+    "            # a value that includes an array that represents the\n",
+    "            # sliced data around the current event, interpolated\n",
+    "            # on the linearly spaced time array\n",
+    "            if data_slice.values.size != 0:\n",
+    "              data_dict.update({\n",
+    "                  'event_{}_t={}'.format(event_number, event_time): np.interp(\n",
+    "                      data_dict['time'],\n",
+    "                      data_slice.index - event_time,\n",
+    "                      data_slice.values\n",
+    "                  )\n",
+    "              })\n",
+    "\n",
+    "        # define a wide dataframe as a dataframe of the above compiled dictionary  # NOQA E501\n",
+    "        wide_etr = pd.DataFrame(data_dict)\n",
+    "\n",
+    "    # if interpolate is False,\n",
+    "    # we will calculate a common timebase and\n",
+    "    # shift every response onto that timebase\n",
+    "    else:\n",
+    "        event_indices, start_ind_offset, end_ind_offset, trace_timebase = slice_inds_and_offsets(  # NOQA E501\n",
+    "            np.array(data[t]),\n",
+    "            np.array(event_times),\n",
+    "            time_window=[t_start, t_end],\n",
+    "            sampling_rate=None,\n",
+    "            include_endpoint=True\n",
+    "        )\n",
+    "        all_inds = event_indices + \\\n",
+    "            np.arange(start_ind_offset, end_ind_offset)[:, None]\n",
+    "        wide_etr = pd.DataFrame(\n",
+    "            data[y].values.T[all_inds],\n",
+    "            index=trace_timebase,\n",
+    "            columns=['event_{}_t={}'.format(event_index, event_time) for event_index, event_time in enumerate(event_times)]  # NOQA E501\n",
+    "        ).rename_axis(index='time').reset_index()\n",
+    "\n",
+    "    if output_format == 'wide':\n",
+    "        # return the wide dataframe if output_format is 'wide'\n",
+    "        return wide_etr.set_index('time')\n",
+    "    elif output_format == 'tidy':\n",
+    "        # if output format is 'tidy',\n",
+    "        # transform the wide dataframe to tidy format\n",
+    "        # first, melt the dataframe with the 'id_vars' column as \"time\"\n",
+    "        tidy_etr = wide_etr.melt(id_vars='time')\n",
+    "\n",
+    "        # add an \"event_number\" column that contains the event number\n",
+    "        tidy_etr['event_number'] = tidy_etr['variable'].map(\n",
+    "            lambda s: s.split('event_')[1].split('_')[0]\n",
+    "        ).astype(int)\n",
+    "\n",
+    "        tidy_etr[original_index_column_name] = tidy_etr['event_number'].apply(\n",
+    "            lambda row: original_index[row])\n",
+    "\n",
+    "        # add an \"event_time\" column that contains the event time ()\n",
+    "        tidy_etr['event_time'] = tidy_etr['variable'].map(\n",
+    "            lambda s: s.split('t=')[1]\n",
+    "        ).astype(float)\n",
+    "\n",
+    "        # drop the \"variable\" column, rename the \"value\" column\n",
+    "        tidy_etr = (\n",
+    "            tidy_etr\n",
+    "            .drop(columns=['variable'])\n",
+    "            .rename(columns={'value': y})\n",
+    "        )\n",
+    "        # return the tidy event triggered responses\n",
+    "        return tidy_etr"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -4745,7 +5109,7 @@
     "# iterate over each unique cell\n",
     "for cell_specimen_id in tqdm(neural_data['cell_specimen_id'].unique()):\n",
     "  # calculate the event triggered response for this cell to every stimulus\n",
-    "  full_etr_this_cell = utilities.event_triggered_response(\n",
+    "  full_etr_this_cell = event_triggered_response(\n",
     "      neural_data.query('cell_specimen_id == @cell_specimen_id'),\n",
     "      t='timestamps',\n",
     "      y='dff',\n",
@@ -7360,6 +7724,7 @@
     }
    ],
    "source": [
+    "features_and_labels = features_and_labels.dropna() # remove NAN values for TSNE\n",
     "X = features_and_labels[cell_ids]\n",
     "X.sample(10)"
    ]
@@ -8464,9 +8829,9 @@
    "name": "python3"
   },
   "kernelspec": {
-   "display_name": "Python [conda env:nma-compneuro]",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "conda-env-nma-compneuro-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -8478,9 +8843,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.17"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ "nbformat_minor": 1
+}