From 9c8769c2683f6ee36e14e0a2d43622a43ff619aa Mon Sep 17 00:00:00 2001 From: Aditi Bishnoi Date: Sun, 7 Jul 2024 14:44:42 +0530 Subject: [PATCH] Update load_Allen_Visual_Behavior_from_SDK.ipynb temporary fix to obtain event triggered response for all stimuli --- .../load_Allen_Visual_Behavior_from_SDK.ipynb | 377 +++++++++++++++++- 1 file changed, 371 insertions(+), 6 deletions(-) diff --git a/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb b/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb index f9e5f8677..1e4627ee1 100644 --- a/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb +++ b/projects/neurons/load_Allen_Visual_Behavior_from_SDK.ipynb @@ -4725,6 +4725,370 @@ "Instead of just omissions, let's now look at the responses to each of the stimuli in this session, which consists of 8 unique images, plus the omitted stimuli (which we characterize as a unique stimulus type). First, we will calculate an event triggered response to each stimulus start time in the stimulus table." ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Some edits to utilities to obtain event triggered response\n", + "def get_time_array(t_start, t_end, sampling_rate=None, step_size=None, include_endpoint=True): # NOQA E501\n", + " '''\n", + " A function to get a time array between two specified timepoints at a defined sampling rate # NOQA E501\n", + " Deals with possibility of time range not being evenly divisible by desired sampling rate # NOQA E501\n", + " Uses np.linspace instead of np.arange given decimal precision issues with np.arange (see np.arange documentation for details) # NOQA E501\n", + "\n", + " Parameters:\n", + " -----------\n", + " t_start : float\n", + " start time for array\n", + " t_end : float\n", + " end time for array\n", + " sampling_rate : float\n", + " desired sampling of array\n", + " Note: user must specify either sampling_rate or step_size, not both\n", + " step_size : float\n", + " desired step size of array\n", + " Note: user must specify either sampling_rate or step_size, not both\n", + " include_endpoint : Boolean\n", + " Passed to np.linspace to calculate relative time\n", + " If True, stop is the last sample. Otherwise, it is not included.\n", + " Default is True\n", + "\n", + " Returns:\n", + " --------\n", + " numpy.array\n", + " an array of timepoints at the desired sampling rate\n", + "\n", + " Examples:\n", + " ---------\n", + " get a time array exclusive of the endpoint\n", + " >>> t_array = get_time_array(\n", + " t_start=-1,\n", + " t_end=1,\n", + " step_size=0.5,\n", + " include_endpoint=False\n", + " )\n", + "\n", + " np.array([-1., -0.5, 0., 0.5])\n", + "\n", + "\n", + " get a time array inclusive of the endpoint\n", + " >>> t_array = get_time_array(\n", + " t_start=-1,\n", + " t_end=1,\n", + " step_size=0.5,\n", + " include_endpoint=False\n", + " )\n", + "\n", + " np.array([-1., -0.5, 0., 0.5, 1.0])\n", + "\n", + "\n", + " get a time array where the range can't be evenly divided by the desired step_size\n", + " in this case, the time array includes the last timepoint before the desired endpoint\n", + " >>> t_array = get_time_array(\n", + " t_start=-1,\n", + " t_end=0.75,\n", + " step_size=0.5,\n", + " include_endpoint=False\n", + " )\n", + "\n", + " np.array([-1., -0.5, 0., 0.5])\n", + "\n", + "\n", + " Instead of passing the step_size, we can pass the sampling rate\n", + " >>> t_array = get_time_array(\n", + " t_start=-1,\n", + " t_end=1,\n", + " sampling_rate=2,\n", + " include_endpoint=False\n", + " )\n", + "\n", + " np.array([-1., -0.5, 0., 0.5])\n", + " '''\n", + " assert sampling_rate is not None or step_size is not None, 'must specify either sampling_rate or step_size' # NOQA E501\n", + " assert sampling_rate is None or step_size is None, 'cannot specify both sampling_rate and step_size' # NOQA E501\n", + "\n", + " # value as a linearly spaced time array\n", + " if not step_size:\n", + " step_size = 1 / sampling_rate\n", + " # define a time array\n", + " n_steps = (t_end - t_start) / step_size\n", + " if n_steps != int(n_steps):\n", + " # if the number of steps isn't an int, that means it isn't possible\n", + " # to end on the desired t_after using the defined sampling rate\n", + " # we need to round down and include the endpoint\n", + " n_steps = int(n_steps)\n", + " t_end_adjusted = t_start + n_steps * step_size\n", + " include_endpoint = True\n", + " else:\n", + " t_end_adjusted = t_end\n", + "\n", + " if include_endpoint:\n", + " # add an extra step if including endpoint\n", + " n_steps += 1\n", + "\n", + " t_array = np.linspace(\n", + " t_start,\n", + " t_end_adjusted,\n", + " int(n_steps),\n", + " endpoint=include_endpoint\n", + " )\n", + "\n", + " return t_array\n", + "\n", + "def event_triggered_response(data, t, y, event_times, t_start=None, t_end=None, t_before=None, t_after=None,\n", + " output_sampling_rate=None, include_endpoint=True, output_format='tidy', interpolate=True): # NOQA E501\n", + " '''\n", + " Slices a timeseries relative to a given set of event times\n", + " to build an event-triggered response.\n", + "\n", + " For example, If we have data such as a measurement of neural activity\n", + " over time and specific events in time that we want to align\n", + " the neural activity to, this function will extract segments of the neural\n", + " timeseries in a specified time window around each event.\n", + "\n", + " The times of the events need not align with the measured\n", + " times of the neural data.\n", + " Relative times will be calculated by linear interpolation.\n", + "\n", + " Parameters:\n", + " -----------\n", + " data: Pandas.DataFrame\n", + " Input dataframe in tidy format\n", + " Each row should be one observation\n", + " Must contains columns representing `t` and `y` (see below)\n", + " t : string\n", + " Name of column in data to use as time data\n", + " y : string\n", + " Name of column to use as y data\n", + " event_times: Panda.Series, numpy array or list of floats\n", + " Times of events of interest. If pd.Series, the original index and index name will be preserved in the output\n", + " Values in column specified by `y` will be sliced and interpolated\n", + " relative to these times\n", + " t_start : float\n", + " start time relative to each event for desired time window\n", + " e.g.: t_start = -1 would start the window 1 second before each\n", + " t_start = 1 would start the window 1 second after each event\n", + " Note: cannot pass both t_start and t_before\n", + " t_before : float\n", + " time before each of event of interest to include in each slice\n", + " e.g.: t_before = 1 would start the window 1 second before each event\n", + " t_before = -1 would start the window 1 second after each event\n", + " Note: cannot pass both t_start and t_before\n", + " t_end : float\n", + " end time relative to each event for desired time window\n", + " e.g.: t_end = 1 would end the window 1 second after each event\n", + " t_end = -1 would end the window 1 second before each event\n", + " Note: cannot pass both t_end and t_after\n", + " t_after : float\n", + " time after each event of interest to include in each slice\n", + " e.g.: t_after = 1 would start the window 1 second after each event\n", + " t_after = -1 would start the window 1 second before each event\n", + " Note: cannot pass both t_end and t_after\n", + " output_sampling_rate : float\n", + " Desired sampling of output.\n", + " Input data will be interpolated to this sampling rate if interpolate = True (default). # NOQA E501\n", + " If passing interpolate = False, the sampling rate of the input timeseries will # NOQA E501\n", + " be used and output_sampling_rate should not be specified.\n", + " include_endpoint : Boolean\n", + " Passed to np.linspace to calculate relative time\n", + " If True, stop is the last sample. Otherwise, it is not included.\n", + " Default is True\n", + " output_format : string\n", + " 'wide' or 'tidy' (default = 'tidy')\n", + " if 'tidy'\n", + " One column representing time\n", + " One column representing event_number\n", + " One column representing event_time\n", + " One row per observation (# rows = len(time) x len(event_times))\n", + " if 'wide', output format will be:\n", + " time as indices\n", + " One row per interpolated timepoint\n", + " One column per event,\n", + " with column names titled event_{EVENT NUMBER}_t={EVENT TIME}\n", + " interpolate : Boolean\n", + " if True (default), interpolates each response onto a common timebase\n", + " if False, shifts each response to align indices to a common timebase\n", + "\n", + " Returns:\n", + " --------\n", + " Pandas.DataFrame\n", + " See description in `output_format` section above\n", + "\n", + " Examples:\n", + " ---------\n", + " An example use case, recover a sinousoid from noise:\n", + "\n", + " First, define a time vector\n", + " >>> t = np.arange(-10,110,0.001)\n", + "\n", + " Now build a dataframe with one column for time,\n", + " and another column that is a noise-corrupted sinuosoid with period of 1\n", + " >>> data = pd.DataFrame({\n", + " 'time': t,\n", + " 'noisy_sinusoid': np.sin(2*np.pi*t) + np.random.randn(len(t))*3\n", + " })\n", + "\n", + " Now use the event_triggered_response function to get a tidy\n", + " dataframe of the signal around every event\n", + "\n", + " Events will simply be generated as every 1 second interval\n", + " starting at 0, since our period here is 1\n", + " >>> etr = event_triggered_response(\n", + " data,\n", + " x = 'time',\n", + " y = 'noisy_sinusoid',\n", + " event_times = np.arange(100),\n", + " t_start = -1,\n", + " t_end = 1,\n", + " output_sampling_rate = 100\n", + " )\n", + " Then use seaborn to view the result\n", + " We're able to recover the sinusoid through averaging\n", + " >>> import matplotlib.pyplot as plt\n", + " >>> import seaborn as sns\n", + " >>> fig, ax = plt.subplots()\n", + " >>> sns.lineplot(\n", + " data = etr,\n", + " x='time',\n", + " y='noisy_sinusoid',\n", + " ax=ax\n", + " )\n", + " '''\n", + " # ensure that non-conflicting time values are passed\n", + " assert t_before is not None or t_start is not None, 'must pass either t_start or t_before' # noqa: E501\n", + " assert t_after is not None or t_end is not None, 'must pass either t_start or t_before' # noqa: E501\n", + "\n", + " assert t_before is None or t_start is None, 'cannot pass both t_start and t_before' # noqa: E501\n", + " assert t_after is None or t_end is None, 'cannot pass both t_after and t_end' # noqa: E501\n", + "\n", + " if interpolate is False:\n", + " output_sampling_rate = None\n", + " # MG - commenting this out because it crashes my code when interpolate = False, even if output_sampling_rate = None\n", + " # assert output_sampling_rate is None, 'if interpolation = False, the sampling rate of the input timeseries will be used. Do not specify output_sampling_rate' # NOQA E501\n", + "\n", + " # assign time values to t_start and t_end\n", + " if t_start is None:\n", + " t_start = -1 * t_before\n", + " if t_end is None:\n", + " t_end = t_after\n", + "\n", + " # get original stimulus_presentation_ids, preserve the column for .join() method\n", + " if type(event_times) is pd.Series: # if pd.Series, preserve the name of index column\n", + " original_index = event_times.index.values\n", + " if type(event_times.index.name) is str:\n", + " original_index_column_name = event_times.index.name\n", + " else: # if index column does not have a name, name is original_index\n", + " event_times.index.name = 'original_index'\n", + " original_index_column_name = event_times.index.name\n", + " # is list or array, turn into pd.Series\n", + " elif type(event_times) is list or type(event_times) is np.ndarray:\n", + " event_times = pd.Series(data=event_times,\n", + " name='event_times'\n", + " )\n", + " # name the index column \"original_index\"\n", + " event_times.index.name = 'original_index'\n", + " original_index_column_name = event_times.index.name\n", + " original_index = event_times.index.values\n", + "\n", + " # ensure that t_end is greater than t_start\n", + " assert t_end > t_start, 'must define t_end to be greater than t_start'\n", + "\n", + " if output_sampling_rate is None:\n", + " # if sampling rate is None,\n", + " # set it to be the mean sampling rate of the input data\n", + " output_sampling_rate = 1 / np.diff(data[t]).mean()\n", + "\n", + " # if interpolate is set to True,\n", + " # we will calculate a common timebase and\n", + " # interpolate every response onto that timebase\n", + " if interpolate:\n", + " # set up a dictionary with key 'time' and\n", + " t_array = get_time_array(\n", + " t_start=t_start,\n", + " t_end=t_end,\n", + " sampling_rate=output_sampling_rate,\n", + " include_endpoint=include_endpoint,\n", + " )\n", + " data_dict = {'time': t_array}\n", + "\n", + " # iterate over all event times\n", + " data_reindexed = data.set_index(t, inplace=False)\n", + "\n", + " for event_number, event_time in enumerate(np.array(event_times)):\n", + "\n", + " # get a slice of the input data surrounding each event time\n", + " data_slice = data_reindexed[y].loc[event_time + t_start: event_time + t_end] # noqa: E501\n", + "\n", + " # update our dictionary to have a new key defined as\n", + " # 'event_{EVENT NUMBER}_t={EVENT TIME}' and\n", + " # a value that includes an array that represents the\n", + " # sliced data around the current event, interpolated\n", + " # on the linearly spaced time array\n", + " if data_slice.values.size != 0:\n", + " data_dict.update({\n", + " 'event_{}_t={}'.format(event_number, event_time): np.interp(\n", + " data_dict['time'],\n", + " data_slice.index - event_time,\n", + " data_slice.values\n", + " )\n", + " })\n", + "\n", + " # define a wide dataframe as a dataframe of the above compiled dictionary # NOQA E501\n", + " wide_etr = pd.DataFrame(data_dict)\n", + "\n", + " # if interpolate is False,\n", + " # we will calculate a common timebase and\n", + " # shift every response onto that timebase\n", + " else:\n", + " event_indices, start_ind_offset, end_ind_offset, trace_timebase = slice_inds_and_offsets( # NOQA E501\n", + " np.array(data[t]),\n", + " np.array(event_times),\n", + " time_window=[t_start, t_end],\n", + " sampling_rate=None,\n", + " include_endpoint=True\n", + " )\n", + " all_inds = event_indices + \\\n", + " np.arange(start_ind_offset, end_ind_offset)[:, None]\n", + " wide_etr = pd.DataFrame(\n", + " data[y].values.T[all_inds],\n", + " index=trace_timebase,\n", + " columns=['event_{}_t={}'.format(event_index, event_time) for event_index, event_time in enumerate(event_times)] # NOQA E501\n", + " ).rename_axis(index='time').reset_index()\n", + "\n", + " if output_format == 'wide':\n", + " # return the wide dataframe if output_format is 'wide'\n", + " return wide_etr.set_index('time')\n", + " elif output_format == 'tidy':\n", + " # if output format is 'tidy',\n", + " # transform the wide dataframe to tidy format\n", + " # first, melt the dataframe with the 'id_vars' column as \"time\"\n", + " tidy_etr = wide_etr.melt(id_vars='time')\n", + "\n", + " # add an \"event_number\" column that contains the event number\n", + " tidy_etr['event_number'] = tidy_etr['variable'].map(\n", + " lambda s: s.split('event_')[1].split('_')[0]\n", + " ).astype(int)\n", + "\n", + " tidy_etr[original_index_column_name] = tidy_etr['event_number'].apply(\n", + " lambda row: original_index[row])\n", + "\n", + " # add an \"event_time\" column that contains the event time ()\n", + " tidy_etr['event_time'] = tidy_etr['variable'].map(\n", + " lambda s: s.split('t=')[1]\n", + " ).astype(float)\n", + "\n", + " # drop the \"variable\" column, rename the \"value\" column\n", + " tidy_etr = (\n", + " tidy_etr\n", + " .drop(columns=['variable'])\n", + " .rename(columns={'value': y})\n", + " )\n", + " # return the tidy event triggered responses\n", + " return tidy_etr" + ] + }, { "cell_type": "code", "execution_count": null, @@ -4745,7 +5109,7 @@ "# iterate over each unique cell\n", "for cell_specimen_id in tqdm(neural_data['cell_specimen_id'].unique()):\n", " # calculate the event triggered response for this cell to every stimulus\n", - " full_etr_this_cell = utilities.event_triggered_response(\n", + " full_etr_this_cell = event_triggered_response(\n", " neural_data.query('cell_specimen_id == @cell_specimen_id'),\n", " t='timestamps',\n", " y='dff',\n", @@ -7360,6 +7724,7 @@ } ], "source": [ + "features_and_labels = features_and_labels.dropna() # remove NAN values for TSNE\n", "X = features_and_labels[cell_ids]\n", "X.sample(10)" ] @@ -8464,9 +8829,9 @@ "name": "python3" }, "kernelspec": { - "display_name": "Python [conda env:nma-compneuro]", + "display_name": "Python 3", "language": "python", - "name": "conda-env-nma-compneuro-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -8478,9 +8843,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.8.8" } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 1 +}