Add feature extraction #18

Open · wants to merge 7 commits into main
@@ -1,4 +1,4 @@
-name: Pylint
+name: Ruff linting

 on: [push]

@@ -18,8 +18,8 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install gpflow>=2.6.0 numpy pandas==1.5.3 pylint rpy2==3.4.5 scipy statsmodels
-      - name: Analysing the code with pylint
+          pip install gpflow>=2.6.0 numpy pandas==1.5.3 rpy2==3.4.5 ruff scipy statsmodels
+      - name: Analysing the code with Ruff
         run: |
-          pylint $(git ls-files '*.py') --disable=line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring --fail-under=7
-        # continue-on-error: true
+          ruff check . --exit-zero
+        continue-on-error: true
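Review note: `ruff check . --exit-zero` always exits with status 0 even when violations are found, and `continue-on-error: true` additionally prevents the step from failing the job, so with this change the lint step can never fail CI; one of the two escape hatches is likely redundant.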
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
 # FCEst

-[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![Coverage](https://img.shields.io/badge/coverage-50%25-brightgreen)](coverage.xml)

 `FCEst` is a package for estimating static and time-varying functional connectivity (TVFC) in Python.
9 changes: 9 additions & 0 deletions fcest/features/__init__.py
@@ -0,0 +1,9 @@
from .brain_states import BrainStatesExtractor
from .graph_metrics import GraphMetricsExtractor

__all__ = [
    "brain_states",
    "graph_metrics",
    "BrainStatesExtractor",
    "GraphMetricsExtractor",
]
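For context, a hypothetical usage sketch of the new subpackage on synthetic data. The shapes follow the docstrings in `brain_states.py` below; the save directory and `num_brain_states=3` are illustrative choices, not part of this PR:

```python
import numpy as np

from fcest.features import BrainStatesExtractor

# Synthetic TVFC estimates: 10 subjects, 50 time steps, and
# D*(D-1)/2 = 6 lower-triangular features for D = 4 time series.
tvfc_estimates = np.random.rand(10, 50, 6)

extractor = BrainStatesExtractor(
    connectivity_metric="correlation",
    num_time_series=4,
    tvfc_estimates=tvfc_estimates,
)
inertia, assignments_df, dwell_times_df = extractor.extract_brain_states(
    num_brain_states=3,
    brain_states_savedir="results/brain_states",
)
```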
177 changes: 177 additions & 0 deletions fcest/features/brain_states.py
@@ -0,0 +1,177 @@
import logging
import os

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

from fcest.helpers.array_operations import reconstruct_symmetric_matrix_from_tril


class BrainStatesExtractor:

    def __init__(
        self,
        connectivity_metric: str,
        num_time_series: int,
        tvfc_estimates: np.ndarray,
    ) -> None:
        """
        Class for extracting brain states from TVFC estimates.

        Parameters
        ----------
        connectivity_metric : str
            Name of the connectivity metric (e.g. 'correlation'), used in output filenames.
        num_time_series : int
            Number of time series, D.
        tvfc_estimates : np.ndarray
            Array of shape (num_subjects, num_time_steps, num_features),
            where num_features = D*(D-1)/2.
        """
        logging.info("Initializing BrainStatesExtractor...")

        self.connectivity_metric = connectivity_metric
        self.num_time_steps = tvfc_estimates.shape[1]
        self.num_time_series = num_time_series

        # Stack all subjects: (num_subjects * num_time_steps, num_features).
        self.tvfc_estimates_tril = tvfc_estimates.reshape(-1, tvfc_estimates.shape[-1])

    def extract_brain_states(
        self,
        num_brain_states: int,
        brain_states_savedir: str,
    ):
        """
        Extract brain states from TVFC estimates.

        Parameters
        ----------
        num_brain_states : int
            The number of brain states (clusters) to extract.
        brain_states_savedir : str
            Directory to save the extracted brain states to;
            passed through to `compute_basis_state`, which requires it.
        """
        return self.compute_basis_state(
            all_subjects_tril_tvfc=self.tvfc_estimates_tril,
            num_basis_states=num_brain_states,
            num_time_series=self.num_time_series,
            num_time_steps=self.num_time_steps,
            brain_states_savedir=brain_states_savedir,
        )

    def compute_basis_state(
        self,
        all_subjects_tril_tvfc: np.ndarray,
        num_basis_states: int,
        num_time_series: int,
        num_time_steps: int,
        brain_states_savedir: str,
    ) -> tuple[float, pd.DataFrame, pd.DataFrame]:
        """
        Brain states (recurring whole-brain patterns) are a way to summarize estimated TVFC.
        Here we follow Allen et al. (2012) and use k-means clustering to find the brain states
        across subjects, independently for each of the two runs within each of the two sessions.

        Parameters
        ----------
        all_subjects_tril_tvfc : np.ndarray
            Array of shape (num_subjects * N, D*(D-1)/2).
        num_basis_states : int
            The number of states/clusters we want to extract.
            Ideally this number would be determined automatically, or based on some k-means elbow analysis.
        num_time_series : int
            Number of time series, D.
        num_time_steps : int
            Number of time steps, N.
        brain_states_savedir : str
            Directory to save brain states to.

        Returns
        -------
        tuple[float, pd.DataFrame, pd.DataFrame]
            The k-means inertia, per-subject brain state assignments, and per-subject dwell times.
        """
        logging.info("Running k-means clustering...")
        kmeans = KMeans(
            n_clusters=num_basis_states,
            algorithm='lloyd',
            n_init=10,
            verbose=0,
        ).fit(all_subjects_tril_tvfc)
        logging.info("Finished k-means clustering.")

        cluster_centers = self._get_cluster_centroids(
            kmeans=kmeans,
            num_time_series=num_time_series
        )

        cluster_centers, cluster_sort_order = self._sort_cluster_centers(
            cluster_centers=cluster_centers
        )

        # Save clusters (i.e. brain states) to file.
        if not os.path.exists(brain_states_savedir):
            os.makedirs(brain_states_savedir)
        for i_cluster, cluster_centroid in enumerate(cluster_centers):
            cluster_df = pd.DataFrame(cluster_centroid)  # (D, D)
            cluster_df.to_csv(
                os.path.join(
                    brain_states_savedir,
                    f'{self.connectivity_metric:s}_brain_state_{i_cluster:d}.csv'
                ),
                float_format='%.2f',
            )
        logging.info(f"Brain states saved in '{brain_states_savedir:s}'.")

        all_subjects_brain_state_assignments_df = self._get_brain_state_assignments(
            labels=kmeans.labels_,
            num_time_steps=num_time_steps
        )  # (num_subjects, N)

        # Re-map the raw k-means labels to the sorted cluster order.
        all_subjects_brain_state_assignments_df = all_subjects_brain_state_assignments_df.replace(
            to_replace=cluster_sort_order,
            value=np.arange(num_basis_states)
        )

        all_subjects_dwell_times_df = self._compute_dwell_time(
            num_brain_states=num_basis_states,
            brain_state_assignments=all_subjects_brain_state_assignments_df
        )  # (num_subjects, num_brain_states)

        return kmeans.inertia_, all_subjects_brain_state_assignments_df, all_subjects_dwell_times_df

    def _get_cluster_centroids(self, kmeans: KMeans, num_time_series: int) -> np.ndarray:
        """
        Get cluster centroids (centers) from k-means clustering.
        """
        # Get cluster centers - these are the characteristic basis brain states.
        cluster_centers = kmeans.cluster_centers_  # (num_clusters, num_features)

        # Reconstruct correlation matrix per cluster.
        cluster_centers = [
            reconstruct_symmetric_matrix_from_tril(cluster_vector, num_time_series)
            for cluster_vector in cluster_centers
        ]
        cluster_centers = np.array(cluster_centers)  # (num_clusters, D, D)

        return cluster_centers

    def _sort_cluster_centers(self, cluster_centers: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """
        Re-order clusters from high to low (descending) 'contrast' - higher contrast
        states are more interesting.
        """
        cluster_contrasts = np.var(cluster_centers, axis=(1, 2))  # (num_clusters, )
        cluster_sort_order = np.argsort(cluster_contrasts)[::-1]  # (num_clusters, )

        cluster_centers = cluster_centers[cluster_sort_order, :, :]  # (num_clusters, D, D)

        return cluster_centers, cluster_sort_order

    def _get_brain_state_assignments(
        self,
        labels: np.ndarray,
        num_time_steps: int,
    ) -> pd.DataFrame:
        """
        Get labels per covariance matrix, which can be used to reconstruct a brain state
        time series per subject. Each subject is then characterized only by an assignment
        to one of the clusters at each time step.

        Parameters
        ----------
        labels : np.ndarray
            Array of shape (num_subjects * N, ).
        num_time_steps : int
            Number of time steps, N.

        Returns
        -------
        pd.DataFrame
            Brain state assignments of shape (num_subjects, N).
        """
        assert len(labels) % num_time_steps == 0
        num_subjects = len(labels) // num_time_steps

        labels = labels.reshape(num_subjects, num_time_steps)  # (num_subjects, N)

        labels_df = pd.DataFrame(labels)

        return labels_df
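Review note: `compute_basis_state` calls `self._compute_dwell_time`, but no such method is defined anywhere in this diff, so the call would raise an `AttributeError` at runtime. A minimal sketch of what it could look like, assuming dwell time is defined as the mean number of consecutive time steps spent in a state before switching (the exact definition intended here is an assumption):

```python
    def _compute_dwell_time(
        self,
        num_brain_states: int,
        brain_state_assignments: pd.DataFrame,
    ) -> pd.DataFrame:
        """
        Hypothetical sketch: mean dwell time (average run length, in time steps)
        per brain state for each subject; shape (num_subjects, num_brain_states).
        """
        dwell_times = np.zeros((len(brain_state_assignments), num_brain_states))
        for i_subject, (_, assignments) in enumerate(brain_state_assignments.iterrows()):
            run_lengths = {state: [] for state in range(num_brain_states)}
            current_state, run_length = assignments.iloc[0], 1
            for state in assignments.iloc[1:]:
                if state == current_state:
                    run_length += 1
                else:
                    run_lengths[current_state].append(run_length)
                    current_state, run_length = state, 1
            run_lengths[current_state].append(run_length)  # close the final run
            for state in range(num_brain_states):
                if run_lengths[state]:
                    dwell_times[i_subject, state] = np.mean(run_lengths[state])
        return pd.DataFrame(dwell_times)
```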
17 changes: 17 additions & 0 deletions fcest/features/graph_metrics.py
@@ -0,0 +1,17 @@
import networkx as nx


class GraphMetricsExtractor:
    """Extract summary graph metrics from a (functional connectivity) graph."""

    def __init__(self, graph: nx.Graph) -> None:
        self.graph = graph

    def extract(self) -> dict:
        # nx.diameter and nx.average_shortest_path_length are only defined for
        # connected graphs and raise a NetworkXError otherwise, so guard that case.
        is_connected = nx.is_connected(self.graph)
        return {
            "nodes": self.graph.number_of_nodes(),
            "edges": self.graph.number_of_edges(),
            "density": nx.density(self.graph),
            "diameter": nx.diameter(self.graph) if is_connected else None,
            "average_clustering": nx.average_clustering(self.graph),
            "average_shortest_path_length": (
                nx.average_shortest_path_length(self.graph) if is_connected else None
            ),
        }
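For context, a hypothetical usage sketch; building the graph by thresholding a connectivity matrix, and the 0.5 cutoff, are illustrative assumptions rather than part of this PR:

```python
import networkx as nx
import numpy as np

from fcest.features import GraphMetricsExtractor

# Build a binary graph from a (D, D) functional connectivity matrix by
# thresholding its absolute values; the 0.5 cutoff is arbitrary.
fc_matrix = np.corrcoef(np.random.rand(4, 100))  # (D, D)
adjacency = (np.abs(fc_matrix) > 0.5).astype(int)
np.fill_diagonal(adjacency, 0)  # no self-loops

graph = nx.from_numpy_array(adjacency)
metrics = GraphMetricsExtractor(graph).extract()
print(metrics)
```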
42 changes: 42 additions & 0 deletions fcest/helpers/array_operations.py
@@ -180,3 +180,45 @@ def _is_positive_definite(matrix: np.array) -> bool:
        return True
    except la.LinAlgError:
        return False


def reconstruct_symmetric_matrix_from_tril(
    cluster_array: np.ndarray,
    num_time_series: int,
    diagonal: str = 'ones',
) -> np.ndarray:
    """
    Reconstruct a full symmetric matrix from its lower triangular values.

    Parameters
    ----------
    cluster_array : np.ndarray
        Array of shape (D*(D-1)/2, ) holding the strictly lower triangular values.
    num_time_series : int
        Number of time series, D.
    diagonal : str, default='ones'
        Value to put on the diagonal: 'ones' (e.g. for correlation matrices) or 'zeros'.

    Returns
    -------
    np.ndarray
        Array of shape (D, D).
    """
    match diagonal:
        case 'ones':
            reconstructed_corr_matrix = np.ones(shape=(num_time_series, num_time_series))  # (D, D)
        case 'zeros':
            reconstructed_corr_matrix = np.zeros(shape=(num_time_series, num_time_series))  # (D, D)
        case _:
            raise ValueError(f"Unknown diagonal option '{diagonal:s}'; expected 'ones' or 'zeros'.")

    mask = np.tri(num_time_series, dtype=bool, k=-1)  # strictly lower-triangular boolean mask

    # Add lower triangular values.
    reconstructed_corr_matrix[mask] = cluster_array

    # Add upper triangular values (transpose the matrix first and then re-add the lower triangular values).
    reconstructed_corr_matrix = reconstructed_corr_matrix.T
    reconstructed_corr_matrix[mask] = cluster_array

    assert _check_symmetric(reconstructed_corr_matrix)

    return reconstructed_corr_matrix


def _check_symmetric(a: np.ndarray, rtol=1e-05, atol=1e-08) -> bool:
    return np.allclose(a, a.T, rtol=rtol, atol=atol)
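A quick round-trip sketch of the new helper (the three example values are illustrative):

```python
import numpy as np

from fcest.helpers.array_operations import reconstruct_symmetric_matrix_from_tril

# D = 3 time series -> D*(D-1)/2 = 3 lower-triangular values.
tril_values = np.array([0.3, -0.1, 0.7])
matrix = reconstruct_symmetric_matrix_from_tril(tril_values, num_time_series=3)
print(matrix)
# [[ 1.   0.3 -0.1]
#  [ 0.3  1.   0.7]
#  [-0.1  0.7  1. ]]
```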
77 changes: 77 additions & 0 deletions ruff.toml
@@ -0,0 +1,77 @@
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".ipynb_checkpoints",
".mypy_cache",
".nox",
".pants.d",
".pyenv",
".pytest_cache",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
".vscode",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"site-packages",
"venv",
]

# Same as Black.
line-length = 88
indent-width = 4

# Assume Python 3.10
target-version = "py310"

[lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
ignore = []

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

# Enable auto-formatting of code examples in docstrings. Markdown,
# reStructuredText code/literal blocks and doctests are all supported.
#
# This is currently disabled by default, but it is planned for this
# to be opt-out in the future.
docstring-code-format = false

# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
docstring-code-line-length = "dynamic"