From 49e019dec569070d57f5cac84056af4125897e94 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Sun, 14 Jul 2024 12:38:15 -0500 Subject: [PATCH 01/13] Starting new version. --- pyproject.toml | 4 ++-- src/pandahelper/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f403170..e6d4f68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "panda-helper" -version = "0.1.0" +version = "0.1.1" dependencies = [ "beautifulsoup4>=4.12.2", "numpy>=1.26.0", @@ -44,7 +44,7 @@ test = ["pytest>=7.4", "pylint>=3.0"] extend-include = ["*.ipynb"] [tool.ruff.lint] -select = ["D", "F", "B"] # pydocstyle, pyflakes, flake8-bugbear, isort +select = ["D", "F", "B"] # pydocstyle, pyflakes, flake8-bugbear [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/src/pandahelper/__init__.py b/src/pandahelper/__init__.py index 8b3e9b1..c0600a2 100644 --- a/src/pandahelper/__init__.py +++ b/src/pandahelper/__init__.py @@ -5,5 +5,5 @@ from pandahelper.profiles import DataFrameProfile, SeriesProfile from pandahelper.stats import distribution_stats, frequency_table -__version__ = "0.1.0" +__version__ = "0.1.1" __all__ = ["frequency_table", "distribution_stats", "DataFrameProfile", "SeriesProfile"] From dbb18aaf38bc48163cafc7bd01b206dfd453a1b5 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Sun, 14 Jul 2024 14:43:56 -0500 Subject: [PATCH 02/13] Change 'count' to 'number of columns' in nulls per row DataFrameProfile table. 
--- .gitignore | 6 ++-- src/pandahelper/profiles.py | 9 +++++- tests/test_data/test_df_profile_name.txt | 32 +++++++++---------- tests/test_data/test_df_profile_name_311.txt | 2 +- tests/test_data/test_df_profile_no_name.txt | 32 +++++++++---------- .../test_data/test_df_profile_no_name_311.txt | 2 +- 6 files changed, 45 insertions(+), 38 deletions(-) diff --git a/.gitignore b/.gitignore index b950a07..e73bcbb 100644 --- a/.gitignore +++ b/.gitignore @@ -9,14 +9,14 @@ conda_environment_dev_* # folders -.coverage .idea -data -notes dist htmlcov +notebooks +notes site __pycache__ # files +.coverage .DS_Store diff --git a/src/pandahelper/profiles.py b/src/pandahelper/profiles.py index 1940cd1..40337dc 100644 --- a/src/pandahelper/profiles.py +++ b/src/pandahelper/profiles.py @@ -43,9 +43,16 @@ def __init__(self, df: pd.DataFrame, *, name: str = "", fmt: str = "simple"): self.memory_usage = df.memory_usage(index=True, deep=True) / 1000000 # MB self.num_duplicates = sum(df.duplicated(keep="first")) self.nulls_per_row = df.isna().sum(axis=1) - self.null_stats = phs.dist_stats_dict(self.nulls_per_row) + self.null_stats = self.__null_stats() self._format = fmt + def __null_stats(self, delete_key="count"): + """Prepare distribution statistics for the number of nulls per row.""" + stats = phs.dist_stats_dict(self.nulls_per_row) + new_stats = {"Number of Columns": self.shape[1]} + del stats[delete_key] + return new_stats | stats + def __create_tables(self, table_fmt: str): """Create DataFrameProfile summary tables. 
diff --git a/tests/test_data/test_df_profile_name.txt b/tests/test_data/test_df_profile_name.txt index b71f123..d643fe4 100644 --- a/tests/test_data/test_df_profile_name.txt +++ b/tests/test_data/test_df_profile_name.txt @@ -40,19 +40,19 @@ VEHICLE TYPE CODE 5 object 0.006452 FLAG bool 0.0002 Summary of Nulls Per Row --------------------------- --------- -count 200 -min 3 -1% 3.99 -5% 6 -25% 7 -50% 8 -75% 10 -95% 12 -99% 14.01 -max 15 -mean 8.71 -standard deviation 2.04863 -median 8 -median absolute deviation 1 -skew 0.36218 +-------------------------- -------- +Number of Columns 30 +min 3 +1% 3.99 +5% 6 +25% 7 +50% 8 +75% 10 +95% 12 +99% 14.01 +max 15 +mean 8.71 +standard deviation 2.04863 +median 8 +median absolute deviation 1 +skew 0.36218 diff --git a/tests/test_data/test_df_profile_name_311.txt b/tests/test_data/test_df_profile_name_311.txt index 917e60e..16c9c05 100644 --- a/tests/test_data/test_df_profile_name_311.txt +++ b/tests/test_data/test_df_profile_name_311.txt @@ -41,7 +41,7 @@ FLAG bool 0.0002 Summary of Nulls Per Row -------------------------- --------- -count 200 +Number of Columns 30 min 3 1% 3.99 5% 6 diff --git a/tests/test_data/test_df_profile_no_name.txt b/tests/test_data/test_df_profile_no_name.txt index b15610c..0f160d4 100644 --- a/tests/test_data/test_df_profile_no_name.txt +++ b/tests/test_data/test_df_profile_no_name.txt @@ -39,19 +39,19 @@ VEHICLE TYPE CODE 5 object 0.006452 FLAG bool 0.0002 Summary of Nulls Per Row --------------------------- --------- -count 200 -min 3 -1% 3.99 -5% 6 -25% 7 -50% 8 -75% 10 -95% 12 -99% 14.01 -max 15 -mean 8.71 -standard deviation 2.04863 -median 8 -median absolute deviation 1 -skew 0.36218 +-------------------------- -------- +Number of Columns 30 +min 3 +1% 3.99 +5% 6 +25% 7 +50% 8 +75% 10 +95% 12 +99% 14.01 +max 15 +mean 8.71 +standard deviation 2.04863 +median 8 +median absolute deviation 1 +skew 0.36218 diff --git a/tests/test_data/test_df_profile_no_name_311.txt 
b/tests/test_data/test_df_profile_no_name_311.txt index 601a43d..150dca7 100644 --- a/tests/test_data/test_df_profile_no_name_311.txt +++ b/tests/test_data/test_df_profile_no_name_311.txt @@ -40,7 +40,7 @@ FLAG bool 0.0002 Summary of Nulls Per Row -------------------------- --------- -count 200 +Number of Columns 30 min 3 1% 3.99 5% 6 From 9b1121e6dc8b32390bfbe4f9750af4319d50412a Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Sun, 14 Jul 2024 15:07:32 -0500 Subject: [PATCH 03/13] Remove flaky test. --- tests/test_data/test_df_profile_name_311.txt | 58 ------------------- .../test_data/test_df_profile_no_name_311.txt | 57 ------------------ tests/test_profiles.py | 19 ------ 3 files changed, 134 deletions(-) delete mode 100644 tests/test_data/test_df_profile_name_311.txt delete mode 100644 tests/test_data/test_df_profile_no_name_311.txt diff --git a/tests/test_data/test_df_profile_name_311.txt b/tests/test_data/test_df_profile_name_311.txt deleted file mode 100644 index 16c9c05..0000000 --- a/tests/test_data/test_df_profile_name_311.txt +++ /dev/null @@ -1,58 +0,0 @@ -DataFrame-Level Info ----------------------- --------- -DF Name test_name -DF Shape (200, 30) -Duplicated Rows 0 -Memory Usage (MB) 0.200 - -Series Name Data Type Memory Usage (MB) ------------------------------ ----------- ------------------- -Index int64 0.000132 -CRASH DATE object 0.012785 -CRASH TIME object 0.01235 -BOROUGH object 0.010519 -ZIP CODE float64 0.0016 -LATITUDE float64 0.0016 -LONGITUDE float64 0.0016 -LOCATION object 0.014763 -ON STREET NAME object 0.015064 -CROSS STREET NAME object 0.01001 -OFF STREET NAME object 0.00952 -NUMBER OF PERSONS INJURED int64 0.0016 -NUMBER OF PERSONS KILLED int64 0.0016 -NUMBER OF PEDESTRIANS INJURED int64 0.0016 -NUMBER OF PEDESTRIANS KILLED int64 0.0016 -NUMBER OF CYCLIST INJURED int64 0.0016 -NUMBER OF CYCLIST KILLED int64 0.0016 -NUMBER OF MOTORIST INJURED int64 0.0016 -NUMBER OF MOTORIST KILLED int64 0.0016 
-CONTRIBUTING FACTOR VEHICLE 1 object 0.015643 -CONTRIBUTING FACTOR VEHICLE 2 object 0.012727 -CONTRIBUTING FACTOR VEHICLE 3 object 0.007012 -CONTRIBUTING FACTOR VEHICLE 4 object 0.006652 -CONTRIBUTING FACTOR VEHICLE 5 object 0.006436 -COLLISION_ID int64 0.0016 -VEHICLE TYPE CODE 1 object 0.014306 -VEHICLE TYPE CODE 2 object 0.012294 -VEHICLE TYPE CODE 3 object 0.00715 -VEHICLE TYPE CODE 4 object 0.00673 -VEHICLE TYPE CODE 5 object 0.00646 -FLAG bool 0.0002 - -Summary of Nulls Per Row --------------------------- --------- -Number of Columns 30 -min 3 -1% 3.99 -5% 6 -25% 7 -50% 8 -75% 10 -95% 12 -99% 14.01 -max 15 -mean 8.71 -standard deviation 2.04863 -median 8 -median absolute deviation 1 -skew 0.36218 diff --git a/tests/test_data/test_df_profile_no_name_311.txt b/tests/test_data/test_df_profile_no_name_311.txt deleted file mode 100644 index 150dca7..0000000 --- a/tests/test_data/test_df_profile_no_name_311.txt +++ /dev/null @@ -1,57 +0,0 @@ -DataFrame-Level Info ----------------------- --------- -DF Shape (200, 30) -Duplicated Rows 0 -Memory Usage (MB) 0.200 - -Series Name Data Type Memory Usage (MB) ------------------------------ ----------- ------------------- -Index int64 0.000132 -CRASH DATE object 0.012785 -CRASH TIME object 0.01235 -BOROUGH object 0.010519 -ZIP CODE float64 0.0016 -LATITUDE float64 0.0016 -LONGITUDE float64 0.0016 -LOCATION object 0.014763 -ON STREET NAME object 0.015064 -CROSS STREET NAME object 0.01001 -OFF STREET NAME object 0.00952 -NUMBER OF PERSONS INJURED int64 0.0016 -NUMBER OF PERSONS KILLED int64 0.0016 -NUMBER OF PEDESTRIANS INJURED int64 0.0016 -NUMBER OF PEDESTRIANS KILLED int64 0.0016 -NUMBER OF CYCLIST INJURED int64 0.0016 -NUMBER OF CYCLIST KILLED int64 0.0016 -NUMBER OF MOTORIST INJURED int64 0.0016 -NUMBER OF MOTORIST KILLED int64 0.0016 -CONTRIBUTING FACTOR VEHICLE 1 object 0.015643 -CONTRIBUTING FACTOR VEHICLE 2 object 0.012727 -CONTRIBUTING FACTOR VEHICLE 3 object 0.007012 -CONTRIBUTING FACTOR VEHICLE 4 object 0.006652 
-CONTRIBUTING FACTOR VEHICLE 5 object 0.006436 -COLLISION_ID int64 0.0016 -VEHICLE TYPE CODE 1 object 0.014306 -VEHICLE TYPE CODE 2 object 0.012294 -VEHICLE TYPE CODE 3 object 0.00715 -VEHICLE TYPE CODE 4 object 0.00673 -VEHICLE TYPE CODE 5 object 0.00646 -FLAG bool 0.0002 - -Summary of Nulls Per Row --------------------------- --------- -Number of Columns 30 -min 3 -1% 3.99 -5% 6 -25% 7 -50% 8 -75% 10 -95% 12 -99% 14.01 -max 15 -mean 8.71 -standard deviation 2.04863 -median 8 -median absolute deviation 1 -skew 0.36218 diff --git a/tests/test_profiles.py b/tests/test_profiles.py index cde7cf1..e27ecbd 100644 --- a/tests/test_profiles.py +++ b/tests/test_profiles.py @@ -35,25 +35,6 @@ def test_dataframe_profile_valid_312(test_df): assert filecmp.cmp(compare_file, test_file, shallow=False) -@pytest.mark.skipif( - not ((3, 11) <= sys.version_info < (3, 12)), reason="Runs on Python 3.11" -) -def test_dataframe_profile_valid_311(test_df): - """Generated DataFrame profile should match test profile (Python 3.11).""" - compare_profile_name = "test_df_profile_name_311.txt" - compare_profile_no_name = "test_df_profile_no_name_311.txt" - compare_files = [ - os.path.join(TEST_DATA_DIR, compare_profile_name), - os.path.join(TEST_DATA_DIR, compare_profile_no_name), - ] - names = ["test_name", ""] - with tempfile.TemporaryDirectory() as tmp: - for name, compare_file in zip(names, compare_files): - test_file = os.path.join(tmp, "temp.txt") - php.DataFrameProfile(test_df, name=name).save(test_file) - assert filecmp.cmp(compare_file, test_file, shallow=False) - - def test_dataframe_profile_invalid(non_series_invalid, num_series, cat_like_series): """DataFrame profile should not accept invalid data types.""" invalid_types = [*non_series_invalid, num_series, cat_like_series] From f30f8e7ebb4e38818685c7b058c21c20223464b4 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:14:28 -0500 Subject: [PATCH 04/13] Lower default value for 
SeriesProfile frequency table. Also minor code formatting changes. --- src/pandahelper/profiles.py | 42 ++++++++++++++++++------------------- src/pandahelper/stats.py | 4 ++-- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/pandahelper/profiles.py b/src/pandahelper/profiles.py index 40337dc..a09f1fa 100644 --- a/src/pandahelper/profiles.py +++ b/src/pandahelper/profiles.py @@ -137,7 +137,7 @@ def __init__( series: pd.Series, *, fmt: str = "simple", - freq_most_least: tuple = (20, 5), + freq_most_least: tuple = (10, 5), ): """Initialize SeriesProfile. @@ -233,26 +233,6 @@ def save(self, path): fh.write(str(self)) -def _format_html_table(table: str, align: str = "left", font: str = "monospace") -> str: - """Add additional formatting to HTML table prepared by tabulate.""" - soup = bs4.BeautifulSoup(table, "html.parser") - for row in soup.find_all("tr"): - tags = row.find_all(["th", "td"]) # row in thead will have 'th' - for tag in tags: - tag["style"] = f"font-family: {font}, monospace; text-align: {align};" - return str(soup) - - -def _decimal_align_col(table: str, col: int): - """Create decimal-aligned numbers in column of HTML table.""" - soup = bs4.BeautifulSoup(table, "html.parser") - for row in soup.find_all("tr"): - tags = row.find_all("td") - if tags: - tags[col].string = tags[col].string.replace(" ", "\u2007") # figure space - return str(soup) - - def _abbreviate_df(df, first=20, last=5): """Return a shortened DataFrame or Series. 
@@ -282,3 +262,23 @@ def _abbreviate_df(df, first=20, last=5): else: abbrev = pd.concat([df.iloc[:first], df.iloc[(len(df) - last) : len(df)]]) return abbrev + + +def _format_html_table(table: str, align: str = "left", font: str = "monospace") -> str: + """Add additional formatting to HTML table prepared by tabulate.""" + soup = bs4.BeautifulSoup(table, "html.parser") + for row in soup.find_all("tr"): + tags = row.find_all(["th", "td"]) # row in thead will have 'th' + for tag in tags: + tag["style"] = f"font-family: {font}, monospace; text-align: {align};" + return str(soup) + + +def _decimal_align_col(table: str, col: int): + """Create decimal-aligned numbers in column of HTML table.""" + soup = bs4.BeautifulSoup(table, "html.parser") + for row in soup.find_all("tr"): + tags = row.find_all("td") + if tags: + tags[col].string = tags[col].string.replace(" ", "\u2007") # figure space + return str(soup) diff --git a/src/pandahelper/stats.py b/src/pandahelper/stats.py index 25ae251..9f8ee63 100644 --- a/src/pandahelper/stats.py +++ b/src/pandahelper/stats.py @@ -31,7 +31,7 @@ def frequency_table(series: pd.Series) -> pd.DataFrame: return output.sort_values(by="Count", ascending=False) -def _abbreviate_string(s, limit=60): +def _abbreviate_string(s, limit=60) -> str: """Return first x characters of a string. Args: @@ -157,7 +157,7 @@ def _add_quantiles(series: pd.Series, d: dict): d["99%"] = series.quantile(0.99) -def _order_stats(stats: dict): +def _order_stats(stats: dict) -> dict: """Sort stats dictionary by order provided in all_stats. Helper function used in distribution_stats. From 9ea7a4108996422eaa433e3b86ed20dbbb3c0bdb Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:39:07 -0500 Subject: [PATCH 05/13] SeriesProfile now reports gaps for timeseries data. Also added time series functions to calculate time gaps. 
gh-20 --- docs/api.md | 4 ++ src/pandahelper/__init__.py | 10 ++++- src/pandahelper/profiles.py | 76 +++++++++++++++++++++++-------------- src/pandahelper/times.py | 50 ++++++++++++++++++++++++ tests/conftest.py | 13 ++++++- tests/test_profiles.py | 60 +++++++++++++++++++++++++++++ tests/test_times.py | 44 +++++++++++++++++++++ tests/utils.py | 16 ++++++++ 8 files changed, 242 insertions(+), 31 deletions(-) create mode 100644 src/pandahelper/times.py create mode 100644 tests/test_times.py create mode 100644 tests/utils.py diff --git a/docs/api.md b/docs/api.md index b8e6d06..63313ad 100644 --- a/docs/api.md +++ b/docs/api.md @@ -8,3 +8,7 @@ description: Panda-Helper API Reference. Detailed description of the Panda-Helpe
::: pandahelper.stats + +
+ +::: pandahelper.times diff --git a/src/pandahelper/__init__.py b/src/pandahelper/__init__.py index c0600a2..b0a5288 100644 --- a/src/pandahelper/__init__.py +++ b/src/pandahelper/__init__.py @@ -4,6 +4,14 @@ from pandahelper.profiles import DataFrameProfile, SeriesProfile from pandahelper.stats import distribution_stats, frequency_table +from pandahelper.times import time_diffs, time_diffs_index __version__ = "0.1.1" -__all__ = ["frequency_table", "distribution_stats", "DataFrameProfile", "SeriesProfile"] +__all__ = [ + "frequency_table", + "distribution_stats", + "DataFrameProfile", + "SeriesProfile", + "time_diffs", + "time_diffs_index", +] diff --git a/src/pandahelper/profiles.py b/src/pandahelper/profiles.py index a09f1fa..3aea3fa 100644 --- a/src/pandahelper/profiles.py +++ b/src/pandahelper/profiles.py @@ -5,6 +5,7 @@ import pandas.api.types as pat from tabulate import tabulate import pandahelper.stats as phs +import pandahelper.times as pht class DataFrameProfile: @@ -61,7 +62,6 @@ def __create_tables(self, table_fmt: str): Returns: list(str): List of Tabulate tables. - """ df_info = [ ("DF Shape", self.shape), @@ -129,7 +129,10 @@ class SeriesProfile: num_unique (int): Number of unique values. num_nulls (int): Number of null values. frequency (pd.DataFrame): Frequency table with counts and percentage. - stats (list): Distribution statistics for Series. + stats (dict): Distribution statistics for Series. + time_diffs (pd.Series): Time diffs (gaps) if series is of type `datetime64`. + Alternately, can be time diffs in a Series with a DateTimeIndex if the + `time_index` parameter was set to `True` when creating Series Profile. """ def __init__( @@ -138,48 +141,57 @@ def __init__( *, fmt: str = "simple", freq_most_least: tuple = (10, 5), + time_index: bool = False, ): """Initialize SeriesProfile. Args: - series (pd.Series): DataFrame to profile. - fmt (str: optional): Printed table format. See - https://github.com/astanin/python-tabulate for options. 
+ series (pd.Series): Pandas Series to profile. + fmt (str: optional): Printed table format. See: + for options. freq_most_least (tuple: optional): Tuple (x, y) of the x most common and y least common values to display in frequency table. + time_index (bool: optional): Whether to use the index for calculating time + diffs for a `datetime64`-related Pandas Series. Not relevant for + non-time related Series. Raises: - TypeError: If input is not a pd.Series. + TypeError: If input is not a Pandas Series. """ if not isinstance(series, pd.Series): raise TypeError(f"{series}, is not pd.DataFrame") if freq_most_least[0] < 0 or freq_most_least[1] < 0: raise ValueError("Tuple values must be >= 0!") + self._format = fmt + self._freq_table = freq_most_least self.name = series.name self.dtype = series.dtype self.count = series.count() # counts non-null values self.num_unique = series.nunique() self.num_nulls = series.size - self.count # NAs, nans, NaT, but not "" self.frequency = phs.frequency_table(series) - self.stats = None - if not ( - pat.is_object_dtype(self.dtype) - or isinstance(self.dtype, pd.CategoricalDtype) - ): - self.stats = phs.dist_stats_dict(series) - self._format = fmt - self._freq_table = freq_most_least + self.stats = self.__calc_stats(series) + self.time_diffs = self.__calc_time_diffs(series, time_index) - def __create_tables(self, table_fmt: str): - """Create SeriesProfile summary tables. - - Args: - table_fmt (str): Tabulate table format name. - - Returns: - list(str): List of Tabulate tables. 
- - """ + def __calc_stats(self, series): + """Calculate distribution stats if allowed dtype, else return None.""" + if pat.is_object_dtype(self.dtype) or isinstance( + self.dtype, pd.CategoricalDtype + ): + return None + return phs.dist_stats_dict(series) + + @staticmethod + def __calc_time_diffs(series, use_time_index: bool) -> pd.Series or None: + """Calculate time diffs for time-indexed series or datetime64 series.""" + if use_time_index and pat.is_datetime64_any_dtype(series.index): + return pht.time_diffs_index(series) + if (not use_time_index) and pat.is_datetime64_any_dtype(series): + return pht.time_diffs(series) + return None + + def __create_tables(self, table_fmt: str) -> list[str]: + """Create and return SeriesProfile summary tables.""" series_info = [ ("Data Type", self.dtype), ("Count", self.count), @@ -201,16 +213,22 @@ def __create_tables(self, table_fmt: str): stats_table = "" if self.stats is not None: stats = self.stats - if pat.is_complex_dtype( - self.dtype - ): # tabulate converts complex numbers to real numbers + # tabulate casts complex numbers to real numbers, dropping imaginary part + if pat.is_complex_dtype(self.dtype): stats = {k: str(v) for k, v in self.stats.items()} stats_table = tabulate( list(stats.items()), headers=["Statistic", "Value"], tablefmt=table_fmt, ) - return [series_table, freq_table, stats_table] + time_diffs_table = "" + if self.time_diffs is not None: + time_diffs_table = tabulate( + phs.frequency_table(self.time_diffs), + headers=["Time Gaps (Diffs)", "Count", "% of total"], + tablefmt=table_fmt, + ) + return [series_table, freq_table, stats_table, time_diffs_table] def __repr__(self): """Printable version of profile.""" @@ -221,7 +239,7 @@ def _repr_html_(self): """HTML representation of profile.""" tables = [_format_html_table(t) for t in self.__create_tables("html")] tables[2] = _decimal_align_col(tables[2], 1) - return tables[0] + "
" + tables[1] + "
" + tables[2] + return tables[0] + "
" + tables[1] + "
" + tables[2] + "
" + tables[3] def save(self, path): """Save profile to provided path. diff --git a/src/pandahelper/times.py b/src/pandahelper/times.py new file mode 100644 index 0000000..c1aabd1 --- /dev/null +++ b/src/pandahelper/times.py @@ -0,0 +1,50 @@ +"""Panda-Helper time-series functions.""" + +import pandas as pd +import pandas.api.types as pat + + +def time_diffs(series: pd.Series | pd.DatetimeIndex) -> pd.Series(pd.Timedelta): + """Calculate time diffs (gaps) for Pandas Series or Index of timestamps. + + Sorts input by time before calculating diffs. + + Args: + series (pd.Series or pd.DatetimeIndex): Pandas Series or DatetimeIndex + to calculate time diffs on. + + Returns: + Series of diffs (gaps) indexed by the time the diff was calculated. + + Raises: + TypeError: If input is not Series of type datetime64 or DatetimeIndex. + """ + if not pat.is_datetime64_any_dtype(series.dtype): + raise TypeError("Should be Series of datetime64 dtype.") + series = series.sort_values() + diffs = pd.Series(series.diff(), name="diffs") + diffs.index = series + return diffs + + +def time_diffs_index(df: pd.DataFrame | pd.Series) -> pd.Series(pd.Timedelta): + """Calculate time diffs (gaps) for time-indexed Pandas Series or Dataframe. + + Sorts input by time before calculating diffs. + + Args: + df (pd.Series or pd.DataFrame): Pandas Series or DataFrame with DateTimeIndex + to calculate time diffs on. + + Returns: + Series of diffs (gaps) indexed by the time the diff was calculated. + + Raises: + TypeError: If input does not have a DatetimeIndex. 
+ """ + if isinstance(df.index, pd.DatetimeIndex): + df = df.sort_index() + diffs = pd.Series(df.index.diff(), name="diffs") + diffs.index = df.index + return diffs + raise TypeError(f"Index should be of type {pd.DatetimeIndex}") diff --git a/tests/conftest.py b/tests/conftest.py index e04f82f..f19a424 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,13 +2,14 @@ Note that fixtures with a package-scope are run once and then available as cached value. - """ +from datetime import datetime import os import numpy as np import pandas as pd import pytest +from .utils import make_category_data TEST_DATA_DIR = "tests/test_data" TEST_DATA_FILE = "sample_collisions.csv" @@ -16,6 +17,16 @@ NUM_SERIES = "NUMBER OF PERSONS INJURED" +@pytest.fixture +def cat_df(scope="package"): # pylint: disable=W0613 + """Return test pd.DataFrame.""" + start = datetime(year=1999, month=1, day=1, hour=0, minute=0) + end = start + pd.Timedelta(hours=10) + df = make_category_data("Springfield", start, end, freq="h") + df = df.sample(frac=1, random_state=2) # index is out of order + return df + + @pytest.fixture def test_df(scope="package"): # pylint: disable=W0613 """Return test pd.DataFrame.""" diff --git a/tests/test_profiles.py b/tests/test_profiles.py index e27ecbd..a00b8a1 100644 --- a/tests/test_profiles.py +++ b/tests/test_profiles.py @@ -9,6 +9,7 @@ import bs4 import numpy as np import pandas as pd +import pandas.api.types as pat import pytest import pandahelper.profiles as php @@ -200,6 +201,65 @@ def test_series_profile_frequency_table(test_df): assert len(freq_table.find_all("tr")) == v + 1 # +1 for header +def test_series_profile_time_index_true(cat_df): + """time_index=True calculates time diffs for Series with DateTimeIndex.""" + series = cat_df["category"] + profile = php.SeriesProfile(series, time_index=True) + assert pat.is_datetime64_any_dtype(series.index) + assert profile.time_diffs.iloc[0] is pd.NaT + assert all(profile.time_diffs[1:] == pd.Timedelta(hours=1)) + + 
+def test_series_profile_time_index_false(cat_df): + """time_index=False does not calculate time diffs for Series with DateTimeIndex.""" + series = cat_df["category"] + profile = php.SeriesProfile(series, time_index=False) + assert pat.is_datetime64_any_dtype(series.index) + assert profile.time_diffs is None + + +@pytest.fixture +def ts_timeindex(scope="module"): # pylint: disable=W0613 + """Return pd.Series of type datetime64 with DatetimeIndex.""" + start = datetime(year=1999, month=1, day=1, hour=0, minute=0) + end = start + pd.Timedelta(hours=40) + time_series = pd.Series(pd.date_range(start, end, freq="4h", inclusive="left")) + index_end = start + pd.Timedelta(hours=10) + time_series.index = pd.date_range(start, index_end, freq="h", inclusive="left") + return time_series + + +def test_series_profile_ts_range_index_true(ts_timeindex): # pylint: disable=W0621 + """time_index=True does not calculate time diffs for Series with RangeIndex.""" + series = ts_timeindex + series.index = range(len(ts_timeindex)) + profile = php.SeriesProfile(series, time_index=True) + assert not pat.is_datetime64_any_dtype(series.index) + assert profile.time_diffs is None + + +def test_series_profile_both_time_index_false(ts_timeindex): # pylint: disable=W0621 + """SeriesProfile should have time diffs from series, (not index). + + Given for Series(datetime64) with TimeIndex and time_index=False. + """ + profile = php.SeriesProfile(ts_timeindex, time_index=False) + assert pat.is_datetime64_any_dtype(ts_timeindex.index) + assert profile.time_diffs.iloc[0] is pd.NaT + assert all(profile.time_diffs[1:] == pd.Timedelta(hours=4)) + + +def test_series_profile_both_time_index_true(ts_timeindex): # pylint: disable=W0621 + """SeriesProfile should have time diffs from index, (not series). + + Given for Series(datetime64) with TimeIndex and time_index=True. 
+ """ + profile = php.SeriesProfile(ts_timeindex, time_index=True) + assert pat.is_datetime64_any_dtype(ts_timeindex.index) + assert profile.time_diffs.iloc[0] is pd.NaT + assert all(profile.time_diffs[1:] == pd.Timedelta(hours=1)) + + def test_series_profile_frequency_table_invalid(test_df): """Invalid frequency table most_least tuples should raise ValueError.""" invalid_tuples = [(0, -1), (-1, 0), (-1, -1)] diff --git a/tests/test_times.py b/tests/test_times.py new file mode 100644 index 0000000..17665ab --- /dev/null +++ b/tests/test_times.py @@ -0,0 +1,44 @@ +"""Tests for functions in times.py.""" + +import pandas as pd +import pytest +import pandahelper.times as pht + + +def test_time_diffs_index(cat_df): + """time_diffs_index should work on shuffled pd.Series or pd.DataFrame.""" + # test DF + df_result = pht.time_diffs_index(cat_df) + assert df_result.iloc[0] is pd.NaT + assert all(df_result[1:] == pd.Timedelta(hours=1)) + # test Series + series_result = pht.time_diffs_index(cat_df["B"]) + assert series_result.iloc[0] is pd.NaT + assert all(series_result[1:] == pd.Timedelta(hours=1)) + + +def test_time_diffs_index_exception(): + """pd.DataFrame and pd.Series without time index raise exception.""" + data = {"A": list(range(5))} + dtypes = [pd.DataFrame(data), pd.Series(data)] + for tipo in dtypes: + with pytest.raises(TypeError) as exc: + pht.time_diffs_index(tipo) + assert str(pd.DatetimeIndex) in str(exc) + + +def test_time_diffs(cat_df): + """time_diffs should work on shuffled pd.Series or Index of timestamps.""" + valid = [cat_df.index, pd.Series(cat_df.index)] + for v in valid: + result = pht.time_diffs(v) + assert result.iloc[0] is pd.NaT + assert all(result[1:] == pd.Timedelta(hours=1)) + + +def test_time_diffs_exception(): + """Non-datetime64 pd.Series raises exception.""" + invalid = [pd.Series(list(range(5))), pd.Series([pd.Timedelta(hours=1)] * 2)] + for tipo in invalid: + with pytest.raises(TypeError): + pht.time_diffs(tipo) diff --git 
a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..52d462c --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,16 @@ +"""Test-related utility functions.""" + +import pandas as pd + + +def make_category_data(cat_name, start, end, freq): + """Return pd.DataFrame of arbitrary data for specified 'category'.""" + rng = pd.date_range(start, end, freq=freq, inclusive="left") + data = { + "A": list(range(1, len(rng) + 1, 1)), + "B": [chr(ord("A") + (x % 26)) for x in range(0, len(rng), 1)], + "C": [float((-1) ** (x % 2) * x) for x in range(0, len(rng), 1)], + } + df = pd.DataFrame(data, index=rng) + df["category"] = cat_name + return df From 4c172d7cb614cada8939ed93bd36b59911b95997 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Tue, 16 Jul 2024 01:01:57 -0500 Subject: [PATCH 06/13] Fix type hint for Python 3.9. gh-20 --- src/pandahelper/times.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pandahelper/times.py b/src/pandahelper/times.py index c1aabd1..98461a1 100644 --- a/src/pandahelper/times.py +++ b/src/pandahelper/times.py @@ -1,10 +1,11 @@ """Panda-Helper time-series functions.""" +from typing import Union # TODO: Remove when deprecating Python 3.9 import pandas as pd import pandas.api.types as pat -def time_diffs(series: pd.Series | pd.DatetimeIndex) -> pd.Series(pd.Timedelta): +def time_diffs(series: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Timedelta): """Calculate time diffs (gaps) for Pandas Series or Index of timestamps. Sorts input by time before calculating diffs. @@ -27,7 +28,7 @@ def time_diffs(series: pd.Series | pd.DatetimeIndex) -> pd.Series(pd.Timedelta): return diffs -def time_diffs_index(df: pd.DataFrame | pd.Series) -> pd.Series(pd.Timedelta): +def time_diffs_index(df: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Timedelta): """Calculate time diffs (gaps) for time-indexed Pandas Series or Dataframe. Sorts input by time before calculating diffs. 
From cc506d17b0f8da34f5df002d195a755f31b13921 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Tue, 16 Jul 2024 01:19:41 -0500 Subject: [PATCH 07/13] Update CHANGELOG. --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 271b43b..fce8c94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,14 @@ # Changelog ## Unreleased +- Add functionality to perform some common data cleaning tasks. +- Add `geo.py` module and functionality to set 'close' lat-long coordinates to same value. ## 0.1.1 - Unreleased ### Added -- functionality to detect time series gaps +- SeriesProfile now reports gaps in pd.Series with type `datetime64` or for Series with `DatetimeIndex`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) +- `times.py` module has been added with public functions `time_diffs` and `time_diffs_index`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) +- [`freq_most_least` default parameter for SeriesProfile has been changed to `(10, 5)`.](https://github.com/ray310/Panda-Helper/commit/9ea7a4108996422eaa433e3b86ed20dbbb3c0bdb) ____ ## 0.1.0 - 2024-07-14 From 7001a4fa2deb1061bae643e2e3b6feae23fe6f8a Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:17:04 -0500 Subject: [PATCH 08/13] Add id_gaps and id_gaps_index functions to times.py. 
gh-20 --- CHANGELOG.md | 2 +- mkdocs.yml | 1 + src/pandahelper/__init__.py | 4 +- src/pandahelper/times.py | 122 ++++++++++++++++++++++++++++++++++-- tests/conftest.py | 18 ++++-- tests/test_profiles.py | 11 ---- tests/test_times.py | 65 +++++++++++++++---- 7 files changed, 188 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fce8c94..d373552 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ## 0.1.1 - Unreleased ### Added - SeriesProfile now reports gaps in pd.Series with type `datetime64` or for Series with `DatetimeIndex`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) -- `times.py` module has been added with public functions `time_diffs` and `time_diffs_index`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) +- `times.py` module has been added with public functions `time_diffs`, `time_diffs_index`, `id_gaps`, `id_gaps_index`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) - [`freq_most_least` default parameter for SeriesProfile has been changed to `(10, 5)`.](https://github.com/ray310/Panda-Helper/commit/9ea7a4108996422eaa433e3b86ed20dbbb3c0bdb) ____ diff --git a/mkdocs.yml b/mkdocs.yml index 778b6d1..df63eb4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -101,6 +101,7 @@ plugins: python: paths: [src] options: + members_order: alphabetical docstring_style: google docstring_section_style: list docstring_options: diff --git a/src/pandahelper/__init__.py b/src/pandahelper/__init__.py index b0a5288..58894b2 100644 --- a/src/pandahelper/__init__.py +++ b/src/pandahelper/__init__.py @@ -4,7 +4,7 @@ from pandahelper.profiles import DataFrameProfile, SeriesProfile from pandahelper.stats import distribution_stats, frequency_table -from pandahelper.times import time_diffs, time_diffs_index +from pandahelper.times import time_diffs, time_diffs_index, id_gaps, id_gaps_index __version__ = "0.1.1" __all__ = [ @@ -14,4 +14,6 @@ "SeriesProfile", "time_diffs", "time_diffs_index", + "id_gaps", + 
"id_gaps_index", ] diff --git a/src/pandahelper/times.py b/src/pandahelper/times.py index 98461a1..1a99154 100644 --- a/src/pandahelper/times.py +++ b/src/pandahelper/times.py @@ -6,7 +6,7 @@ def time_diffs(series: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Timedelta): - """Calculate time diffs (gaps) for Pandas Series or Index of timestamps. + """Calculate time difference between subsequent observations. Sorts input by time before calculating diffs. @@ -19,19 +19,39 @@ def time_diffs(series: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Timed Raises: TypeError: If input is not Series of type datetime64 or DatetimeIndex. + + Examples: + Calculate time differences between observations on Series of timestamps after + it has been randomized: + + >>> import pandahelper as ph + >>> import pandas as pd + >>> + >>> start = pd.Timestamp(year=1999, month=1, day=1) + >>> rng = pd.date_range(start, periods=10, freq="D").delete([3, 4, 5, 8]) + >>> series = pd.Series(rng).sample(frac=1, random_state=3) # randomize order + + >>> ph.time_diffs(series) + 1999-01-01 NaT + 1999-01-02 1 days + 1999-01-03 1 days + 1999-01-07 4 days + 1999-01-08 1 days + 1999-01-10 2 days + Name: diffs, dtype: timedelta64[ns] """ if not pat.is_datetime64_any_dtype(series.dtype): - raise TypeError("Should be Series of datetime64 dtype.") + raise TypeError("Should be of datetime64 dtype.") series = series.sort_values() diffs = pd.Series(series.diff(), name="diffs") diffs.index = series return diffs -def time_diffs_index(df: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Timedelta): - """Calculate time diffs (gaps) for time-indexed Pandas Series or Dataframe. +def time_diffs_index(df: Union[pd.Series, pd.DataFrame]) -> pd.Series(pd.Timedelta): + """Calculate time difference between subsequent time-indexed observations. - Sorts input by time before calculating diffs. + Sorts input by time index before calculating diffs. 
Args: df (pd.Series or pd.DataFrame): Pandas Series or DataFrame with DateTimeIndex @@ -42,6 +62,27 @@ def time_diffs_index(df: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Tim Raises: TypeError: If input does not have a DatetimeIndex. + + Examples: + Calculate time differences between observations on time-indexed DataFrame after + it has been randomized: + + >>> import pandahelper as ph + >>> import pandas as pd + >>> + >>> start = pd.Timestamp(year=1999, month=1, day=1) + >>> rng = pd.date_range(start, periods=10, freq="D").delete([3, 4, 5, 8]) + >>> # index by time then randomize order + >>> df = pd.DataFrame(range(len(rng)), index=rng).sample(frac=1, random_state=3) + + >>> ph.time_diffs_index(df) + 1999-01-01 NaT + 1999-01-02 1 days + 1999-01-03 1 days + 1999-01-07 4 days + 1999-01-08 1 days + 1999-01-10 2 days + Name: diffs, dtype: timedelta64[ns] """ if isinstance(df.index, pd.DatetimeIndex): df = df.sort_index() @@ -49,3 +90,74 @@ def time_diffs_index(df: Union[pd.Series, pd.DatetimeIndex]) -> pd.Series(pd.Tim diffs.index = df.index return diffs raise TypeError(f"Index should be of type {pd.DatetimeIndex}") + + +def id_gaps( + series: Union[pd.Series, pd.DatetimeIndex], threshold: pd.Timedelta +) -> pd.DataFrame: + """Identify time gaps above `threshold` in datetime64 Series or DatetimeIndex. + + Sorts input by time before calculating gaps. + + Args: + series (pd.Series or pd.DatetimeIndex): `datetime64` Series or DatetimeIndex. + threshold (pd.Timedelta): Threshold to identify gaps + (and not expected time differences). + + Returns: + One-column Pandas DataFrame of gaps indexed by when gap was calculated. 
+ + Examples: + Identify time gaps on Series of timestamps with a 2 and 4 hour + gap after it has been randomized: + + >>> import pandahelper as ph + >>> import pandas as pd + >>> + >>> start = pd.Timestamp(year=1999, month=1, day=1) + >>> rng = pd.date_range(start, periods=24, freq="1h").delete([3, 4, 8, 9, 10]) + >>> series = pd.Series(rng).sample(frac=1, random_state=3) # randomize order + + >>> ph.id_gaps(series, pd.Timedelta(hours=1)) + diffs + 1999-01-01 11:00:00 0 days 04:00:00 + 1999-01-01 04:00:00 0 days 02:00:00 + """ + diffs = time_diffs(series) + return diffs[diffs > threshold].sort_values(ascending=False).to_frame() + + +def id_gaps_index( + df: Union[pd.Series, pd.DataFrame], threshold: pd.Timedelta +) -> pd.DataFrame: + """Identify time gaps above `threshold` in time-indexed Series or DataFrame. + + Sorts input by time index before calculating diffs. + + Args: + df (pd.Series or pd.DataFrame): Time-indexed Series or DataFrame. + threshold (pd.Timedelta): Threshold to identify gaps + (and not expected time differences). + + Returns: + One-column Pandas DataFrame of gaps indexed by when gap was calculated. + + Examples: + Identify time gaps on an hourly, time-indexed Series with a 2 and 4 hour + gap after it has been randomized: + + >>> import pandahelper as ph + >>> import pandas as pd + >>> + >>> start = pd.Timestamp(year=1999, month=1, day=1) + >>> rng = pd.date_range(start, periods=24, freq="1h").delete([3, 8, 9, 10]) + >>> # index by time then randomize order + >>> df = pd.DataFrame(range(len(rng)), index=rng).sample(frac=1, random_state=3) + + >>> ph.id_gaps_index(df, pd.Timedelta(hours=1)) + diffs + 1999-01-01 11:00:00 0 days 04:00:00 + 1999-01-01 04:00:00 0 days 02:00:00 + """ + diffs = time_diffs_index(df) + return diffs[diffs > threshold].sort_values(ascending=False).to_frame() diff --git a/tests/conftest.py b/tests/conftest.py index f19a424..a7bb3d8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,6 @@ cached value. 
""" -from datetime import datetime import os import numpy as np import pandas as pd @@ -19,17 +18,28 @@ @pytest.fixture def cat_df(scope="package"): # pylint: disable=W0613 - """Return test pd.DataFrame.""" - start = datetime(year=1999, month=1, day=1, hour=0, minute=0) + """Return test pd.DataFrame with DatetimeIndex.""" + start = pd.Timestamp(year=1999, month=1, day=1) end = start + pd.Timedelta(hours=10) df = make_category_data("Springfield", start, end, freq="h") df = df.sample(frac=1, random_state=2) # index is out of order return df +@pytest.fixture +def ts_timeindex(scope="package"): # pylint: disable=W0613 + """Return pd.Series of type datetime64 with DatetimeIndex.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=40) + time_series = pd.Series(pd.date_range(start, end, freq="4h", inclusive="left")) + index_end = start + pd.Timedelta(hours=10) + time_series.index = pd.date_range(start, index_end, freq="h", inclusive="left") + return time_series + + @pytest.fixture def test_df(scope="package"): # pylint: disable=W0613 - """Return test pd.DataFrame.""" + """Return test pd.DataFrame from sample of NYC collisions dataset.""" return pd.read_csv(os.path.join(TEST_DATA_DIR, TEST_DATA_FILE)) diff --git a/tests/test_profiles.py b/tests/test_profiles.py index a00b8a1..713b544 100644 --- a/tests/test_profiles.py +++ b/tests/test_profiles.py @@ -218,17 +218,6 @@ def test_series_profile_time_index_false(cat_df): assert profile.time_diffs is None -@pytest.fixture -def ts_timeindex(scope="module"): # pylint: disable=W0613 - """Return pd.Series of type datetime64 with DatetimeIndex.""" - start = datetime(year=1999, month=1, day=1, hour=0, minute=0) - end = start + pd.Timedelta(hours=40) - time_series = pd.Series(pd.date_range(start, end, freq="4h", inclusive="left")) - index_end = start + pd.Timedelta(hours=10) - time_series.index = pd.date_range(start, index_end, freq="h", inclusive="left") - return time_series - - def 
test_series_profile_ts_range_index_true(ts_timeindex): # pylint: disable=W0621 """time_index=True does not calculate time diffs for Series with RangeIndex.""" series = ts_timeindex diff --git a/tests/test_times.py b/tests/test_times.py index 17665ab..83dfba2 100644 --- a/tests/test_times.py +++ b/tests/test_times.py @@ -5,6 +5,23 @@ import pandahelper.times as pht +def test_time_diffs(cat_df): + """time_diffs should work on shuffled pd.Series or Index of timestamps.""" + valid = [cat_df.index, pd.Series(cat_df.index)] + for v in valid: + result = pht.time_diffs(v) + assert result.iloc[0] is pd.NaT + assert all(result[1:] == pd.Timedelta(hours=1)) + + +def test_time_diffs_exception(): + """Non-datetime64 pd.Series raises exception.""" + invalid = [pd.Series(list(range(5))), pd.Series([pd.Timedelta(hours=1)] * 2)] + for tipo in invalid: + with pytest.raises(TypeError): + pht.time_diffs(tipo) + + def test_time_diffs_index(cat_df): """time_diffs_index should work on shuffled pd.Series or pd.DataFrame.""" # test DF @@ -27,18 +44,40 @@ def test_time_diffs_index_exception(): assert str(pd.DatetimeIndex) in str(exc) -def test_time_diffs(cat_df): - """time_diffs should work on shuffled pd.Series or Index of timestamps.""" - valid = [cat_df.index, pd.Series(cat_df.index)] - for v in valid: - result = pht.time_diffs(v) - assert result.iloc[0] is pd.NaT - assert all(result[1:] == pd.Timedelta(hours=1)) +def test_id_gaps_index(ts_timeindex): + """id_gap_index returns expected gap from time-Series with DatetimeIndex.""" + result = pht.id_gaps_index( + ts_timeindex, pd.Timedelta(minutes=59, microseconds=999999) + ) + expected = pd.DataFrame( + [pd.Timedelta(hours=1)] * 9, + index=pd.date_range(pd.Timestamp(1999, 1, 1, 1), periods=9, freq="h"), + columns=["diffs"], + ) + pd.testing.assert_frame_equal(expected, result, check_index_type=True) -def test_time_diffs_exception(): - """Non-datetime64 pd.Series raises exception.""" - invalid = [pd.Series(list(range(5))), 
pd.Series([pd.Timedelta(hours=1)] * 2)] - for tipo in invalid: - with pytest.raises(TypeError): - pht.time_diffs(tipo) +def test_id_gaps_index_no_gaps(ts_timeindex): + """id_gap_index returns empty Dataframe when threshold exceeds diffs.""" + result = pht.id_gaps_index(ts_timeindex, pd.Timedelta(minutes=60, microseconds=1)) + assert len(result) == 0 + + +def test_id_gaps_(ts_timeindex): + """id_gap returns expected gap from time-Series with DatetimeIndex.""" + result = pht.id_gaps( + ts_timeindex, pd.Timedelta(hours=3, minutes=59, microseconds=999999) + ) + expected = pd.DataFrame( + [pd.Timedelta(hours=4)] * 9, + index=pd.date_range(pd.Timestamp(1999, 1, 1, 4), periods=9, freq="4h"), + columns=["diffs"], + ) + expected.index.freq = None # diffs won't have freq set + pd.testing.assert_frame_equal(expected, result, check_index_type=True) + + +def test_id_gaps_no_gaps(ts_timeindex): + """id_gap_index returns empty Dataframe when threshold exceeds diffs.""" + result = pht.id_gaps(ts_timeindex, pd.Timedelta(hours=4, microseconds=1)) + assert len(result) == 0 From 90f236e78e93e07beebe5a484839c1724af6ed8d Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Wed, 17 Jul 2024 01:39:43 -0500 Subject: [PATCH 09/13] Add examples to stats documentation. gh-2 --- src/pandahelper/stats.py | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/pandahelper/stats.py b/src/pandahelper/stats.py index 9f8ee63..9de9925 100644 --- a/src/pandahelper/stats.py +++ b/src/pandahelper/stats.py @@ -18,6 +18,20 @@ def frequency_table(series: pd.Series) -> pd.DataFrame: Raises: TypeError: If input is not a Pandas Series. 
+ + Examples: + >>> import random + >>> import pandahelper as ph + >>> + >>> random.seed(314) + >>> cities = ["Springfield", "Quahog", "Philadelphia", "Shelbyville"] + >>> series = pd.Series(random.choices(cities, k = 200)) + >>> ph.frequency_table(series) + Count % of Total + Springfield 66 33.00% + Quahog 51 25.50% + Philadelphia 44 22.00% + Shelbyville 39 19.50% """ if not isinstance(series, pd.Series): raise TypeError(f"{series}, is not pd.Series") @@ -70,6 +84,53 @@ def distribution_stats(series: pd.Series) -> pd.DataFrame: Raises: TypeError: If input is not a numeric-like pd.Series. + + Examples: + Distribution stats for Pandas Series of type `float64`: + >>> from random import seed, gauss, expovariate + >>> import pandahelper as ph + >>> import pandas as pd + >>> + >>> seed(314) + >>> series = pd.Series([gauss(mu=30, sigma=20) for x in range(200)]) + >>> ph.distribution_stats(series) + Statistic Value + count 200.000000 + min -23.643007 + 1% -11.918955 + 5% 2.833604 + 25% 17.553793 + 50% 31.420759 + 75% 42.074998 + 95% 60.305435 + 99% 72.028633 + max 81.547828 + mean 30.580535 + standard deviation 18.277706 + median 31.420759 + median absolute deviation 12.216607 + skew -0.020083 + + Distribution stats for Pandas Series of type `datetime64`: + >>> start = pd.Timestamp(2000, 1, 1) + >>> tds = [pd.Timedelta(hours=int(expovariate(lambd=.003))) for x in range(200)] + >>> times = [start + td for td in tds] + >>> series = pd.Series(times) + >>> ph.distribution_stats(series) + Statistic Value + count 200 + min 2000-01-01 00:00:00 + 1% 2000-01-01 01:59:24 + 5% 2000-01-01 09:00:00 + 25% 2000-01-04 08:00:00 + 50% 2000-01-08 04:30:00 + 75% 2000-01-16 21:00:00 + 95% 2000-02-08 01:36:00 + 99% 2000-02-22 10:20:24 + max 2000-04-01 17:00:00 + mean 2000-01-12 14:24:18 + standard deviation 12 days 16:47:15.284423042 + median 2000-01-08 04:30:00 """ stats = dist_stats_dict(series) return pd.DataFrame.from_dict(stats, orient="index", columns=["Statistic Value"]) From 
24a1e9cdeef74046e2545c83932c2469f1d47685 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Wed, 17 Jul 2024 22:55:18 -0500 Subject: [PATCH 10/13] Add 'category_gaps' function to times.py. gh-20 --- CHANGELOG.md | 2 +- src/pandahelper/__init__.py | 9 ++- src/pandahelper/times.py | 70 +++++++++++++++++++++++ tests/test_times.py | 110 ++++++++++++++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d373552..c2df74c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ## 0.1.1 - Unreleased ### Added - SeriesProfile now reports gaps in pd.Series with type `datetime64` or for Series with `DatetimeIndex`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) -- `times.py` module has been added with public functions `time_diffs`, `time_diffs_index`, `id_gaps`, `id_gaps_index`. [gh-20](https://github.com/ray310/Panda-Helper/issues/20) +- `times.py` module has been added with public functions `time_diffs`, `time_diffs_index`, `id_gaps`, `id_gaps_index`, `category_gaps`. 
[gh-20](https://github.com/ray310/Panda-Helper/issues/20) - [`freq_most_least` default parameter for SeriesProfile has been changed to `(10, 5)`.](https://github.com/ray310/Panda-Helper/commit/9ea7a4108996422eaa433e3b86ed20dbbb3c0bdb) ____ diff --git a/src/pandahelper/__init__.py b/src/pandahelper/__init__.py index 58894b2..0a3cbd1 100644 --- a/src/pandahelper/__init__.py +++ b/src/pandahelper/__init__.py @@ -4,7 +4,13 @@ from pandahelper.profiles import DataFrameProfile, SeriesProfile from pandahelper.stats import distribution_stats, frequency_table -from pandahelper.times import time_diffs, time_diffs_index, id_gaps, id_gaps_index +from pandahelper.times import ( + time_diffs, + time_diffs_index, + id_gaps, + id_gaps_index, + category_gaps, +) __version__ = "0.1.1" __all__ = [ @@ -16,4 +22,5 @@ "time_diffs_index", "id_gaps", "id_gaps_index", + "category_gaps", ] diff --git a/src/pandahelper/times.py b/src/pandahelper/times.py index 1a99154..d7bbbb2 100644 --- a/src/pandahelper/times.py +++ b/src/pandahelper/times.py @@ -1,5 +1,6 @@ """Panda-Helper time-series functions.""" +from warnings import warn from typing import Union # TODO: Remove when deprecating Python 3.9 import pandas as pd import pandas.api.types as pat @@ -161,3 +162,72 @@ def id_gaps_index( """ diffs = time_diffs_index(df) return diffs[diffs > threshold].sort_values(ascending=False).to_frame() + + +def category_gaps( + series: pd.Series, threshold: pd.Timedelta, max_cat: int = 50 +) -> [pd.DataFrame, None]: + """Calculate sum of gaps for each category in time-indexed Series. + + Gaps are time differences in excess of expected time increment (threshold). Gap per + category is relative to the minimum and maximum times in the Series. + Intended for use with categorical-like Series. + + Args: + series (pd.Series): Categorical-like Series. + threshold (pd.Timedelta): Threshold for the time difference to be considered + a gap. For hourly data, threshold should be pd.Timedelta(hours=1). 
+        max_cat (int): Maximum number of categories (unique values) before issuing
+            warning and returning `None`.
+
+    Returns:
+        Key-value pairs with category name and associated gap. Will return None if
+        number of categories exceeds `max_cat`.
+
+    Warns:
+        UserWarning: If the number of categories (unique values) in the series
+            exceeds `max_cat`.
+
+    Examples:
+        >>> import pandahelper as ph
+        >>> import pandas as pd
+        >>>
+        >>> start = pd.Timestamp(year=1999, month=1, day=1)
+        >>> a = pd.Series(["A"] * 30, index=pd.date_range(start, periods=30, freq="D"))
+        >>> b = pd.Series(["B"] * 15, index=pd.date_range(start, periods=15, freq="2D"))
+        >>> c = pd.Series(["C"] * 10, index=pd.date_range(start, periods=10, freq="D"))
+        >>> ph.category_gaps(pd.concat([a, b, c]), threshold=pd.Timedelta(days=1))
+          Cumulative Gap
+        C        20 days
+        B        15 days
+        A         0 days
+    """
+    if not isinstance(series, pd.Series) or not isinstance(
+        series.index, pd.DatetimeIndex
+    ):
+        raise TypeError(
+            f"Series should be {pd.Series} with index of type {pd.DatetimeIndex}"
+        )
+    if not isinstance(threshold, pd.Timedelta):
+        raise TypeError(f"Increment should be {pd.Timedelta}")
+    gaps = {}
+    time_range = series.index.max() - series.index.min()
+    categories = series.unique()
+    if len(categories) > max_cat:
+        msg = (
+            f"Number of categories is greater than {max_cat}. To proceed "
+            f"increase 'max_cat' and run function again."
+ ) + warn(msg, stacklevel=2) + return None + for cat in categories: + cat_slice = series.loc[series == cat] + if pd.isnull(cat): # treat nulls as distinct category + nulls = series.apply(lambda x: x is cat) # pylint: disable=W0640 + cat_slice = series[nulls] + cat_range = cat_slice.index.max() - cat_slice.index.min() + diffs = time_diffs_index(cat_slice) + gap = (diffs[diffs > threshold] - threshold).sum() + gaps[cat] = time_range - cat_range + gap + df = pd.Series(gaps.values(), index=gaps.keys(), name="Cumulative Gap") + return df.sort_values(ascending=False).to_frame() diff --git a/tests/test_times.py b/tests/test_times.py index 83dfba2..2197c09 100644 --- a/tests/test_times.py +++ b/tests/test_times.py @@ -1,8 +1,10 @@ """Tests for functions in times.py.""" +import numpy as np import pandas as pd import pytest import pandahelper.times as pht +from .utils import make_category_data def test_time_diffs(cat_df): @@ -81,3 +83,111 @@ def test_id_gaps_no_gaps(ts_timeindex): """id_gap_index returns empty Dataframe when threshold exceeds diffs.""" result = pht.id_gaps(ts_timeindex, pd.Timedelta(hours=4, microseconds=1)) assert len(result) == 0 + + +def test_category_gaps_frequency(): + """Gaps are calculated correctly for categories of varying frequency in Series.""" + start = pd.Timestamp(year=1999, month=1, day=1) + duration = pd.Timedelta(days=365) + end = start + duration + delay = pd.Timedelta(days=180) + c1 = make_category_data("Springfield", start, end, freq="h") + c2 = make_category_data("Quahog", start + delay, end, freq="h") + c3 = make_category_data("Park South", start, end, freq="2h") + c4 = make_category_data("East Midtown", start, end, freq="4h") + c5 = make_category_data("San Diego", start, end, freq="W") + c6 = make_category_data("South Philadelphia", start, end, freq="MS") + df = pd.concat([c1, c2, c3, c4, c5, c6]) + gaps = { + "South Philadelphia": duration - pd.Timedelta(hours=12), + "San Diego": duration - pd.Timedelta(hours=52), + "East Midtown": 
duration - duration / 4, + "Park South": duration / 2, + "Quahog": delay, + "Springfield": pd.Timedelta(hours=0), + } + expected = pd.DataFrame( + gaps.values(), columns=["Cumulative Gap"], index=list(gaps.keys()) + ) + result = pht.category_gaps(df["category"], pd.Timedelta(hours=1)) + pd.testing.assert_frame_equal(expected, result, check_index_type=True) + + +def test_category_gaps_no_gaps(): + """Series with no gaps should show 0 gaps.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=1) + c1 = make_category_data("Springfield", start, end, freq="h") + c2 = make_category_data("Park South", start, end, freq="2h") + df = pd.concat([c1, c2]) + gaps = { + "Springfield": pd.Timedelta(hours=0), + "Park South": pd.Timedelta(hours=0), + } + expected = pd.DataFrame( + gaps.values(), columns=["Cumulative Gap"], index=list(gaps.keys()) + ) + result = pht.category_gaps(df["category"], pd.Timedelta(hours=1)) + pd.testing.assert_frame_equal(expected, result, check_index_type=True) + + +def test_category_gaps_nulls(): + """Nulls should be treated as separate categories with correctly calculated gaps.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=25) # to get 24 hour range with freq='2h' + df = make_category_data("Quahog", start, end, freq="2h") + df.iloc[:2, 3] = None + df.iloc[2:4, 3] = pd.NA + df.iloc[4:6, 3] = np.nan + df.iloc[6:8, 3] = pd.NaT + gaps = { + None: pd.Timedelta(hours=23), + pd.NA: pd.Timedelta(hours=23), + np.nan: pd.Timedelta(hours=23), + pd.NaT: pd.Timedelta(hours=23), + "Quahog": pd.Timedelta(hours=20), + } + expected = pd.DataFrame( + gaps.values(), columns=["Cumulative Gap"], index=list(gaps.keys()) + ) + result = pht.category_gaps(df["category"], pd.Timedelta(hours=1)) + pd.testing.assert_frame_equal(expected, result, check_index_type=True) + + +def test_category_gaps_not_series_exception(): + """Non-series input raises Exception.""" + df = pd.DataFrame({"A": list(range(5))}) 
+ with pytest.raises(TypeError) as exc: + pht.category_gaps(df, pd.Timedelta(hours=1)) + assert str(pd.Series) in str(exc.value) + + +def test_category_gaps_wrong_series_exception(): + """Non-time indexed series raises Exception.""" + series = pd.Series({"A": list(range(5))}) + with pytest.raises(TypeError) as exc: + pht.category_gaps(series, pd.Timedelta(hours=1)) + assert str(pd.DatetimeIndex) in str(exc.value) + + +def test_category_gaps_timedelta_wrong_type_exception(): + """Wrong input type for threshold raises exception.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(days=365) + df = make_category_data("Springfield", start, end, freq="h") + with pytest.raises(TypeError) as exc: + pht.category_gaps(df["category"], start) + assert str(pd.Timedelta) in str(exc.value) + + +def test_category_gaps_warning(): + """Series with more categories than max_cat raises warning and returns None.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=1) + c1 = make_category_data("Springfield", start, end, freq="h") + c2 = make_category_data("Park South", start, end, freq="2h") + df = pd.concat([c1, c2]) + with pytest.warns(UserWarning): + assert ( + pht.category_gaps(df["category"], pd.Timedelta(hours=1), max_cat=1) is None + ) From f3b7a23fae2bcf0d132b229f89341030e3d26f90 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Thu, 18 Jul 2024 01:23:50 -0500 Subject: [PATCH 11/13] DataFrameProfile now includes time_diffs if DataFrame is time-indexed. Also adjusted formatting of __repr__ and _repr_html_. 
--- src/pandahelper/profiles.py | 41 ++++++++++---- tests/conftest.py | 57 ++++++++++++-------- tests/test_data/test_df_time_profile.txt | 36 +++++++++++++ tests/test_data/test_series_time_profile.txt | 20 +++++++ tests/test_profiles.py | 46 ++++++++++++---- tests/test_times.py | 45 +++++----------- 6 files changed, 172 insertions(+), 73 deletions(-) create mode 100644 tests/test_data/test_df_time_profile.txt create mode 100644 tests/test_data/test_series_time_profile.txt diff --git a/src/pandahelper/profiles.py b/src/pandahelper/profiles.py index 3aea3fa..e5bbd11 100644 --- a/src/pandahelper/profiles.py +++ b/src/pandahelper/profiles.py @@ -22,6 +22,7 @@ class DataFrameProfile: num_duplicates (int): Number of duplicated rows. nulls_per_row (pd.Series): Count of null values per row. null_stats (list): Distribution statistics on nulls per row. + time_diffs (pd.Series): Time diffs (gaps) if DataFrame has a DateTimeIndex. """ def __init__(self, df: pd.DataFrame, *, name: str = "", fmt: str = "simple"): @@ -44,6 +45,7 @@ def __init__(self, df: pd.DataFrame, *, name: str = "", fmt: str = "simple"): self.memory_usage = df.memory_usage(index=True, deep=True) / 1000000 # MB self.num_duplicates = sum(df.duplicated(keep="first")) self.nulls_per_row = df.isna().sum(axis=1) + self.time_diffs = self.__calc_time_diffs(df) self.null_stats = self.__null_stats() self._format = fmt @@ -54,6 +56,13 @@ def __null_stats(self, delete_key="count"): del stats[delete_key] return new_stats | stats + @staticmethod + def __calc_time_diffs(df: pd.DataFrame) -> pd.Series or None: + """Calculate time diffs if DataFrame is time-indexed.""" + if pat.is_datetime64_any_dtype(df.index): + return pht.time_diffs_index(df) + return None + def __create_tables(self, table_fmt: str): """Create DataFrameProfile summary tables. 
@@ -92,7 +101,15 @@ def __create_tables(self, table_fmt: str): headers=["Summary of Nulls Per Row", ""], tablefmt=table_fmt, ) - return [df_table, dtype_usage_table, null_table] + tables = [df_table, dtype_usage_table, null_table] + if self.time_diffs is not None: + time_diffs_table = tabulate( + phs.frequency_table(self.time_diffs), + headers=["Time Gaps (Diffs)", "Count", "% of total"], + tablefmt=table_fmt, + ) + tables.append(time_diffs_table) + return tables def __repr__(self): """Printable version of profile.""" @@ -104,7 +121,8 @@ def _repr_html_(self): tables = [_format_html_table(t) for t in self.__create_tables("html")] tables[1] = _decimal_align_col(tables[1], 2) # type/memory usage table tables[2] = _decimal_align_col(tables[2], 1) # stats table - return tables[0] + "
" + tables[1] + "
" + tables[2] + output = "".join([table + "
" for table in tables]) + return output[:-4] # remove last
def save(self, path: str): """Save profile to provided path. @@ -159,7 +177,7 @@ def __init__( TypeError: If input is not a Pandas Series. """ if not isinstance(series, pd.Series): - raise TypeError(f"{series}, is not pd.DataFrame") + raise TypeError(f"{series}, is not pd.Series") if freq_most_least[0] < 0 or freq_most_least[1] < 0: raise ValueError("Tuple values must be >= 0!") self._format = fmt @@ -173,7 +191,7 @@ def __init__( self.stats = self.__calc_stats(series) self.time_diffs = self.__calc_time_diffs(series, time_index) - def __calc_stats(self, series): + def __calc_stats(self, series: pd.Series): """Calculate distribution stats if allowed dtype, else return None.""" if pat.is_object_dtype(self.dtype) or isinstance( self.dtype, pd.CategoricalDtype @@ -182,7 +200,7 @@ def __calc_stats(self, series): return phs.dist_stats_dict(series) @staticmethod - def __calc_time_diffs(series, use_time_index: bool) -> pd.Series or None: + def __calc_time_diffs(series: pd.Series, use_time_index: bool) -> pd.Series or None: """Calculate time diffs for time-indexed series or datetime64 series.""" if use_time_index and pat.is_datetime64_any_dtype(series.index): return pht.time_diffs_index(series) @@ -210,7 +228,7 @@ def __create_tables(self, table_fmt: str) -> list[str]: freq_table = tabulate( freq_info, headers=["Value", "Count", "% of total"], tablefmt=table_fmt ) - stats_table = "" + tables = [series_table, freq_table] if self.stats is not None: stats = self.stats # tabulate casts complex numbers to real numbers, dropping imaginary part @@ -221,14 +239,15 @@ def __create_tables(self, table_fmt: str) -> list[str]: headers=["Statistic", "Value"], tablefmt=table_fmt, ) - time_diffs_table = "" + tables.append(stats_table) if self.time_diffs is not None: time_diffs_table = tabulate( phs.frequency_table(self.time_diffs), headers=["Time Gaps (Diffs)", "Count", "% of total"], tablefmt=table_fmt, ) - return [series_table, freq_table, stats_table, time_diffs_table] + 
tables.append(time_diffs_table) + return tables def __repr__(self): """Printable version of profile.""" @@ -238,8 +257,10 @@ def __repr__(self): def _repr_html_(self): """HTML representation of profile.""" tables = [_format_html_table(t) for t in self.__create_tables("html")] - tables[2] = _decimal_align_col(tables[2], 1) - return tables[0] + "
" + tables[1] + "
" + tables[2] + "
" + tables[3] + if self.stats is not None: + tables[2] = _decimal_align_col(tables[2], 1) + output = "".join([table + "
" for table in tables]) + return output[:-4] # remove last
def save(self, path): """Save profile to provided path. diff --git a/tests/conftest.py b/tests/conftest.py index a7bb3d8..c25ac19 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,27 +16,6 @@ NUM_SERIES = "NUMBER OF PERSONS INJURED" -@pytest.fixture -def cat_df(scope="package"): # pylint: disable=W0613 - """Return test pd.DataFrame with DatetimeIndex.""" - start = pd.Timestamp(year=1999, month=1, day=1) - end = start + pd.Timedelta(hours=10) - df = make_category_data("Springfield", start, end, freq="h") - df = df.sample(frac=1, random_state=2) # index is out of order - return df - - -@pytest.fixture -def ts_timeindex(scope="package"): # pylint: disable=W0613 - """Return pd.Series of type datetime64 with DatetimeIndex.""" - start = pd.Timestamp(year=1999, month=1, day=1) - end = start + pd.Timedelta(hours=40) - time_series = pd.Series(pd.date_range(start, end, freq="4h", inclusive="left")) - index_end = start + pd.Timedelta(hours=10) - time_series.index = pd.date_range(start, index_end, freq="h", inclusive="left") - return time_series - - @pytest.fixture def test_df(scope="package"): # pylint: disable=W0613 """Return test pd.DataFrame from sample of NYC collisions dataset.""" @@ -75,3 +54,39 @@ def non_series_invalid(scope="package"): # pylint: disable=W0613 np.array([1, 2, 3]), ] return invalid_types + + +@pytest.fixture +def simple_df(scope="package"): # pylint: disable=W0613 + """Return test pd.DataFrame with DatetimeIndex.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=10) + df = make_category_data("Springfield", start, end, freq="h") + df = df.sample(frac=1, random_state=2) # index is out of order + return df + + +@pytest.fixture +def ts_timeindex(scope="package"): # pylint: disable=W0613 + """Return pd.Series of type datetime64 with DatetimeIndex.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(hours=40) + time_series = pd.Series(pd.date_range(start, end, freq="4h", 
inclusive="left")) + index_end = start + pd.Timedelta(hours=10) + time_series.index = pd.date_range(start, index_end, freq="h", inclusive="left") + return time_series + + +@pytest.fixture +def cat_df(scope="package"): # pylint: disable=W0613 + """Return pd.DataFrame with DatetimeIndex.""" + start = pd.Timestamp(year=1999, month=1, day=1) + end = start + pd.Timedelta(days=365) + delay = pd.Timedelta(days=180) + c1 = make_category_data("Springfield", start, end, freq="h") + c2 = make_category_data("Quahog", start + delay, end, freq="h") + c3 = make_category_data("Park South", start, end, freq="2h") + c4 = make_category_data("East Midtown", start, end, freq="4h") + c5 = make_category_data("San Diego", start, end, freq="W") + c6 = make_category_data("South Philadelphia", start, end, freq="MS") + return pd.concat([c1, c2, c3, c4, c5, c6]) diff --git a/tests/test_data/test_df_time_profile.txt b/tests/test_data/test_df_time_profile.txt new file mode 100644 index 0000000..2978417 --- /dev/null +++ b/tests/test_data/test_df_time_profile.txt @@ -0,0 +1,36 @@ +DataFrame-Level Info +---------------------- ---------- +DF Shape (19834, 4) +Duplicated Rows 0 +Memory Usage (MB) 2.633 + +Series Name Data Type Memory Usage (MB) +------------- -------------- ------------------- +Index datetime64[ns] 0.158672 +A int64 0.158672 +B object 0.9917 +C float64 0.158672 +category object 1.16563 + +Summary of Nulls Per Row +-------------------------- -- +Number of Columns 4 +min 0 +1% 0 +5% 0 +25% 0 +50% 0 +75% 0 +95% 0 +99% 0 +max 0 +mean 0 +standard deviation 0 +median 0 +median absolute deviation 0 +skew 0 + +Time Gaps (Diffs) Count % of total +------------------- ------- ------------ +0 days 00:00:00 11074 55.84% +0 days 01:00:00 8759 44.16% diff --git a/tests/test_data/test_series_time_profile.txt b/tests/test_data/test_series_time_profile.txt new file mode 100644 index 0000000..977b7fc --- /dev/null +++ b/tests/test_data/test_series_time_profile.txt @@ -0,0 +1,20 @@ +category Info 
+--------------- ------ +Data Type object +Count 19834 +Unique Values 6 +Null Values 0 + +Value Count % of total +------------------ ------- ------------ +Springfield 8760 44.17% +Quahog 4440 22.39% +Park South 4380 22.08% +East Midtown 2190 11.04% +San Diego 52 0.26% +South Philadelphia 12 0.06% + +Time Gaps (Diffs) Count % of total +------------------- ------- ------------ +0 days 00:00:00 11074 55.84% +0 days 01:00:00 8759 44.16% diff --git a/tests/test_profiles.py b/tests/test_profiles.py index 713b544..487ca90 100644 --- a/tests/test_profiles.py +++ b/tests/test_profiles.py @@ -36,6 +36,18 @@ def test_dataframe_profile_valid_312(test_df): assert filecmp.cmp(compare_file, test_file, shallow=False) +@pytest.mark.skipif( + not ((3, 12) <= sys.version_info < (3, 13)), reason="Runs on Python 3.12" +) +def test_dataframe_time_profile_valid_312(cat_df): + """Time-indexed DataFrame profile should match test profile (Python 3.12).""" + compare_file = os.path.join(TEST_DATA_DIR, "test_df_time_profile.txt") + with tempfile.TemporaryDirectory() as tmp: + test_file = os.path.join(tmp, "temp.txt") + php.DataFrameProfile(cat_df).save(test_file) + assert filecmp.cmp(compare_file, test_file, shallow=False) + + def test_dataframe_profile_invalid(non_series_invalid, num_series, cat_like_series): """DataFrame profile should not accept invalid data types.""" invalid_types = [*non_series_invalid, num_series, cat_like_series] @@ -44,17 +56,18 @@ def test_dataframe_profile_invalid(non_series_invalid, num_series, cat_like_seri php.DataFrameProfile(invalid) -def test_dataframe_profile_html(test_df): +def test_dataframe_profile_html(cat_df): """Test html representation of DataFrameProfile.""" - profile = php.DataFrameProfile(test_df) + profile = php.DataFrameProfile(cat_df) # fmt: off soup = bs4.BeautifulSoup(profile._repr_html_(), "html.parser") # pylint: disable=W0212 # fmt: on tables = soup.find_all("table") - assert len(tables) == 3 # null_table + assert len(tables) == 4 assert 
len(tables[2].find_all("tr")) == 16 # 15 dist stats + head row first_td = tables[2].find("td") assert first_td["style"] == "font-family: monospace, monospace; text-align: left;" + assert len(tables[3].find_all("tr")) == 3 # 2 deltas + head row def test_series_profile_text_valid_numerical_format(num_series): @@ -77,6 +90,16 @@ def test_series_profile_text_valid_object_format(cat_like_series): assert filecmp.cmp(compare_file, test_file, shallow=False) +def test_series_profile_text_valid_time_format(cat_df): + """Text version of SeriesProfile for time data matches test profile.""" + comparison_profile = "test_series_time_profile.txt" + compare_file = os.path.join(TEST_DATA_DIR, comparison_profile) + with tempfile.TemporaryDirectory() as tmp: + test_file = os.path.join(tmp, "temp.txt") + php.SeriesProfile(cat_df["category"], time_index=True).save(test_file) + assert filecmp.cmp(compare_file, test_file, shallow=False) + + def test_series_profile_series_dtypes(): """pd.Series should create SeriesProfile for allowed data types.""" start = datetime(year=1999, month=1, day=1) @@ -168,18 +191,19 @@ def test_series_profile_invalid(non_series_invalid, test_df): php.SeriesProfile(invalid) -def test_series_profile_html(num_series): +def test_series_profile_html(cat_df): """Test html representation of SeriesProfile.""" - profile = php.SeriesProfile(num_series) + profile = php.SeriesProfile(cat_df["C"], time_index=True) # fmt: off soup = bs4.BeautifulSoup(profile._repr_html_(), "html.parser") # pylint: disable=W0212 # fmt: on tables = soup.find_all("table") - assert len(tables) == 3 # null_table - assert len(tables[1].find_all("tr")) == 6 # freq table + assert len(tables) == 4 + assert len(tables[1].find_all("tr")) == 16 # freq table assert len(tables[2].find_all("tr")) == 16 # 15 dist stats + head row first_td = tables[2].find("td") assert first_td["style"] == "font-family: monospace, monospace; text-align: left;" + assert len(tables[3].find_all("tr")) == 3 # 2 deltas + head row 
def test_series_profile_frequency_table(test_df): @@ -201,18 +225,18 @@ def test_series_profile_frequency_table(test_df): assert len(freq_table.find_all("tr")) == v + 1 # +1 for header -def test_series_profile_time_index_true(cat_df): +def test_series_profile_time_index_true(simple_df): """time_index=True calculates time diffs for Series with DateTimeIndex.""" - series = cat_df["category"] + series = simple_df["category"] profile = php.SeriesProfile(series, time_index=True) assert pat.is_datetime64_any_dtype(series.index) assert profile.time_diffs.iloc[0] is pd.NaT assert all(profile.time_diffs[1:] == pd.Timedelta(hours=1)) -def test_series_profile_time_index_false(cat_df): +def test_series_profile_time_index_false(simple_df): """time_index=False does not calculate time diffs for Series with DateTimeIndex.""" - series = cat_df["category"] + series = simple_df["category"] profile = php.SeriesProfile(series, time_index=False) assert pat.is_datetime64_any_dtype(series.index) assert profile.time_diffs is None diff --git a/tests/test_times.py b/tests/test_times.py index 2197c09..24321e2 100644 --- a/tests/test_times.py +++ b/tests/test_times.py @@ -7,9 +7,9 @@ from .utils import make_category_data -def test_time_diffs(cat_df): +def test_time_diffs(simple_df): """time_diffs should work on shuffled pd.Series or Index of timestamps.""" - valid = [cat_df.index, pd.Series(cat_df.index)] + valid = [simple_df.index, pd.Series(simple_df.index)] for v in valid: result = pht.time_diffs(v) assert result.iloc[0] is pd.NaT @@ -24,14 +24,14 @@ def test_time_diffs_exception(): pht.time_diffs(tipo) -def test_time_diffs_index(cat_df): +def test_time_diffs_index(simple_df): """time_diffs_index should work on shuffled pd.Series or pd.DataFrame.""" # test DF - df_result = pht.time_diffs_index(cat_df) + df_result = pht.time_diffs_index(simple_df) assert df_result.iloc[0] is pd.NaT assert all(df_result[1:] == pd.Timedelta(hours=1)) # test Series - series_result = 
pht.time_diffs_index(cat_df["B"]) + series_result = pht.time_diffs_index(simple_df["B"]) assert series_result.iloc[0] is pd.NaT assert all(series_result[1:] == pd.Timedelta(hours=1)) @@ -85,19 +85,10 @@ def test_id_gaps_no_gaps(ts_timeindex): assert len(result) == 0 -def test_category_gaps_frequency(): +def test_category_gaps_frequency(cat_df): """Gaps are calculated correctly for categories of varying frequency in Series.""" - start = pd.Timestamp(year=1999, month=1, day=1) duration = pd.Timedelta(days=365) - end = start + duration delay = pd.Timedelta(days=180) - c1 = make_category_data("Springfield", start, end, freq="h") - c2 = make_category_data("Quahog", start + delay, end, freq="h") - c3 = make_category_data("Park South", start, end, freq="2h") - c4 = make_category_data("East Midtown", start, end, freq="4h") - c5 = make_category_data("San Diego", start, end, freq="W") - c6 = make_category_data("South Philadelphia", start, end, freq="MS") - df = pd.concat([c1, c2, c3, c4, c5, c6]) gaps = { "South Philadelphia": duration - pd.Timedelta(hours=12), "San Diego": duration - pd.Timedelta(hours=52), @@ -109,7 +100,7 @@ def test_category_gaps_frequency(): expected = pd.DataFrame( gaps.values(), columns=["Cumulative Gap"], index=list(gaps.keys()) ) - result = pht.category_gaps(df["category"], pd.Timedelta(hours=1)) + result = pht.category_gaps(cat_df["category"], pd.Timedelta(hours=1)) pd.testing.assert_frame_equal(expected, result, check_index_type=True) @@ -154,11 +145,10 @@ def test_category_gaps_nulls(): pd.testing.assert_frame_equal(expected, result, check_index_type=True) -def test_category_gaps_not_series_exception(): +def test_category_gaps_not_series_exception(cat_df): """Non-series input raises Exception.""" - df = pd.DataFrame({"A": list(range(5))}) with pytest.raises(TypeError) as exc: - pht.category_gaps(df, pd.Timedelta(hours=1)) + pht.category_gaps(cat_df, pd.Timedelta(hours=1)) assert str(pd.Series) in str(exc.value) @@ -170,24 +160,17 @@ def 
test_category_gaps_wrong_series_exception(): assert str(pd.DatetimeIndex) in str(exc.value) -def test_category_gaps_timedelta_wrong_type_exception(): +def test_category_gaps_timedelta_wrong_type_exception(cat_df): """Wrong input type for threshold raises exception.""" - start = pd.Timestamp(year=1999, month=1, day=1) - end = start + pd.Timedelta(days=365) - df = make_category_data("Springfield", start, end, freq="h") with pytest.raises(TypeError) as exc: - pht.category_gaps(df["category"], start) + pht.category_gaps(cat_df["category"], pd.Timestamp(year=1999, month=1, day=1)) assert str(pd.Timedelta) in str(exc.value) -def test_category_gaps_warning(): +def test_category_gaps_warning(cat_df): """Series with more categories than max_cat raises warning and returns None.""" - start = pd.Timestamp(year=1999, month=1, day=1) - end = start + pd.Timedelta(hours=1) - c1 = make_category_data("Springfield", start, end, freq="h") - c2 = make_category_data("Park South", start, end, freq="2h") - df = pd.concat([c1, c2]) with pytest.warns(UserWarning): assert ( - pht.category_gaps(df["category"], pd.Timedelta(hours=1), max_cat=1) is None + pht.category_gaps(cat_df["category"], pd.Timedelta(hours=1), max_cat=5) + is None ) From ebcae875c7a55e5f374320cdae066ebebb903449 Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Sun, 21 Jul 2024 21:16:07 -0500 Subject: [PATCH 12/13] Minor change to profile format. 
--- src/pandahelper/profiles.py | 4 ++-- tests/test_data/test_df_time_profile.txt | 8 ++++---- tests/test_data/test_series_time_profile.txt | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pandahelper/profiles.py b/src/pandahelper/profiles.py index e5bbd11..6a5b1d0 100644 --- a/src/pandahelper/profiles.py +++ b/src/pandahelper/profiles.py @@ -105,7 +105,7 @@ def __create_tables(self, table_fmt: str): if self.time_diffs is not None: time_diffs_table = tabulate( phs.frequency_table(self.time_diffs), - headers=["Time Gaps (Diffs)", "Count", "% of total"], + headers=["Time Diffs", "Count", "% of total"], tablefmt=table_fmt, ) tables.append(time_diffs_table) @@ -243,7 +243,7 @@ def __create_tables(self, table_fmt: str) -> list[str]: if self.time_diffs is not None: time_diffs_table = tabulate( phs.frequency_table(self.time_diffs), - headers=["Time Gaps (Diffs)", "Count", "% of total"], + headers=["Time Diffs", "Count", "% of total"], tablefmt=table_fmt, ) tables.append(time_diffs_table) diff --git a/tests/test_data/test_df_time_profile.txt b/tests/test_data/test_df_time_profile.txt index 2978417..f4a1784 100644 --- a/tests/test_data/test_df_time_profile.txt +++ b/tests/test_data/test_df_time_profile.txt @@ -30,7 +30,7 @@ median 0 median absolute deviation 0 skew 0 -Time Gaps (Diffs) Count % of total -------------------- ------- ------------ -0 days 00:00:00 11074 55.84% -0 days 01:00:00 8759 44.16% +Time Diffs Count % of total +--------------- ------- ------------ +0 days 00:00:00 11074 55.84% +0 days 01:00:00 8759 44.16% diff --git a/tests/test_data/test_series_time_profile.txt b/tests/test_data/test_series_time_profile.txt index 977b7fc..413b170 100644 --- a/tests/test_data/test_series_time_profile.txt +++ b/tests/test_data/test_series_time_profile.txt @@ -14,7 +14,7 @@ East Midtown 2190 11.04% San Diego 52 0.26% South Philadelphia 12 0.06% -Time Gaps (Diffs) Count % of total -------------------- ------- ------------ -0 days 00:00:00 
11074 55.84% -0 days 01:00:00 8759 44.16% +Time Diffs Count % of total +--------------- ------- ------------ +0 days 00:00:00 11074 55.84% +0 days 01:00:00 8759 44.16% From 2e8fe740dc8b9dc4602c15a23d841291c4f995ca Mon Sep 17 00:00:00 2001 From: ray310 <64942339+ray310@users.noreply.github.com> Date: Sun, 21 Jul 2024 21:48:45 -0500 Subject: [PATCH 13/13] Updating documentation. -Add tutorial to project site. -Fix README --- README.md | 300 ++++++++++++++++++++++++++++++++++++++++++++- docs/index.md | 4 +- docs/tutorial.md | 297 ++++++++++++++++++++++++++++++++++++++++++++ docs/user_guide.md | 5 - mkdocs.yml | 10 +- 5 files changed, 607 insertions(+), 9 deletions(-) create mode 100644 docs/tutorial.md delete mode 100644 docs/user_guide.md diff --git a/README.md b/README.md index 127ccf3..0406284 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,302 @@ Assess data quality and usefulness with minimal effort. Quickly perform initial data exploration, _so you can move on to more in-depth analysis_. -Please see [project website](https://ray310.github.io/Panda-Helper/). +Please see the [project website](https://ray310.github.io/Panda-Helper/) for more information. + +## Installing Panda-Helper +Panda-Helper can be installed with: `pip install panda-helper`. + +## Using Panda Helper +For our Panda-Helper tutorial, we are going to use a dataset that counts how many + bicycles have passed through bike counting sensors at various locations in New York + City over time. We are going to merge the dataset with some additional metadata for + the sensors. 
The datasets can be downloaded from: + +- Bicycle Counts: [https://data.cityofnewyork.us/Transportation/Bicycle-Counts/uczf-rk3c/about_data](https://data.cityofnewyork.us/Transportation/Bicycle-Counts/uczf-rk3c/about_data) +- Metadata: [https://data.cityofnewyork.us/Transportation/Bicycle-Counters/smn3-rzf9/about_data](https://data.cityofnewyork.us/Transportation/Bicycle-Counters/smn3-rzf9/about_data) + +### Loading Data +```Python +import pandas as pd + +metadata = pd.read_csv("data/Bicycle_Counters.csv") +bike_counts = pd.read_csv( + "data/Bicycle_Counts.csv", + index_col="date", + parse_dates=["date"], + date_format="%m/%d/%Y %I:%M:%S %p", +) +bike_counts = bike_counts.join(metadata.set_index("id"), on="id", how="left") +``` + +### DataFrame Profile +The `DataFrameProfile` is used to get a quick overview of the contents of a Pandas + DataFrame. It is an object that can be later referenced or saved if desired. +In a single view it provides: + +- DataFrame shape. +- Memory usage. +- The number of duplicated rows (if any). +- The datatypes of the individual Series. +- Statistics nulls per row to provide a view on data completeness. +- Time Differences (Diffs or Gaps) if it is a time-indexed DataFrame. + - In the below example we see that most observations occur at the same time as + another observation or 15 minutes after the previous observation. There are a few + gaps where more than 15 minutes has passed since the last observation. 
+ + +```Python +import pandahelper as ph + +ph.DataFrameProfile(bike_counts) +``` +``` +DataFrame-Level Info +---------------------- ------------- +DF Shape (5589249, 12) +Duplicated Rows 0 +Memory Usage (MB) 1,926.950 + +Series Name Data Type Memory Usage (MB) +------------- -------------- ------------------- +Index datetime64[ns] 44.714 +countid int64 44.714 +id int64 44.714 +counts int64 44.714 +status int64 44.714 +name object 438.682 +domain object 368.89 +latitude float64 44.714 +longitude float64 44.714 +interval int64 44.714 +timezone object 419.194 +sens int64 44.714 +counter object 297.758 + +Summary of Nulls Per Row +-------------------------- --------- +Number of Columns 12 +min 0 +1% 0 +5% 0 +25% 0 +50% 0 +75% 0 +95% 1 +99% 1 +max 1 +mean 0.240237 +standard deviation 0.427228 +median 0 +median absolute deviation 0 +skew 1.21604 + +Time Diffs Count % of total +--------------- ------- ------------ +0 days 00:00:00 5176050 92.61% +0 days 00:15:00 413183 7.39% +0 days 01:15:00 12 0.00% +0 days 02:15:00 1 0.00% +0 days 00:30:00 1 0.00% +0 days 06:15:00 1 0.00% +``` + +### Series Profile (Numeric) +The `SeriesProfile` is used to get a quick overview of the contents of a Pandas + Series. It is an object that can be later referenced or saved if desired. +In a single view it provides: + +- Series data type (dtype). +- The number of non-null values. +- The number of unique values. +- The number of null values. +- The counts of some of the most common and least common values in the series which + can be configured with the optional `freq_most_least` flag +- Distribution statistics for the Series based on the data type. 
+ +_Counts are the number of bike crossings at a bike sensor in a window of time_ +```Python +ph.SeriesProfile(bike_counts["counts"]) +``` + +``` +counts Info +------------- ------- +Data Type int64 +Count 5589249 +Unique Values 897 +Null Values 0 + + Value Count % of total +------- ------- ------------ + 0 860809 15.40% + 1 373805 6.69% + 2 279622 5.00% + 3 217329 3.89% + 4 177636 3.18% + 5 150857 2.70% + 6 131232 2.35% + 7 117491 2.10% + 8 106717 1.91% + 9 98373 1.76% + 824 1 0.00% + 1092 1 0.00% + 925 1 0.00% + 894 1 0.00% + 1081 1 0.00% + +Statistic Value +------------------------- -------------- +count 5.58925e+06 +min 0 +1% 0 +5% 0 +25% 2 +50% 13 +75% 37 +95% 93 +99% 164 +max 1133 +mean 26.4127 +standard deviation 39.3405 +median 13 +median absolute deviation 13 +skew 5.17677 +``` + +### Series Profile (Object) +A `SeriesProfile` for an `object` Series will provide similar information as a numeric + Series but without distribution statistics. Here we use the optional `freq_most_least` + parameter to show a longer frequency table. 
+ +_Name is the designation of the bike sensor station_ +```Python +ph.SeriesProfile(bike_counts["name"], freq_most_least=(20, 20)) +``` +``` +name Info +------------- ------- +Data Type object +Count 5589249 +Unique Values 34 +Null Values 0 + +Value Count % of total +----------------------------------------------------------- ------- ------------ +Manhattan Bridge Bike Comprehensive 381148 6.82% +Manhattan Bridge Display Bike Counter 381148 6.82% +Manhattan Bridge Ped Path 368665 6.60% +Ed Koch Queensboro Bridge Shared Path 368504 6.59% +Williamsburg Bridge Bike Path 368433 6.59% +Brooklyn Bridge Bike Path 366111 6.55% +Comprehensive Brooklyn Bridge Counter 365948 6.55% +Staten Island Ferry 287203 5.14% +Prospect Park West 266080 4.76% +Kent Ave btw North 8th St and North 9th St 264522 4.73% +Pulaski Bridge 243868 4.36% +1st Avenue - 26th St N - Interference testing 218169 3.90% +Manhattan Bridge 2012 to 2019 Bike Counter 202785 3.63% +8th Ave at 50th St. 195920 3.51% +Manhattan Bridge 2013 to 2018 Bike Counter 165505 2.96% +Columbus Ave at 86th St. 162481 2.91% +Amsterdam Ave at 86th St. 162369 2.91% +2nd Avenue - 26th St S 136388 2.44% +Brooklyn Bridge Bicycle Path (Roadway) 95955 1.72% +Kent Ave btw South 6th St. and Broadway 78478 1.40% +111th St at 50th Ave 72567 1.30% +Fountain Ave 63146 1.13% +Willis Ave 62148 1.11% +Willis Ave Bikes 62148 1.11% +Willis Ave Peds 62148 1.11% +Manhattan Bridge 2012 Test Bike Counter 36179 0.65% +Manhattan Bridge Interference Calibration 2019 Bike Counter 27675 0.50% +Ocean Pkwy at Avenue J 27260 0.49% +Pelham Pkwy 21452 0.38% +Broadway at 50th St 20544 0.37% +High Bridge 16276 0.29% +Emmons Ave 16267 0.29% +Forsyth Plaza 14998 0.27% +Concrete Plant Park 6761 0.12% +``` + +### Time Series Functionality +#### Calculate the cumulative gaps in time series data by category +In the above example we saw a notable difference in the number of observations per + bike counter station. 
We can use `category_gaps` to check for gaps in + time-indexed, categorical-like data. We use the `threshold` parameter to define the + maximum expected increment in the time-indexed data. Some of the bike stations report + data every 15 minutes and some report data every hour so we can use a threshold of one + hour. + +```Python +ph.category_gaps(bike_counts["name"], threshold=pd.Timedelta(hours=1)) +``` +``` + Cumulative Gap +Concrete Plant Park 4234 days 13:45:00 +Forsyth Plaza 4148 days 16:15:00 +Emmons Ave 4135 days 12:30:00 +High Bridge 4135 days 10:15:00 +Broadway at 50th St 4090 days 10:30:00 +Pelham Pkwy 4081 days 12:15:00 +Ocean Pkwy at Avenue J 4021 days 00:15:00 +Manhattan Bridge Interference Calibration 2019 ... 4016 days 15:00:00 +Manhattan Bridge 2012 Test Bike Counter 3928 days 01:30:00 +Willis Ave Peds 3657 days 12:45:00 +Willis Ave Bikes 3657 days 12:45:00 +Willis Ave 3657 days 12:45:00 +Fountain Ave 3647 days 01:45:00 +111th St at 50th Ave 3548 days 21:45:00 +Kent Ave btw South 6th St. and Broadway 3487 days 06:30:00 +Brooklyn Bridge Bicycle Path (Roadway) 3305 days 06:45:00 +2nd Avenue - 26th St S 2884 days 02:30:00 +Amsterdam Ave at 86th St. 2613 days 09:30:00 +Columbus Ave at 86th St. 2612 days 06:00:00 +Manhattan Bridge 2013 to 2018 Bike Counter 2580 days 19:15:00 +8th Ave at 50th St. 
2263 days 19:00:00 +Manhattan Bridge 2012 to 2019 Bike Counter 2192 days 07:30:00 +1st Avenue - 26th St N - Interference testing 2032 days 00:00:00 +Pulaski Bridge 1764 days 08:45:00 +Kent Ave btw North 8th St and North 9th St 1549 days 04:30:00 +Prospect Park West 1533 days 00:30:00 +Staten Island Ferry 1312 days 22:15:00 +Comprehensive Brooklyn Bridge Counter 492 days 13:45:00 +Brooklyn Bridge Bike Path 490 days 21:45:00 +Williamsburg Bridge Bike Path 466 days 15:00:00 +Ed Koch Queensboro Bridge Shared Path 465 days 22:45:00 +Manhattan Bridge Ped Path 464 days 07:15:00 +Manhattan Bridge Bike Comprehensive 333 days 14:45:00 +Manhattan Bridge Display Bike Counter 333 days 14:45:00 +``` +#### Identify when gaps occur in time series data +It looks like the 'Manhattan Bridge Bike Comprehensive' category has the smallest + amount of missing time. We can use `id_gaps_index` to identify when the gaps occur. + We see that the largest gap for this bike sensor is ~328 days long in 2013. + +```Python +mbc = bike_counts["name"][bike_counts["name"] == "Manhattan Bridge Bike Comprehensive"] +ph.id_gaps_index(mbc, threshold=pd.Timedelta(hours=1)) +``` +``` + diffs +date +2013-12-03 00:00:00 328 days 00:15:00 +2023-09-27 02:15:00 2 days 02:30:00 +2024-01-21 02:15:00 1 days 02:30:00 +2023-07-03 02:15:00 1 days 02:30:00 +2023-07-01 02:15:00 1 days 02:30:00 +2013-12-03 11:00:00 0 days 06:15:00 +2012-10-12 15:00:00 0 days 02:15:00 +2021-03-14 03:00:00 0 days 01:15:00 +2023-03-12 03:00:00 0 days 01:15:00 +2022-03-13 03:00:00 0 days 01:15:00 +2019-03-10 03:00:00 0 days 01:15:00 +2020-03-08 03:00:00 0 days 01:15:00 +2018-03-11 03:00:00 0 days 01:15:00 +2017-03-12 03:00:00 0 days 01:15:00 +2016-03-13 03:00:00 0 days 01:15:00 +2015-03-08 03:00:00 0 days 01:15:00 +2014-11-04 05:00:00 0 days 01:15:00 +2014-03-09 03:00:00 0 days 01:15:00 +2024-03-10 03:00:00 0 days 01:15:00 +``` diff --git a/docs/index.md b/docs/index.md index 9ef480f..603c223 100644 --- a/docs/index.md +++ b/docs/index.md 
@@ -26,11 +26,11 @@ that allows you to assess data quality and usefulness with minimal effort.
 
     Detailed description of the Panda-Helper API
 
-- [:material-television-guide:{ .lg .middle } __User Guide__](user_guide.md)
+- [:material-television-guide:{ .lg .middle } __Tutorial__](tutorial.md)
 
     ---
 
-    How to use Panda-Helper with examples
+    Panda-Helper Tutorial
 
- [:simple-github:{ .lg .middle } __Source Code__](https://github.com/ray310/Panda-Helper)
diff --git a/docs/tutorial.md b/docs/tutorial.md
new file mode 100644
index 0000000..4fe02df
--- /dev/null
+++ b/docs/tutorial.md
@@ -0,0 +1,297 @@
+---
+description: Panda-Helper Tutorial
+---
+# Panda-Helper Tutorial
+For our Panda-Helper tutorial, we are going to use a dataset that counts how many
+ bicycles have passed through bike counting sensors at various locations in New York
+ City over time. We are going to merge the dataset with some additional metadata for
+ the sensors. The datasets can be downloaded from:
+
+- Bicycle Counts: [https://data.cityofnewyork.us/Transportation/Bicycle-Counts/uczf-rk3c/about_data](https://data.cityofnewyork.us/Transportation/Bicycle-Counts/uczf-rk3c/about_data)
+- Metadata: [https://data.cityofnewyork.us/Transportation/Bicycle-Counters/smn3-rzf9/about_data](https://data.cityofnewyork.us/Transportation/Bicycle-Counters/smn3-rzf9/about_data)
+
+## Loading Data
+```Python
+import pandas as pd
+
+metadata = pd.read_csv("data/Bicycle_Counters.csv")
+bike_counts = pd.read_csv(
+    "data/Bicycle_Counts.csv",
+    index_col="date",
+    parse_dates=["date"],
+    date_format="%m/%d/%Y %I:%M:%S %p",
+)
+bike_counts = bike_counts.join(metadata.set_index("id"), on="id", how="left")
+```
+
+## DataFrame Profile
+The `DataFrameProfile` is used to get a quick overview of the contents of a Pandas
+ DataFrame. It is an object that can be later referenced or saved if desired.
+In a single view it provides:
+
+- DataFrame shape.
+- Memory usage.
+- The number of duplicated rows (if any).
+- The datatypes of the individual Series.
+- Statistics on nulls per row to provide a view on data completeness.
+- Time Differences (Diffs or Gaps) if it is a time-indexed DataFrame.
+    - In the below example we see that most observations occur at the same time as
+      another observation or 15 minutes after the previous observation. There are a few
+      gaps where more than 15 minutes has passed since the last observation.
+
+
+```Python
+import pandahelper as ph
+
+ph.DataFrameProfile(bike_counts)
+```
+```
+DataFrame-Level Info
+---------------------- -------------
+DF Shape (5589249, 12)
+Duplicated Rows 0
+Memory Usage (MB) 1,926.950
+
+Series Name Data Type Memory Usage (MB)
+------------- -------------- -------------------
+Index datetime64[ns] 44.714
+countid int64 44.714
+id int64 44.714
+counts int64 44.714
+status int64 44.714
+name object 438.682
+domain object 368.89
+latitude float64 44.714
+longitude float64 44.714
+interval int64 44.714
+timezone object 419.194
+sens int64 44.714
+counter object 297.758
+
+Summary of Nulls Per Row
+-------------------------- ---------
+Number of Columns 12
+min 0
+1% 0
+5% 0
+25% 0
+50% 0
+75% 0
+95% 1
+99% 1
+max 1
+mean 0.240237
+standard deviation 0.427228
+median 0
+median absolute deviation 0
+skew 1.21604
+
+Time Diffs Count % of total
+--------------- ------- ------------
+0 days 00:00:00 5176050 92.61%
+0 days 00:15:00 413183 7.39%
+0 days 01:15:00 12 0.00%
+0 days 02:15:00 1 0.00%
+0 days 00:30:00 1 0.00%
+0 days 06:15:00 1 0.00%
+```
+
+## Series Profile (Numeric)
+The `SeriesProfile` is used to get a quick overview of the contents of a Pandas
+ Series. It is an object that can be later referenced or saved if desired.
+In a single view it provides:
+
+- Series data type (dtype).
+- The number of non-null values.
+- The number of unique values.
+- The number of null values.
+- The counts of some of the most common and least common values in the Series, which
+ can be configured with the optional `freq_most_least` parameter.
+- Distribution statistics for the Series based on the data type.
+
+_Counts are the number of bike crossings at a bike sensor in a window of time._
+```Python
+ph.SeriesProfile(bike_counts["counts"])
+```
+
+```
+counts Info
+------------- -------
+Data Type int64
+Count 5589249
+Unique Values 897
+Null Values 0
+
+ Value Count % of total
+------- ------- ------------
+ 0 860809 15.40%
+ 1 373805 6.69%
+ 2 279622 5.00%
+ 3 217329 3.89%
+ 4 177636 3.18%
+ 5 150857 2.70%
+ 6 131232 2.35%
+ 7 117491 2.10%
+ 8 106717 1.91%
+ 9 98373 1.76%
+ 824 1 0.00%
+ 1092 1 0.00%
+ 925 1 0.00%
+ 894 1 0.00%
+ 1081 1 0.00%
+
+Statistic Value
+------------------------- --------------
+count 5.58925e+06
+min 0
+1% 0
+5% 0
+25% 2
+50% 13
+75% 37
+95% 93
+99% 164
+max 1133
+mean 26.4127
+standard deviation 39.3405
+median 13
+median absolute deviation 13
+skew 5.17677
+```
+
+## Series Profile (Object)
+A `SeriesProfile` for an `object` Series will provide similar information to a numeric
+ Series but without distribution statistics. Here we use the optional `freq_most_least`
+ parameter to show a longer frequency table.
+ +_Name is the designation of the bike sensor station._ +```Python +ph.SeriesProfile(bike_counts["name"], freq_most_least=(20, 20)) +``` +``` +name Info +------------- ------- +Data Type object +Count 5589249 +Unique Values 34 +Null Values 0 + +Value Count % of total +----------------------------------------------------------- ------- ------------ +Manhattan Bridge Bike Comprehensive 381148 6.82% +Manhattan Bridge Display Bike Counter 381148 6.82% +Manhattan Bridge Ped Path 368665 6.60% +Ed Koch Queensboro Bridge Shared Path 368504 6.59% +Williamsburg Bridge Bike Path 368433 6.59% +Brooklyn Bridge Bike Path 366111 6.55% +Comprehensive Brooklyn Bridge Counter 365948 6.55% +Staten Island Ferry 287203 5.14% +Prospect Park West 266080 4.76% +Kent Ave btw North 8th St and North 9th St 264522 4.73% +Pulaski Bridge 243868 4.36% +1st Avenue - 26th St N - Interference testing 218169 3.90% +Manhattan Bridge 2012 to 2019 Bike Counter 202785 3.63% +8th Ave at 50th St. 195920 3.51% +Manhattan Bridge 2013 to 2018 Bike Counter 165505 2.96% +Columbus Ave at 86th St. 162481 2.91% +Amsterdam Ave at 86th St. 162369 2.91% +2nd Avenue - 26th St S 136388 2.44% +Brooklyn Bridge Bicycle Path (Roadway) 95955 1.72% +Kent Ave btw South 6th St. and Broadway 78478 1.40% +111th St at 50th Ave 72567 1.30% +Fountain Ave 63146 1.13% +Willis Ave 62148 1.11% +Willis Ave Bikes 62148 1.11% +Willis Ave Peds 62148 1.11% +Manhattan Bridge 2012 Test Bike Counter 36179 0.65% +Manhattan Bridge Interference Calibration 2019 Bike Counter 27675 0.50% +Ocean Pkwy at Avenue J 27260 0.49% +Pelham Pkwy 21452 0.38% +Broadway at 50th St 20544 0.37% +High Bridge 16276 0.29% +Emmons Ave 16267 0.29% +Forsyth Plaza 14998 0.27% +Concrete Plant Park 6761 0.12% +``` + +## Time Series Functionality +### Calculate the cumulative gaps in time series data by category +In the above example we saw a notable difference in the number of observations per + bike counter station. 
We can use `category_gaps` to check for gaps in + time-indexed, categorical-like data. We use the `threshold` parameter to define the + maximum expected increment in the time-indexed data. Some of the bike stations report + data every 15 minutes and some report data every hour so we can use a threshold of one + hour. + +```Python +ph.category_gaps(bike_counts["name"], threshold=pd.Timedelta(hours=1)) +``` +``` + Cumulative Gap +Concrete Plant Park 4234 days 13:45:00 +Forsyth Plaza 4148 days 16:15:00 +Emmons Ave 4135 days 12:30:00 +High Bridge 4135 days 10:15:00 +Broadway at 50th St 4090 days 10:30:00 +Pelham Pkwy 4081 days 12:15:00 +Ocean Pkwy at Avenue J 4021 days 00:15:00 +Manhattan Bridge Interference Calibration 2019 ... 4016 days 15:00:00 +Manhattan Bridge 2012 Test Bike Counter 3928 days 01:30:00 +Willis Ave Peds 3657 days 12:45:00 +Willis Ave Bikes 3657 days 12:45:00 +Willis Ave 3657 days 12:45:00 +Fountain Ave 3647 days 01:45:00 +111th St at 50th Ave 3548 days 21:45:00 +Kent Ave btw South 6th St. and Broadway 3487 days 06:30:00 +Brooklyn Bridge Bicycle Path (Roadway) 3305 days 06:45:00 +2nd Avenue - 26th St S 2884 days 02:30:00 +Amsterdam Ave at 86th St. 2613 days 09:30:00 +Columbus Ave at 86th St. 2612 days 06:00:00 +Manhattan Bridge 2013 to 2018 Bike Counter 2580 days 19:15:00 +8th Ave at 50th St. 
2263 days 19:00:00 +Manhattan Bridge 2012 to 2019 Bike Counter 2192 days 07:30:00 +1st Avenue - 26th St N - Interference testing 2032 days 00:00:00 +Pulaski Bridge 1764 days 08:45:00 +Kent Ave btw North 8th St and North 9th St 1549 days 04:30:00 +Prospect Park West 1533 days 00:30:00 +Staten Island Ferry 1312 days 22:15:00 +Comprehensive Brooklyn Bridge Counter 492 days 13:45:00 +Brooklyn Bridge Bike Path 490 days 21:45:00 +Williamsburg Bridge Bike Path 466 days 15:00:00 +Ed Koch Queensboro Bridge Shared Path 465 days 22:45:00 +Manhattan Bridge Ped Path 464 days 07:15:00 +Manhattan Bridge Bike Comprehensive 333 days 14:45:00 +Manhattan Bridge Display Bike Counter 333 days 14:45:00 +``` +### Identify when gaps occur in time series data +It looks like the 'Manhattan Bridge Bike Comprehensive' category has the smallest + amount of missing time. We can use `id_gaps_index` to identify when the gaps occur. + We see that the largest gap for this bike sensor is ~328 days long in 2013. + +```Python +mbc = bike_counts["name"][bike_counts["name"] == "Manhattan Bridge Bike Comprehensive"] +ph.id_gaps_index(mbc, threshold=pd.Timedelta(hours=1)) +``` +``` + diffs +date +2013-12-03 00:00:00 328 days 00:15:00 +2023-09-27 02:15:00 2 days 02:30:00 +2024-01-21 02:15:00 1 days 02:30:00 +2023-07-03 02:15:00 1 days 02:30:00 +2023-07-01 02:15:00 1 days 02:30:00 +2013-12-03 11:00:00 0 days 06:15:00 +2012-10-12 15:00:00 0 days 02:15:00 +2021-03-14 03:00:00 0 days 01:15:00 +2023-03-12 03:00:00 0 days 01:15:00 +2022-03-13 03:00:00 0 days 01:15:00 +2019-03-10 03:00:00 0 days 01:15:00 +2020-03-08 03:00:00 0 days 01:15:00 +2018-03-11 03:00:00 0 days 01:15:00 +2017-03-12 03:00:00 0 days 01:15:00 +2016-03-13 03:00:00 0 days 01:15:00 +2015-03-08 03:00:00 0 days 01:15:00 +2014-11-04 05:00:00 0 days 01:15:00 +2014-03-09 03:00:00 0 days 01:15:00 +2024-03-10 03:00:00 0 days 01:15:00 +``` diff --git a/docs/user_guide.md b/docs/user_guide.md deleted file mode 100644 index 3ea7ebd..0000000 --- 
a/docs/user_guide.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -description: User Guide. How to use Panda-Helper with examples. ---- - -Coming soon... diff --git a/mkdocs.yml b/mkdocs.yml index df63eb4..4971039 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,7 +9,7 @@ copyright: > nav: - Home: index.md - Installation: install.md - - User Guide: user_guide.md + - Tutorial: tutorial.md - API Reference: api.md - Issue Tracker: https://github.com/ray310/Panda-Helper/issues extra_css: @@ -31,6 +31,7 @@ theme: - navigation.instant.progress - toc.integrate - navigation.footer + - content.code.copy palette: # Palette toggle for light mode - media: "(prefers-color-scheme: light)" @@ -93,6 +94,13 @@ markdown_extensions: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences plugins: - search - mkdocstrings: