From 7cf2bb27709b0df5fb534f35c9a107522519c566 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Thu, 20 Jun 2024 21:11:58 +1000
Subject: [PATCH 01/17] minor edits

---
 janitor/polars/row_to_names.py                | 88 +++++++++++--------
 .../functions/test_row_to_names_polars.py     | 35 +++++---
 2 files changed, 78 insertions(+), 45 deletions(-)

diff --git a/janitor/polars/row_to_names.py b/janitor/polars/row_to_names.py
index 7fe1b0b9e..70d48b208 100644
--- a/janitor/polars/row_to_names.py
+++ b/janitor/polars/row_to_names.py
@@ -17,7 +17,7 @@
 
 def _row_to_names(
     df: pl.DataFrame | pl.LazyFrame,
-    row_numbers: int | list,
+    row_numbers: int | list | slice,
     remove_rows: bool,
     remove_rows_above: bool,
     separator: str,
@@ -26,49 +26,67 @@ def _row_to_names(
     Function to convert rows in the DataFrame to column names.
     """
     check("separator", separator, [str])
-    check("row_numbers", row_numbers, [int, list])
-    row_numbers_is_a_list = False
-    if isinstance(row_numbers, list):
-        row_numbers_is_a_list = True
+    if isinstance(row_numbers, int):
+        row_numbers = slice(row_numbers, row_numbers + 1)
+    elif isinstance(row_numbers, slice):
+        if row_numbers.step is not None:
+            raise ValueError(
+                "The step argument for slice is not supported in row_to_names."
+            )
+    elif isinstance(row_numbers, list):
         for entry in row_numbers:
             check("entry in the row_numbers argument", entry, [int])
-        expression = (
-            pl.all()
-            .gather(row_numbers)
-            .cast(pl.String)
-            .implode()
-            .list.join(separator=separator)
+    else:
+        raise TypeError(
+            "row_numbers should be either an integer, "
+            "a slice or a list; "
+            f"instead got type {type(row_numbers).__name__}"
         )
+    is_a_slice = isinstance(row_numbers, slice)
+    if is_a_slice:
+        expression = pl.all().str.concat(delimiter=separator)
         expression = pl.struct(expression)
+        offset = row_numbers.start
+        length = row_numbers.stop - row_numbers.start
+        mapping = df.slice(
+            offset=offset,
+            length=length,
+        )
+        mapping = mapping.select(expression)
     else:
-        expression = pl.all().gather(row_numbers).cast(pl.String)
+        expression = pl.all().gather(row_numbers)
+        expression = expression.str.concat(delimiter=separator)
         expression = pl.struct(expression)
-    mapping = df.select(expression)
-    if isinstance(mapping, pl.LazyFrame):
+        mapping = df.select(expression)
+    if isinstance(df, pl.LazyFrame):
         mapping = mapping.collect()
     mapping = mapping.to_series(0)[0]
     df = df.rename(mapping=mapping)
     if remove_rows_above:
-        if row_numbers_is_a_list:
-            if not pl.Series(row_numbers).diff().drop_nulls().eq(1).all():
-                raise ValueError(
-                    "The remove_rows_above argument is applicable "
-                    "only if the row_numbers argument is an integer, "
-                    "or the integers in a list are consecutive increasing, "
-                    "with a difference of 1."
-                )
+        if not is_a_slice:
+            raise ValueError(
+                "The remove_rows_above argument is applicable "
+                "only if the row_numbers argument is an integer "
+                "or a slice."
+            )
         if remove_rows:
-            tail = row_numbers[-1] if row_numbers_is_a_list else row_numbers
-            tail += 1
-        else:
-            tail = row_numbers[0] if row_numbers_is_a_list else row_numbers
-        df = df.slice(offset=tail)
-    elif remove_rows:
-        idx = "".join(df.columns)
-        df = df.with_row_index(name=idx)
-        if row_numbers_is_a_list:
-            df = df.filter(~pl.col(idx).is_in(row_numbers))
-        else:
-            df = df.filter(pl.col(idx) != row_numbers)
-        df = df.drop(idx)
+            return df.slice(offset=row_numbers.stop)
+        return df.slice(offset=row_numbers.start)
+
+    if remove_rows:
+        if is_a_slice:
+            df = [
+                df.slice(offset=0, length=row_numbers.start),
+                df.slice(offset=row_numbers.stop),
+            ]
+            return pl.concat(df, rechunk=True)
+        name = "".join(df.columns)
+        name = f"{name}_"
+        df = (
+            df.with_row_index(name=name)
+            .filter(pl.col(name=name).is_in(row_numbers).not_())
+            .select(pl.exclude(name))
+        )
+        return df
+
     return df
diff --git a/tests/polars/functions/test_row_to_names_polars.py b/tests/polars/functions/test_row_to_names_polars.py
index be5e07fdd..d7371a1c4 100644
--- a/tests/polars/functions/test_row_to_names_polars.py
+++ b/tests/polars/functions/test_row_to_names_polars.py
@@ -25,12 +25,21 @@ def test_separator_type(df):
 @pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_numbers_type(df):
     """
-    Raise if row_numbers is not an int/list
+    Raise if row_numbers is not an int/slice/list
     """
     with pytest.raises(TypeError, match="row_numbers should be.+"):
         df.janitor.row_to_names({1, 2})
 
 
+@pytest.mark.parametrize("df", [df, df.lazy()])
+def test_row_numbers_slice_step(df):
+    """
+    Raise if row_numbers is a slice and step is passed.
+    """
+    with pytest.raises(ValueError, match="The step argument for slice.+"):
+        df.janitor.row_to_names(slice(1, 3, 1))
+
+
 @pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_numbers_list_type(df):
     """
@@ -52,6 +61,15 @@ def test_row_to_names(df):
     assert df.columns[3] == "Basel"
 
 
+@pytest.mark.parametrize("df", [df, df.lazy()])
+def test_row_to_names_slice(df):
+    df = df.janitor.row_to_names(slice(2, 3))
+    assert df.columns[0] == "3.2346125"
+    assert df.columns[1] == "3"
+    assert df.columns[2] == "lion"
+    assert df.columns[3] == "Basel"
+
+
 @pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_single_list(df):
     "Test output if row_numbers is a list, and contains a single item."
@@ -108,7 +126,7 @@ def test_row_to_names_delete_above(df):
 @pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_above_list(df):
     "Test output if row_numbers is a list"
-    df = df.janitor.row_to_names([2, 3], remove_rows_above=True)
+    df = df.janitor.row_to_names(slice(2, 4), remove_rows_above=True)
     if isinstance(df, pl.LazyFrame):
         df = df.collect()
     assert df.to_series(0)[0] == 3.234_612_5
@@ -124,7 +142,7 @@ def test_row_to_names_delete_above_delete_rows(df):
     and remove_rows_above=True
     """
     df = df.janitor.row_to_names(
-        [2, 3], remove_rows=True, remove_rows_above=True
+        slice(2, 4), remove_rows=True, remove_rows_above=True
     )
     if isinstance(df, pl.LazyFrame):
         df = df.collect()
@@ -150,11 +168,8 @@ def test_row_to_names_delete_above_delete_rows_scalar(df):
 
 
 @pytest.mark.parametrize("df", [df, df.lazy()])
-def test_row_to_names_delete_above_list_non_consecutive(df):
-    "Raise if row_numbers is a list, but non consecutive"
-    msg = "The remove_rows_above argument is applicable "
-    msg += "only if the row_numbers argument is an integer, "
-    msg += "or the integers in a list are consecutive increasing, "
-    msg += "with a difference of 1."
-    with pytest.raises(ValueError, match=msg):
+def test_row_to_names_not_a_slice_remove_rows_above(df):
+    with pytest.raises(
+        ValueError, match=r"The remove_rows_above argument is applicable.+"
+    ):
         df.janitor.row_to_names([1, 3], remove_rows_above=True)

From 749c94777d6a795ebf9a429d0ff9cd353651ee6e Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Thu, 20 Jun 2024 22:32:01 +1000
Subject: [PATCH 02/17] minor speed bump

---
 examples/notebooks/Row_to_Names.ipynb | 328 +++++++++++++++-----------
 janitor/functions/row_to_names.py     |  72 +++---
 tests/functions/test_row_to_names.py  |  18 +-
 3 files changed, 245 insertions(+), 173 deletions(-)

diff --git a/examples/notebooks/Row_to_Names.ipynb b/examples/notebooks/Row_to_Names.ipynb
index 2852ffdbd..402396314 100644
--- a/examples/notebooks/Row_to_Names.ipynb
+++ b/examples/notebooks/Row_to_Names.ipynb
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,21 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = '''shoe, 220, 100\n",
-    "          shoe, 450, 40\n",
-    "          item, retail_price, cost\n",
-    "          shoe, 200, 38\n",
-    "          bag, 305, 25\n",
-    "       '''"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -72,82 +58,63 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <th>2</th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>220</td>\n",
-       "      <td>100</td>\n",
+       "      <td>nums</td>\n",
+       "      <td>chars</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>450</td>\n",
-       "      <td>40</td>\n",
+       "      <td>6</td>\n",
+       "      <td>x</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>item</td>\n",
-       "      <td>retail_price</td>\n",
-       "      <td>cost</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>200</td>\n",
-       "      <td>38</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>bag</td>\n",
-       "      <td>305</td>\n",
-       "      <td>25</td>\n",
+       "      <td>9</td>\n",
+       "      <td>y</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                0              1      2\n",
-       "0            shoe            220    100\n",
-       "1            shoe            450     40\n",
-       "2            item   retail_price   cost\n",
-       "3            shoe            200     38\n",
-       "4             bag            305     25"
+       "      a      b\n",
+       "0  nums  chars\n",
+       "1     6      x\n",
+       "2     9      y"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "temp = pd.read_csv(StringIO(data), header=None)\n",
-    "temp"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Looking at the dataframe above, we would love to use row 2 as our column names. One way to achieve this involves a couple of steps\n",
-    "\n",
-    "1. Use loc/iloc to assign row 2 to columns.\n",
-    "2. Strip off any whitespace.\n",
-    "2. Drop row 2 from the dataframe using the drop method.\n",
-    "3. Set axis name to none."
+    "df = pd.DataFrame({\n",
+    "   \"a\": [\"nums\", '6', '9'],\n",
+    "    \"b\": [\"chars\", \"x\", \"y\"],\n",
+    " })\n",
+    "df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
+      "  warnings.warn(\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -161,81 +128,70 @@
        "        vertical-align: top;\n",
        "    }\n",
        "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
        "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
+       "    <tr>\n",
        "      <th></th>\n",
-       "      <th>item</th>\n",
-       "      <th>retail_price</th>\n",
-       "      <th>cost</th>\n",
+       "      <th>6</th>\n",
+       "      <th>x</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th>9</th>\n",
+       "      <th>y</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>220</td>\n",
-       "      <td>100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>450</td>\n",
-       "      <td>40</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>200</td>\n",
-       "      <td>38</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>bag</td>\n",
-       "      <td>305</td>\n",
-       "      <td>25</td>\n",
+       "      <td>nums</td>\n",
+       "      <td>chars</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "             item retail_price  cost\n",
-       "0            shoe          220   100\n",
-       "1            shoe          450    40\n",
-       "3            shoe          200    38\n",
-       "4             bag          305    25"
+       "      6      x\n",
+       "      9      y\n",
+       "0  nums  chars"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "temp.columns = temp.iloc[2, :]\n",
-    "temp.columns = temp.columns.str.strip()\n",
-    "temp = temp.drop(2, axis=0)\n",
-    "temp = temp.rename_axis(None, axis='columns')\n",
-    "temp"
+    "df.row_to_names(row_numbers=[1,2], remove_rows=True)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "However, the first two steps prevent us from method chaining. This is easily resolved using the row_to_names function"
+    "dff = pd.concat([df]*1_000_000)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
+      "  warnings.warn(\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -249,68 +205,168 @@
        "        vertical-align: top;\n",
        "    }\n",
        "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
        "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th>nums</th>\n",
+       "      <th>chars</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th></th>\n",
-       "      <th>item</th>\n",
-       "      <th>retail_price</th>\n",
-       "      <th>cost</th>\n",
+       "      <th>6</th>\n",
+       "      <th>x</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>9</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>220</td>\n",
-       "      <td>100</td>\n",
+       "      <td>nums</td>\n",
+       "      <td>chars</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>450</td>\n",
-       "      <td>40</td>\n",
+       "      <td>6</td>\n",
+       "      <td>x</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>9</td>\n",
+       "      <td>y</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>shoe</td>\n",
-       "      <td>200</td>\n",
-       "      <td>38</td>\n",
+       "      <th>0</th>\n",
+       "      <td>nums</td>\n",
+       "      <td>chars</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>bag</td>\n",
-       "      <td>305</td>\n",
-       "      <td>25</td>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>6</td>\n",
+       "      <td>x</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>9</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>nums</td>\n",
+       "      <td>chars</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>6</td>\n",
+       "      <td>x</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>9</td>\n",
+       "      <td>y</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
+       "<p>2999998 rows × 2 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "             item  retail_price  cost\n",
-       "0            shoe           220   100\n",
-       "1            shoe           450    40\n",
-       "3            shoe           200    38\n",
-       "4             bag           305    25"
+       "    nums  chars\n",
+       "       6      x\n",
+       "2      9      y\n",
+       "0   nums  chars\n",
+       "1      6      x\n",
+       "2      9      y\n",
+       "0   nums  chars\n",
+       "..   ...    ...\n",
+       "1      6      x\n",
+       "2      9      y\n",
+       "0   nums  chars\n",
+       "1      6      x\n",
+       "2      9      y\n",
+       "\n",
+       "[2999998 rows x 2 columns]"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df = (\n",
-    "    pd.read_csv(StringIO(data), header=None)\n",
-    "    .row_to_names(row_number=2, remove_row=True)\n",
-    ")\n",
-    "\n",
-    "df"
+    "dff.row_to_names(row_numbers=[0,1], remove_rows=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "20.8 ms ± 31.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit dff.row_to_names(row_numbers=0, remove_rows_above=True, remove_rows=True, reset_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "37.3 ms ± 310 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit dff.row_to_names(row_numbers=0, remove_rows=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "195 ms ± 635 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit dff.row_to_names(row_numbers=[0,1], remove_rows=True, reset_index=True)"
    ]
   }
  ],
@@ -330,7 +386,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6"
+   "version": "3.9.16"
   }
  },
  "nbformat": 4,
diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index 2f529c8b5..adae640b3 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -15,7 +15,7 @@
 @deprecated_alias(row_number="row_numbers", remove_row="remove_rows")
 def row_to_names(
     df: pd.DataFrame,
-    row_numbers: int | list = 0,
+    row_numbers: int | list | slice = 0,
     remove_rows: bool = False,
     remove_rows_above: bool = False,
     reset_index: bool = False,
@@ -83,20 +83,30 @@ def row_to_names(
     Returns:
         A pandas DataFrame with set column names.
     """  # noqa: E501
+
     if not pd.options.mode.copy_on_write:
         df = df.copy()
-
-    check("row_numbers", row_numbers, [int, list])
-    if isinstance(row_numbers, list):
+    else:
+        df = df[:]
+
+    if isinstance(row_numbers, int):
+        row_numbers = slice(row_numbers, row_numbers + 1)
+    elif isinstance(row_numbers, slice):
+        if row_numbers.step is not None:
+            raise ValueError(
+                "The step argument for slice is not supported in row_to_names."
+            )
+    elif isinstance(row_numbers, list):
         for entry in row_numbers:
             check("entry in the row_numbers argument", entry, [int])
+    else:
+        raise TypeError(
+            "row_numbers should be either an integer, "
+            "a slice or a list; "
+            f"instead got type {type(row_numbers).__name__}"
+        )
+    is_a_slice = isinstance(row_numbers, slice)
 
-    warnings.warn(
-        "The function row_to_names will, in the official 1.0 release, "
-        "change its behaviour to reset the dataframe's index by default. "
-        "You can prepare for this change right now by explicitly setting "
-        "`reset_index=True` when calling on `row_to_names`."
-    )
     # should raise if positional indexers are missing
     # IndexError: positional indexers are out-of-bounds
     headers = df.iloc[row_numbers]
@@ -111,25 +121,33 @@ def row_to_names(
     df.columns = headers
     df.columns.name = None
 
-    df_index = df.index
     if remove_rows_above:
-        if isinstance(row_numbers, list):
-            if not (np.diff(row_numbers) == 1).all():
-                raise ValueError(
-                    "The remove_rows_above argument is applicable "
-                    "only if the row_numbers argument is an integer, "
-                    "or the integers in a list are consecutive increasing, "
-                    "with a difference of 1."
-                )
-            tail = row_numbers[0]
+        if not is_a_slice:
+            raise ValueError(
+                "The remove_rows_above argument is applicable "
+                "only if the row_numbers argument is an integer "
+                "or a slice."
+            )
+        if remove_rows:
+            df = df.iloc[row_numbers.stop :]
+        else:
+            df = df.iloc[row_numbers.start :]
+    elif remove_rows:
+        if is_a_slice:
+            start = row_numbers.start if row_numbers.start else 0
+            stop = row_numbers.stop
+            df = [df.iloc[:start], df.iloc[stop:]]
+            df = pd.concat(df, sort=False, copy=False)
         else:
-            tail = row_numbers
-        df = df.iloc[tail:]
-    if remove_rows:
-        if isinstance(row_numbers, int):
-            row_numbers = [row_numbers]
-        df_index = df.index.symmetric_difference(df_index[row_numbers])
-        df = df.loc[df_index]
+            row_numbers = np.setdiff1d(range(len(df)), row_numbers)
+            df = df.iloc[row_numbers]
     if reset_index:
         df.index = range(len(df))
+    else:
+        warnings.warn(
+            "The function row_to_names will, in the official 1.0 release, "
+            "change its behaviour to reset the dataframe's index by default. "
+            "You can prepare for this change right now by explicitly setting "
+            "`reset_index=True` when calling on `row_to_names`."
+        )
     return df
diff --git a/tests/functions/test_row_to_names.py b/tests/functions/test_row_to_names.py
index f966a4966..5295b44d9 100644
--- a/tests/functions/test_row_to_names.py
+++ b/tests/functions/test_row_to_names.py
@@ -64,9 +64,9 @@ def test_row_to_names_delete_above(dataframe):
 
 
 @pytest.mark.functions
-def test_row_to_names_delete_above_list(dataframe):
-    "Test output if row_numbers is a list"
-    df = dataframe.row_to_names([2, 3], remove_rows_above=True)
+def test_row_to_names_delete_above_slice(dataframe):
+    "Test output if row_numbers is a slice"
+    df = dataframe.row_to_names(slice(2, 4), remove_rows_above=True)
     assert df.iloc[0, 0] == 3
     assert df.iloc[0, 1] == 3.234_612_5
     assert df.iloc[0, 2] == 3
@@ -75,13 +75,11 @@ def test_row_to_names_delete_above_list(dataframe):
 
 
 @pytest.mark.functions
-def test_row_to_names_delete_above_list_non_consecutive(dataframe):
-    "Raise if row_numbers is a list, but non consecutive"
-    msg = "The remove_rows_above argument is applicable "
-    msg += "only if the row_numbers argument is an integer, "
-    msg += "or the integers in a list are consecutive increasing, "
-    msg += "with a difference of 1."
-    with pytest.raises(ValueError, match=msg):
+def test_row_to_names_delete_above_is_a_list(dataframe):
+    "Raise if row_numbers is a list"
+    with pytest.raises(
+        ValueError, match=r"The remove_rows_above argument is applicable.+"
+    ):
         dataframe.row_to_names([1, 3], remove_rows_above=True)
 
 

From c2f2150aaf6f5858bd06482ef2f7916c57300f20 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Thu, 20 Jun 2024 22:37:11 +1000
Subject: [PATCH 03/17] remove row_to_names.md

---
 examples/row_to_names.md | 98 ----------------------------------------
 1 file changed, 98 deletions(-)
 delete mode 100644 examples/row_to_names.md

diff --git a/examples/row_to_names.md b/examples/row_to_names.md
deleted file mode 100644
index 2d0bbab68..000000000
--- a/examples/row_to_names.md
+++ /dev/null
@@ -1,98 +0,0 @@
-# df.row_to_names()
-
-## Description
-This method elevates a row to be the column names of a DataFrame. It contains parameters to remove the elevated row from the DataFrame along with removing the rows above the selected row.
-
-    :param df: A pandas DataFrame.
-    :param row_number: The row containing the variable names
-    :param remove_row: Should the row be removed from the DataFrame?
-    :param remove_rows_above: Should the rows above row_number be removed from the resulting DataFrame?
-
-## Parameters
-### df
-A pandas dataframe.
-
-### row_number
-The number of the row containing the variable names. Remember, indexing starts at zero!
-
-### remove_row (Default: False)
-Remove the row that is now the headers from the DataFrame.
-
-### remove_rows_above (Default: False)
-Remove the rows from the index above `row_number`.
-
-
-## Setup
-
-```python
-import pandas as pd
-import janitor
-
-
-data_dict = {
-    "a": [1, 2, 3] * 3,
-    "Bell__Chart": [1, 2, 3] * 3,
-    "decorated-elephant": [1, 2, 3] * 3,
-    "animals": ["rabbit", "leopard", "lion"] * 3,
-    "cities": ["Cambridge", "Shanghai", "Basel"] * 3,
-}
-```
-
-
-
-## Example1: Move first row to column names
- ```python
-example_dataframe = pd.DataFrame(data_dict)
-
-example_dataframe.row_to_names(0)
-```
-
-### Output
-
-       1  1  1   rabbit  Cambridge
-    0  1  1  1   rabbit  Cambridge
-    1  2  2  2  leopard   Shanghai
-    2  3  3  3     lion      Basel
-    3  1  1  1   rabbit  Cambridge
-    4  2  2  2  leopard   Shanghai
-    5  3  3  3     lion      Basel
-    6  1  1  1   rabbit  Cambridge
-    7  2  2  2  leopard   Shanghai
-
-## Example2: Move first row to column names and remove row
-
-```python
-example_dataframe = pd.DataFrame(data_dict)
-
-example_dataframe.row_to_names(0, remove_row=True)
-```
-
-### Output
-
-       1  1  1   rabbit  Cambridge
-    1  2  2  2  leopard   Shanghai
-    2  3  3  3     lion      Basel
-    3  1  1  1   rabbit  Cambridge
-    4  2  2  2  leopard   Shanghai
-    5  3  3  3     lion      Basel
-    6  1  1  1   rabbit  Cambridge
-    7  2  2  2  leopard   Shanghai
-    8  3  3  3     lion      Basel
-
-## Example3: Move first row to column names, remove row, and remove rows above selected row
-
-```python
-example_dataframe = pd.DataFrame(data_dict)
-
-example_dataframe.row_to_names(2, remove_row=True, remove_rows_above=True)
-```
-
-### Output
-
-       3  3  3     lion      Basel
-    3  1  1  1   rabbit  Cambridge
-    4  2  2  2  leopard   Shanghai
-    5  3  3  3     lion      Basel
-    6  1  1  1   rabbit  Cambridge
-    7  2  2  2  leopard   Shanghai
-    8  3  3  3     lion      Basel

From 30b336b112a03224ddaca5ffcfcc085ddb11e9a2 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Thu, 20 Jun 2024 22:38:10 +1000
Subject: [PATCH 04/17] remove row_to_names.ipynb

---
 examples/notebooks/Row_to_Names.ipynb | 394 --------------------------
 1 file changed, 394 deletions(-)
 delete mode 100644 examples/notebooks/Row_to_Names.ipynb

diff --git a/examples/notebooks/Row_to_Names.ipynb b/examples/notebooks/Row_to_Names.ipynb
deleted file mode 100644
index 402396314..000000000
--- a/examples/notebooks/Row_to_Names.ipynb
+++ /dev/null
@@ -1,394 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# row_to_names : Elevates a row to be the column names of a DataFrame."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Background"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This notebook serves to show a brief and simple example of how to swap column names using one of the rows in the dataframe."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import janitor\n",
-    "from io import StringIO"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>a</th>\n",
-       "      <th>b</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>nums</td>\n",
-       "      <td>chars</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>6</td>\n",
-       "      <td>x</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>9</td>\n",
-       "      <td>y</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      a      b\n",
-       "0  nums  chars\n",
-       "1     6      x\n",
-       "2     9      y"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df = pd.DataFrame({\n",
-    "   \"a\": [\"nums\", '6', '9'],\n",
-    "    \"b\": [\"chars\", \"x\", \"y\"],\n",
-    " })\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead tr th {\n",
-       "        text-align: left;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>6</th>\n",
-       "      <th>x</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>9</th>\n",
-       "      <th>y</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>nums</td>\n",
-       "      <td>chars</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      6      x\n",
-       "      9      y\n",
-       "0  nums  chars"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df.row_to_names(row_numbers=[1,2], remove_rows=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dff = pd.concat([df]*1_000_000)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead tr th {\n",
-       "        text-align: left;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>nums</th>\n",
-       "      <th>chars</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>6</th>\n",
-       "      <th>x</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>9</td>\n",
-       "      <td>y</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>nums</td>\n",
-       "      <td>chars</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>6</td>\n",
-       "      <td>x</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>9</td>\n",
-       "      <td>y</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>nums</td>\n",
-       "      <td>chars</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>6</td>\n",
-       "      <td>x</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>9</td>\n",
-       "      <td>y</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>nums</td>\n",
-       "      <td>chars</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>6</td>\n",
-       "      <td>x</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>9</td>\n",
-       "      <td>y</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2999998 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    nums  chars\n",
-       "       6      x\n",
-       "2      9      y\n",
-       "0   nums  chars\n",
-       "1      6      x\n",
-       "2      9      y\n",
-       "0   nums  chars\n",
-       "..   ...    ...\n",
-       "1      6      x\n",
-       "2      9      y\n",
-       "0   nums  chars\n",
-       "1      6      x\n",
-       "2      9      y\n",
-       "\n",
-       "[2999998 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dff.row_to_names(row_numbers=[0,1], remove_rows=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "20.8 ms ± 31.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%timeit dff.row_to_names(row_numbers=0, remove_rows_above=True, remove_rows=True, reset_index=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/samuel.oranyeli/pyjanitor/janitor/functions/row_to_names.py:148: UserWarning: The function row_to_names will, in the official 1.0 release, change its behaviour to reset the dataframe's index by default. You can prepare for this change right now by explicitly setting `reset_index=True` when calling on `row_to_names`.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "37.3 ms ± 310 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%timeit dff.row_to_names(row_numbers=0, remove_rows=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "195 ms ± 635 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%timeit dff.row_to_names(row_numbers=[0,1], remove_rows=True, reset_index=True)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

From 9010b0689328f1f744dbdd3c95c971694c00f3c7 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Fri, 21 Jun 2024 09:33:53 +1000
Subject: [PATCH 05/17] fix tests

---
 janitor/polars/row_to_names.py                | 22 ++++++------
 .../functions/test_row_to_names_polars.py     | 35 +++++++------------
 2 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/janitor/polars/row_to_names.py b/janitor/polars/row_to_names.py
index 7131a18a7..b507018a2 100644
--- a/janitor/polars/row_to_names.py
+++ b/janitor/polars/row_to_names.py
@@ -4,7 +4,7 @@
 
 from janitor.utils import check, import_message
 
-from .polars_flavor import register_dataframe_method, register_lazyframe_method
+from .polars_flavor import register_dataframe_method
 
 try:
     import polars as pl
@@ -17,19 +17,18 @@
     )
 
 
-@register_lazyframe_method
 @register_dataframe_method
 def row_to_names(
-    df: pl.DataFrame | pl.LazyFrame,
-    row_numbers: int | list = 0,
+    df: pl.DataFrame,
+    row_numbers: int | list | slice = 0,
     remove_rows: bool = False,
     remove_rows_above: bool = False,
     separator: str = "_",
-) -> pl.DataFrame | pl.LazyFrame:
+) -> pl.DataFrame:
     """
     Elevates a row, or rows, to be the column names of a DataFrame.
 
-    `row_to_names` can also be applied to a LazyFrame.
+    For a LazyFrame, the user should materialize into a DataFrame before using `row_to_names`.
 
     Examples:
         Replace column names with the first row.
@@ -104,7 +103,7 @@ def row_to_names(
 
     Args:
         row_numbers: Position of the row(s) containing the variable names.
-            Note that indexing starts from 0. It can also be a list.
+            Note that indexing starts from 0. It can also be a list/slice.
             Defaults to 0 (first row).
         remove_rows: Whether the row(s) should be removed from the DataFrame.
         remove_rows_above: Whether the row(s) above the selected row should
@@ -113,7 +112,7 @@ def row_to_names(
             if row_numbers is a list of integers. Default is '_'.
 
     Returns:
-        A polars DataFrame/LazyFrame.
+        A polars DataFrame.
     """  # noqa: E501
     return _row_to_names(
         df=df,
@@ -125,12 +124,12 @@ def row_to_names(
 
 
 def _row_to_names(
-    df: pl.DataFrame | pl.LazyFrame,
+    df: pl.DataFrame,
     row_numbers: int | list | slice,
     remove_rows: bool,
     remove_rows_above: bool,
     separator: str,
-) -> pl.DataFrame | pl.LazyFrame:
+) -> pl.DataFrame:
     """
     Function to convert rows in the DataFrame to column names.
     """
@@ -167,8 +166,7 @@ def _row_to_names(
         expression = expression.str.concat(delimiter=separator)
         expression = pl.struct(expression)
         mapping = df.select(expression)
-    if isinstance(df, pl.LazyFrame):
-        mapping = mapping.collect()
+
     mapping = mapping.to_series(0)[0]
     df = df.rename(mapping=mapping)
     if remove_rows_above:
diff --git a/tests/polars/functions/test_row_to_names_polars.py b/tests/polars/functions/test_row_to_names_polars.py
index 51e0b9043..372e2e09a 100644
--- a/tests/polars/functions/test_row_to_names_polars.py
+++ b/tests/polars/functions/test_row_to_names_polars.py
@@ -3,17 +3,20 @@
 
 import janitor.polars  # noqa: F401
 
-df = pl.DataFrame(
-    {
-        "Bell__Chart": [1.234_523_45, 2.456_234, 3.234_612_5] * 3,
-        "decorated-elephant": [1, 2, 3] * 3,
-        "animals@#$%^": ["rabbit", "leopard", "lion"] * 3,
-        "cities": ["Cambridge", "Shanghai", "Basel"] * 3,
-    }
-)
+
+@pytest.fixture
+def df():
+    """fixture for tests"""
+    return pl.DataFrame(
+        {
+            "Bell__Chart": [1.234_523_45, 2.456_234, 3.234_612_5] * 3,
+            "decorated-elephant": [1, 2, 3] * 3,
+            "animals@#$%^": ["rabbit", "leopard", "lion"] * 3,
+            "cities": ["Cambridge", "Shanghai", "Basel"] * 3,
+        }
+    )
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_separator_type(df):
     """
     Raise if separator is not a string
@@ -22,7 +25,6 @@ def test_separator_type(df):
         df.row_to_names([1, 2], separator=1)
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_numbers_type(df):
     """
     Raise if row_numbers is not an int/slice/list
@@ -31,7 +33,6 @@ def test_row_numbers_type(df):
         df.row_to_names({1, 2})
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_numbers_slice_step(df):
     """
     Raise if row_numbers is a slice and step is passed.
@@ -40,7 +41,6 @@ def test_row_numbers_slice_step(df):
         df.row_to_names(slice(1, 3, 1))
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_numbers_list_type(df):
     """
     Raise if row_numbers is a list
@@ -52,7 +52,6 @@ def test_row_numbers_list_type(df):
         df.row_to_names(["1", 2])
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names(df):
     df = df.row_to_names(2)
     assert df.columns[0] == "3.2346125"
@@ -61,7 +60,6 @@ def test_row_to_names(df):
     assert df.columns[3] == "Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_slice(df):
     df = df.row_to_names(slice(2, 3))
     assert df.columns[0] == "3.2346125"
@@ -70,7 +68,6 @@ def test_row_to_names_slice(df):
     assert df.columns[3] == "Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_single_list(df):
     "Test output if row_numbers is a list, and contains a single item."
     df = df.row_to_names([2])
@@ -80,7 +77,6 @@ def test_row_to_names_single_list(df):
     assert df.columns[3] == "Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_list(df):
     "Test output if row_numbers is a list."
     df = df.row_to_names([1, 2])
@@ -90,7 +86,6 @@ def test_row_to_names_list(df):
     assert df.columns[3] == "Shanghai_Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_this_row(df):
     df = df.row_to_names(2, remove_rows=True)
     if isinstance(df, pl.LazyFrame):
@@ -101,7 +96,6 @@ def test_row_to_names_delete_this_row(df):
     assert df.to_series(3)[0] == "Cambridge"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_list_delete_this_row(df):
     df = df.row_to_names([2], remove_rows=True)
     if isinstance(df, pl.LazyFrame):
@@ -112,7 +106,6 @@ def test_row_to_names_list_delete_this_row(df):
     assert df.to_series(3)[0] == "Cambridge"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_above(df):
     df = df.row_to_names(2, remove_rows_above=True)
     if isinstance(df, pl.LazyFrame):
@@ -123,7 +116,6 @@ def test_row_to_names_delete_above(df):
     assert df.to_series(3)[0] == "Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_above_list(df):
     "Test output if row_numbers is a list"
     df = df.row_to_names(slice(2, 4), remove_rows_above=True)
@@ -135,7 +127,6 @@ def test_row_to_names_delete_above_list(df):
     assert df.to_series(3)[0] == "Basel"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_above_delete_rows(df):
     """
     Test output for remove_rows=True
@@ -150,7 +141,6 @@ def test_row_to_names_delete_above_delete_rows(df):
     assert df.to_series(3)[0] == "Shanghai"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_delete_above_delete_rows_scalar(df):
     """
     Test output for remove_rows=True
@@ -165,7 +155,6 @@ def test_row_to_names_delete_above_delete_rows_scalar(df):
     assert df.to_series(3)[0] == "Cambridge"
 
 
-@pytest.mark.parametrize("df", [df, df.lazy()])
 def test_row_to_names_not_a_slice_remove_rows_above(df):
     with pytest.raises(
         ValueError, match=r"The remove_rows_above argument is applicable.+"

From ff82eba917a0958e73905771e6f4acc8fa9b3ef3 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Fri, 21 Jun 2024 09:41:06 +1000
Subject: [PATCH 06/17] restore deleted files

---
 examples/notebooks/Row_to_Names.ipynb | 338 ++++++++++++++++++++++++++
 examples/row_to_names.md              |  98 ++++++++
 2 files changed, 436 insertions(+)
 create mode 100644 examples/notebooks/Row_to_Names.ipynb
 create mode 100644 examples/row_to_names.md

diff --git a/examples/notebooks/Row_to_Names.ipynb b/examples/notebooks/Row_to_Names.ipynb
new file mode 100644
index 000000000..2852ffdbd
--- /dev/null
+++ b/examples/notebooks/Row_to_Names.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# row_to_names : Elevates a row to be the column names of a DataFrame."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Background"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook serves to show a brief and simple example of how to swap column names using one of the rows in the dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import janitor\n",
+    "from io import StringIO"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = '''shoe, 220, 100\n",
+    "          shoe, 450, 40\n",
+    "          item, retail_price, cost\n",
+    "          shoe, 200, 38\n",
+    "          bag, 305, 25\n",
+    "       '''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>220</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>450</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>item</td>\n",
+       "      <td>retail_price</td>\n",
+       "      <td>cost</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>200</td>\n",
+       "      <td>38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>bag</td>\n",
+       "      <td>305</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                0              1      2\n",
+       "0            shoe            220    100\n",
+       "1            shoe            450     40\n",
+       "2            item   retail_price   cost\n",
+       "3            shoe            200     38\n",
+       "4             bag            305     25"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "temp = pd.read_csv(StringIO(data), header=None)\n",
+    "temp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Looking at the dataframe above, we would love to use row 2 as our column names. One way to achieve this involves a couple of steps\n",
+    "\n",
+    "1. Use loc/iloc to assign row 2 to columns.\n",
+    "2. Strip off any whitespace.\n",
+    "3. Drop row 2 from the dataframe using the drop method.\n",
+    "4. Set axis name to none."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>item</th>\n",
+       "      <th>retail_price</th>\n",
+       "      <th>cost</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>220</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>450</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>200</td>\n",
+       "      <td>38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>bag</td>\n",
+       "      <td>305</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             item retail_price  cost\n",
+       "0            shoe          220   100\n",
+       "1            shoe          450    40\n",
+       "3            shoe          200    38\n",
+       "4             bag          305    25"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "temp.columns = temp.iloc[2, :]\n",
+    "temp.columns = temp.columns.str.strip()\n",
+    "temp = temp.drop(2, axis=0)\n",
+    "temp = temp.rename_axis(None, axis='columns')\n",
+    "temp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "However, the first two steps prevent us from method chaining. This is easily resolved using the row_to_names function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>item</th>\n",
+       "      <th>retail_price</th>\n",
+       "      <th>cost</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>220</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>450</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>shoe</td>\n",
+       "      <td>200</td>\n",
+       "      <td>38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>bag</td>\n",
+       "      <td>305</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             item  retail_price  cost\n",
+       "0            shoe           220   100\n",
+       "1            shoe           450    40\n",
+       "3            shoe           200    38\n",
+       "4             bag           305    25"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = (\n",
+    "    pd.read_csv(StringIO(data), header=None)\n",
+    "    .row_to_names(row_number=2, remove_row=True)\n",
+    ")\n",
+    "\n",
+    "df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/row_to_names.md b/examples/row_to_names.md
new file mode 100644
index 000000000..2d0bbab68
--- /dev/null
+++ b/examples/row_to_names.md
@@ -0,0 +1,98 @@
+# df.row_to_names()
+
+## Description
+This method elevates a row to be the column names of a DataFrame. It contains parameters to remove the elevated row from the DataFrame along with removing the rows above the selected row.
+
+    :param df: A pandas DataFrame.
+    :param row_number: The row containing the variable names
+    :param remove_row: Should the row be removed from the DataFrame?
+    :param remove_rows_above: Should the rows above row_number be removed from the resulting DataFrame?
+
+## Parameters
+### df
+A pandas dataframe.
+
+### row_number
+The number of the row containing the variable names. Remember, indexing starts at zero!
+
+### remove_row (Default: False)
+Remove the row that is now the headers from the DataFrame.
+
+### remove_rows_above (Default: False)
+Remove the rows from the index above `row_number`.
+
+
+## Setup
+
+```python
+import pandas as pd
+import janitor
+
+
+data_dict = {
+    "a": [1, 2, 3] * 3,
+    "Bell__Chart": [1, 2, 3] * 3,
+    "decorated-elephant": [1, 2, 3] * 3,
+    "animals": ["rabbit", "leopard", "lion"] * 3,
+    "cities": ["Cambridge", "Shanghai", "Basel"] * 3,
+}
+```
+
+
+
+## Example1: Move first row to column names
+ ```python
+example_dataframe = pd.DataFrame(data_dict)
+
+example_dataframe.row_to_names(0)
+```
+
+### Output
+
+       1  1  1   rabbit  Cambridge
+    0  1  1  1   rabbit  Cambridge
+    1  2  2  2  leopard   Shanghai
+    2  3  3  3     lion      Basel
+    3  1  1  1   rabbit  Cambridge
+    4  2  2  2  leopard   Shanghai
+    5  3  3  3     lion      Basel
+    6  1  1  1   rabbit  Cambridge
+    7  2  2  2  leopard   Shanghai
+
+## Example2: Move first row to column names and remove row
+
+```python
+example_dataframe = pd.DataFrame(data_dict)
+
+example_dataframe.row_to_names(0, remove_row=True)
+```
+
+### Output
+
+       1  1  1   rabbit  Cambridge
+    1  2  2  2  leopard   Shanghai
+    2  3  3  3     lion      Basel
+    3  1  1  1   rabbit  Cambridge
+    4  2  2  2  leopard   Shanghai
+    5  3  3  3     lion      Basel
+    6  1  1  1   rabbit  Cambridge
+    7  2  2  2  leopard   Shanghai
+    8  3  3  3     lion      Basel
+
+## Example3: Move third row (index 2) to column names, remove row, and remove rows above selected row
+
+```python
+example_dataframe = pd.DataFrame(data_dict)
+
+example_dataframe.row_to_names(2, remove_row=True, remove_rows_above=True)
+```
+
+### Output
+
+       3  3  3     lion      Basel
+    3  1  1  1   rabbit  Cambridge
+    4  2  2  2  leopard   Shanghai
+    5  3  3  3     lion      Basel
+    6  1  1  1   rabbit  Cambridge
+    7  2  2  2  leopard   Shanghai
+    8  3  3  3     lion      Basel

From e13f619f19e91436327090f577e97431b93c4d1a Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Fri, 21 Jun 2024 12:21:21 +1000
Subject: [PATCH 07/17] wip

---
 janitor/functions/row_to_names.py | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index adae640b3..409ba0e48 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -84,10 +84,7 @@ def row_to_names(
         A pandas DataFrame with set column names.
     """  # noqa: E501
 
-    if not pd.options.mode.copy_on_write:
-        df = df.copy()
-    else:
-        df = df[:]
+    df_ = df[:]
 
     if isinstance(row_numbers, int):
         row_numbers = slice(row_numbers, row_numbers + 1)
@@ -109,7 +106,7 @@ def row_to_names(
 
     # should raise if positional indexers are missing
     # IndexError: positional indexers are out-of-bounds
-    headers = df.iloc[row_numbers]
+    headers = df_.iloc[row_numbers]
     if isinstance(headers, pd.DataFrame) and (len(headers) == 1):
         headers = headers.squeeze()
     if isinstance(headers, pd.Series):
@@ -118,8 +115,8 @@ def row_to_names(
         headers = [entry.array for _, entry in headers.items()]
         headers = pd.MultiIndex.from_tuples(headers)
 
-    df.columns = headers
-    df.columns.name = None
+    df_.columns = headers
+    df_.columns.name = None
 
     if remove_rows_above:
         if not is_a_slice:
@@ -129,20 +126,20 @@ def row_to_names(
                 "or a slice."
             )
         if remove_rows:
-            df = df.iloc[row_numbers.stop :]
+            df_ = df_.iloc[row_numbers.stop :]
         else:
-            df = df.iloc[row_numbers.start :]
+            df_ = df_.iloc[row_numbers.start :]
     elif remove_rows:
         if is_a_slice:
             start = row_numbers.start if row_numbers.start else 0
             stop = row_numbers.stop
-            df = [df.iloc[:start], df.iloc[stop:]]
-            df = pd.concat(df, sort=False, copy=False)
+            df_ = [df_.iloc[:start], df_.iloc[stop:]]
+            df_ = pd.concat(df_, sort=False, copy=False)
         else:
-            row_numbers = np.setdiff1d(range(len(df)), row_numbers)
-            df = df.iloc[row_numbers]
+            row_numbers = np.setdiff1d(range(len(df_)), row_numbers)
+            df_ = df_.iloc[row_numbers]
     if reset_index:
-        df.index = range(len(df))
+        df_.index = range(len(df_))
     else:
         warnings.warn(
             "The function row_to_names will, in the official 1.0 release, "
@@ -150,4 +147,4 @@ def row_to_names(
             "You can prepare for this change right now by explicitly setting "
             "`reset_index=True` when calling on `row_to_names`."
         )
-    return df
+    return df_

From ce29cc7b392f63c6a9966609246a0a94b4697c3d Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:33:34 +1000
Subject: [PATCH 08/17] update for pandas row_to_names

---
 janitor/functions/row_to_names.py | 185 +++++++++++++++++++++---------
 pyproject.toml                    |   2 +-
 2 files changed, 134 insertions(+), 53 deletions(-)

diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index 409ba0e48..aa555c241 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-import warnings
+from functools import singledispatch
 
 import numpy as np
 import pandas as pd
@@ -84,67 +84,148 @@ def row_to_names(
         A pandas DataFrame with set column names.
     """  # noqa: E501
 
-    df_ = df[:]
+    return _row_to_names(
+        row_numbers,
+        df=df,
+        remove_rows=remove_rows,
+        remove_rows_above=remove_rows_above,
+        reset_index=reset_index,
+    )
+
 
-    if isinstance(row_numbers, int):
-        row_numbers = slice(row_numbers, row_numbers + 1)
-    elif isinstance(row_numbers, slice):
-        if row_numbers.step is not None:
-            raise ValueError(
-                "The step argument for slice is not supported in row_to_names."
-            )
-    elif isinstance(row_numbers, list):
-        for entry in row_numbers:
-            check("entry in the row_numbers argument", entry, [int])
+@singledispatch
+def _row_to_names(
+    row_numbers, df, remove_rows, remove_rows_above, reset_index
+) -> pd.DataFrame:
+    """
+    Base function for row_to_names.
+    """
+    raise TypeError(
+        "row_numbers should be either an integer, "
+        "a slice or a list; "
+        f"instead got type {type(row_numbers).__name__}"
+    )
+
+
+@_row_to_names.register(int)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, reset_index
+):
+    df_ = df[:]
+    headers = df_.iloc[row_numbers]
+    df_.columns = headers
+    df_.columns.name = None
+    if not remove_rows and not remove_rows_above and not reset_index:
+        return df_
+    if not remove_rows and not remove_rows_above and reset_index:
+        return df_.reset_index(drop=True)
+
+    len_df = len(df_)
+    arrays = [arr for _, arr in df_.items()]
+    if remove_rows_above and remove_rows:
+        indexer = np.arange(row_numbers + 1, len_df)
+    elif remove_rows_above:
+        indexer = np.arange(row_numbers, len_df)
+    elif remove_rows:
+        indexer = np.arange(len_df)
+        mask = np.ones(len_df, dtype=np.bool_)
+        mask[row_numbers] = False
+        indexer = indexer[mask]
+    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
+    if reset_index:
+        df_index = pd.RangeIndex(start=0, stop=indexer.size)
     else:
-        raise TypeError(
-            "row_numbers should be either an integer, "
-            "a slice or a list; "
-            f"instead got type {type(row_numbers).__name__}"
+        df_index = df_.index[indexer]
+    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
+    _df.columns = df_.columns
+    return _df
+
+
+@_row_to_names.register(slice)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, reset_index
+):
+    if row_numbers.step is not None:
+        raise ValueError(
+            "The step argument for slice is not supported in row_to_names."
         )
-    is_a_slice = isinstance(row_numbers, slice)
-
-    # should raise if positional indexers are missing
-    # IndexError: positional indexers are out-of-bounds
+    df_ = df[:]
     headers = df_.iloc[row_numbers]
     if isinstance(headers, pd.DataFrame) and (len(headers) == 1):
         headers = headers.squeeze()
-    if isinstance(headers, pd.Series):
-        headers = pd.Index(headers)
+        df_.columns = headers
+        df_.columns.name = None
     else:
-        headers = [entry.array for _, entry in headers.items()]
+        headers = [array._values for _, array in headers.items()]
         headers = pd.MultiIndex.from_tuples(headers)
+        df_.columns = headers
+    if not remove_rows and not remove_rows_above and not reset_index:
+        return df_
+    if not remove_rows and not remove_rows_above and reset_index:
+        return df_.reset_index(drop=True)
+    len_df = len(df_)
+    arrays = [arr._values for _, arr in df_.items()]
+    if remove_rows_above and remove_rows:
+        indexer = np.arange(row_numbers.stop + 1, len_df)
+    elif remove_rows_above:
+        indexer = np.arange(row_numbers.start, len_df)
+    elif remove_rows:
+        indexer = np.arange(len_df)
+        mask = np.ones(len_df, dtype=np.bool_)
+        mask[row_numbers] = False
+        indexer = indexer[mask]
+    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
+    if reset_index:
+        df_index = pd.RangeIndex(start=0, stop=indexer.size)
+    else:
+        df_index = df_.index[indexer]
+    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
+    _df.columns = df_.columns
+    return _df
 
-    df_.columns = headers
-    df_.columns.name = None
 
+@_row_to_names.register(list)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, reset_index
+):
     if remove_rows_above:
-        if not is_a_slice:
-            raise ValueError(
-                "The remove_rows_above argument is applicable "
-                "only if the row_numbers argument is an integer "
-                "or a slice."
-            )
-        if remove_rows:
-            df_ = df_.iloc[row_numbers.stop :]
-        else:
-            df_ = df_.iloc[row_numbers.start :]
-    elif remove_rows:
-        if is_a_slice:
-            start = row_numbers.start if row_numbers.start else 0
-            stop = row_numbers.stop
-            df_ = [df_.iloc[:start], df_.iloc[stop:]]
-            df_ = pd.concat(df_, sort=False, copy=False)
-        else:
-            row_numbers = np.setdiff1d(range(len(df_)), row_numbers)
-            df_ = df_.iloc[row_numbers]
+        raise ValueError(
+            "The remove_rows_above argument is applicable "
+            "only if the row_numbers argument is an integer "
+            "or a slice."
+        )
+
+    for entry in row_numbers:
+        check("entry in the row_numbers argument", entry, [int])
+
+    df_ = df[:]
+    headers = df_.iloc[row_numbers]
+    if isinstance(headers, pd.DataFrame) and (len(headers) == 1):
+        headers = headers.squeeze()
+        df_.columns = headers
+        df_.columns.name = None
+    else:
+        headers = [array._values for _, array in headers.items()]
+        headers = pd.MultiIndex.from_tuples(headers)
+        df_.columns = headers
+
+    if not remove_rows and reset_index:
+        return df_.reset_index(drop=True)
+    if not remove_rows and not reset_index:
+        return df_
+
+    len_df = len(df_)
+    arrays = [arr._values for _, arr in df_.items()]
+    indexer = np.arange(len_df)
+    mask = np.ones(len_df, dtype=np.bool_)
+    mask[row_numbers] = False
+    indexer = indexer[mask]
+
+    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
     if reset_index:
-        df_.index = range(len(df_))
+        df_index = pd.RangeIndex(start=0, stop=indexer.size)
     else:
-        warnings.warn(
-            "The function row_to_names will, in the official 1.0 release, "
-            "change its behaviour to reset the dataframe's index by default. "
-            "You can prepare for this change right now by explicitly setting "
-            "`reset_index=True` when calling on `row_to_names`."
-        )
-    return df_
+        df_index = df_.index[indexer]
+    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
+    _df.columns = df_.columns
+    return _df
diff --git a/pyproject.toml b/pyproject.toml
index 0a697589f..85381f28f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 55
+fail-under = 5
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 928f53f765994185938158d2b7f95fd63faa9376 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:33:42 +1000
Subject: [PATCH 09/17] update for pandas row_to_names

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 85381f28f..0a697589f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 5
+fail-under = 55
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 8e8426382db43d27bfcb1ae89cb4818964bd77f9 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:37:41 +1000
Subject: [PATCH 10/17] fix docs

---
 janitor/functions/row_to_names.py | 5 ++---
 pyproject.toml                    | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index aa555c241..be668e361 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -47,7 +47,7 @@ def row_to_names(
         1    9     y
         >>> df.row_to_names([0,1], remove_rows=True, reset_index=True)
           nums chars
-          6    x
+             6     x
         0    9     y
 
         Remove rows above the elevated row and the elevated row itself.
@@ -72,8 +72,7 @@ def row_to_names(
     Args:
         df: A pandas DataFrame.
         row_numbers: Position of the row(s) containing the variable names.
-            Note that indexing starts from 0. It can also be a list,
-            in which case, a MultiIndex column is created.
+            It can be an integer, a list or a slice.
             Defaults to 0 (first row).
         remove_rows: Whether the row(s) should be removed from the DataFrame.
         remove_rows_above: Whether the row(s) above the selected row should
diff --git a/pyproject.toml b/pyproject.toml
index 0a697589f..85381f28f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 55
+fail-under = 5
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 9d4646eed77a17a46f1b31e9de7855579dfc22c9 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:37:48 +1000
Subject: [PATCH 11/17] fix docs

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 85381f28f..0a697589f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 5
+fail-under = 55
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 9702b1908900a00921185e99887522cb93a4fcf2 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:46:28 +1000
Subject: [PATCH 12/17] fix docs

---
 janitor/functions/row_to_names.py | 2 +-
 pyproject.toml                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index be668e361..1ddd95383 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -120,7 +120,7 @@ def _row_to_names_dispatch(  # noqa: F811
         return df_.reset_index(drop=True)
 
     len_df = len(df_)
-    arrays = [arr for _, arr in df_.items()]
+    arrays = [arr._values for _, arr in df_.items()]
     if remove_rows_above and remove_rows:
         indexer = np.arange(row_numbers + 1, len_df)
     elif remove_rows_above:
diff --git a/pyproject.toml b/pyproject.toml
index 0a697589f..85381f28f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 55
+fail-under = 5
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 83bb7c051f3650651a398bc963da5abbd5ab60b8 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 14:46:35 +1000
Subject: [PATCH 13/17] fix docs

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 85381f28f..0a697589f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 5
+fail-under = 55
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 84f9d25d31c3cb65e0a24145f7c3d99c5187189d Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 16:04:05 +1000
Subject: [PATCH 14/17] update for polars row to names

---
 janitor/functions/row_to_names.py             |   2 +-
 janitor/polars/row_to_names.py                | 145 +++++++++---------
 tests/functions/test_row_to_names.py          |  16 ++
 .../functions/test_row_to_names_polars.py     |  20 ---
 4 files changed, 93 insertions(+), 90 deletions(-)

diff --git a/janitor/functions/row_to_names.py b/janitor/functions/row_to_names.py
index 1ddd95383..ee97d8531 100644
--- a/janitor/functions/row_to_names.py
+++ b/janitor/functions/row_to_names.py
@@ -165,7 +165,7 @@ def _row_to_names_dispatch(  # noqa: F811
     len_df = len(df_)
     arrays = [arr._values for _, arr in df_.items()]
     if remove_rows_above and remove_rows:
-        indexer = np.arange(row_numbers.stop + 1, len_df)
+        indexer = np.arange(row_numbers.stop, len_df)
     elif remove_rows_above:
         indexer = np.arange(row_numbers.start, len_df)
     elif remove_rows:
diff --git a/janitor/polars/row_to_names.py b/janitor/polars/row_to_names.py
index b507018a2..a0b16b62d 100644
--- a/janitor/polars/row_to_names.py
+++ b/janitor/polars/row_to_names.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+from functools import singledispatch
+
 from janitor.utils import check, import_message
 
 from .polars_flavor import register_dataframe_method
@@ -28,8 +30,6 @@ def row_to_names(
     """
     Elevates a row, or rows, to be the column names of a DataFrame.
 
-    For a LazyFrame, the user should materialize into a DataFrame before using `row_to_names`..
-
     Examples:
         Replace column names with the first row.
 
@@ -103,8 +103,7 @@ def row_to_names(
 
     Args:
         row_numbers: Position of the row(s) containing the variable names.
-            Note that indexing starts from 0. It can also be a list/slice.
-            Defaults to 0 (first row).
+            It can be an integer, list or a slice.
         remove_rows: Whether the row(s) should be removed from the DataFrame.
         remove_rows_above: Whether the row(s) above the selected row should
             be removed from the DataFrame.
@@ -115,85 +114,93 @@ def row_to_names(
         A polars DataFrame.
     """  # noqa: E501
     return _row_to_names(
+        row_numbers,
         df=df,
-        row_numbers=row_numbers,
         remove_rows=remove_rows,
         remove_rows_above=remove_rows_above,
         separator=separator,
     )
 
 
+@singledispatch
 def _row_to_names(
-    df: pl.DataFrame,
-    row_numbers: int | list | slice,
-    remove_rows: bool,
-    remove_rows_above: bool,
-    separator: str,
+    row_numbers, df, remove_rows, remove_rows_above, separator
 ) -> pl.DataFrame:
     """
-    Function to convert rows in the DataFrame to column names.
+    Base function for row_to_names.
     """
-    check("separator", separator, [str])
-    if isinstance(row_numbers, int):
-        row_numbers = slice(row_numbers, row_numbers + 1)
-    elif isinstance(row_numbers, slice):
-        if row_numbers.step is not None:
-            raise ValueError(
-                "The step argument for slice is not supported in row_to_names."
-            )
-    elif isinstance(row_numbers, list):
-        for entry in row_numbers:
-            check("entry in the row_numbers argument", entry, [int])
-    else:
-        raise TypeError(
-            "row_numbers should be either an integer, "
-            "a slice or a list; "
-            f"instead got type {type(row_numbers).__name__}"
+    raise TypeError(
+        "row_numbers should be either an integer, "
+        "a slice or a list; "
+        f"instead got type {type(row_numbers).__name__}"
+    )
+
+
+@_row_to_names.register(int)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, separator
+):
+    expression = pl.col("*").cast(pl.String).gather(row_numbers)
+    expression = pl.struct(expression)
+    headers = df.select(expression).to_series(0).to_list()[0]
+    df = df.rename(mapping=headers)
+    if remove_rows_above and remove_rows:
+        return df.slice(row_numbers + 1)
+    elif remove_rows_above:
+        return df.slice(row_numbers)
+    elif remove_rows:
+        expression = pl.int_range(pl.len()).ne(row_numbers)
+        return df.filter(expression)
+    return df
+
+
+@_row_to_names.register(slice)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, separator
+):
+    if row_numbers.step is not None:
+        raise ValueError(
+            "The step argument for slice is not supported in row_to_names."
         )
-    is_a_slice = isinstance(row_numbers, slice)
-    if is_a_slice:
-        expression = pl.all().str.concat(delimiter=separator)
-        expression = pl.struct(expression)
-        offset = row_numbers.start
-        length = row_numbers.stop - row_numbers.start
-        mapping = df.slice(
-            offset=offset,
-            length=length,
+    headers = df.slice(row_numbers.start, row_numbers.stop - row_numbers.start)
+    headers = headers.cast(pl.String)
+    expression = pl.all().str.concat(delimiter=separator)
+    expression = pl.struct(expression)
+    headers = headers.select(expression).to_series(0).to_list()[0]
+    df = df.rename(mapping=headers)
+    if remove_rows_above and remove_rows:
+        return df.slice(row_numbers.stop)
+    elif remove_rows_above:
+        return df.slice(row_numbers.start)
+    elif remove_rows:
+        expression = pl.int_range(pl.len()).is_between(
+            row_numbers.start, row_numbers.stop, closed="left"
         )
-        mapping = mapping.select(expression)
-    else:
-        expression = pl.all().gather(row_numbers)
-        expression = expression.str.concat(delimiter=separator)
-        expression = pl.struct(expression)
-        mapping = df.select(expression)
-
-    mapping = mapping.to_series(0)[0]
-    df = df.rename(mapping=mapping)
-    if remove_rows_above:
-        if not is_a_slice:
-            raise ValueError(
-                "The remove_rows_above argument is applicable "
-                "only if the row_numbers argument is an integer "
-                "or a slice."
-            )
-        if remove_rows:
-            return df.slice(offset=row_numbers.stop)
-        return df.slice(offset=row_numbers.start)
+        return df.filter(~expression)
+    return df
 
-    if remove_rows:
-        if is_a_slice:
-            df = [
-                df.slice(offset=0, length=row_numbers.start),
-                df.slice(offset=row_numbers.stop),
-            ]
-            return pl.concat(df, rechunk=True)
-        name = "".join(df.columns)
-        name = f"{name}_"
-        df = (
-            df.with_row_index(name=name)
-            .filter(pl.col(name=name).is_in(row_numbers).not_())
-            .select(pl.exclude(name))
+
+@_row_to_names.register(list)  # noqa: F811
+def _row_to_names_dispatch(  # noqa: F811
+    row_numbers, df, remove_rows, remove_rows_above, separator
+):
+    if remove_rows_above:
+        raise ValueError(
+            "The remove_rows_above argument is applicable "
+            "only if the row_numbers argument is an integer "
+            "or a slice."
         )
-        return df
 
+    for entry in row_numbers:
+        check("entry in the row_numbers argument", entry, [int])
+
+    expression = pl.col("*").gather(row_numbers)
+    headers = df.select(expression).cast(pl.String)
+    expression = pl.all().str.concat(delimiter=separator)
+    expression = pl.struct(expression)
+    headers = headers.select(expression).to_series(0).to_list()[0]
+    df = df.rename(mapping=headers)
+    if remove_rows:
+        expression = pl.int_range(pl.len()).is_in(row_numbers)
+        return df.filter(~expression)
     return df
diff --git a/tests/functions/test_row_to_names.py b/tests/functions/test_row_to_names.py
index 5295b44d9..758afe44d 100644
--- a/tests/functions/test_row_to_names.py
+++ b/tests/functions/test_row_to_names.py
@@ -74,6 +74,22 @@ def test_row_to_names_delete_above_slice(dataframe):
     assert df.iloc[0, 4] == "Basel"
 
 
+@pytest.mark.functions
+def test_row_to_names_delete_above_delete_rows(dataframe):
+    """
+    Test output for remove_rows=True
+    and remove_rows_above=True
+    """
+    df = dataframe.row_to_names(
+        slice(2, 4), remove_rows=True, remove_rows_above=True
+    )
+    assert df.iloc[0, 0] == 2
+    assert df.iloc[0, 1] == 2.456234
+    assert df.iloc[0, 2] == 2
+    assert df.iloc[0, 3] == "leopard"
+    assert df.iloc[0, 4] == "Shanghai"
+
+
 @pytest.mark.functions
 def test_row_to_names_delete_above_is_a_list(dataframe):
     "Raise if row_numbers is a list"
diff --git a/tests/polars/functions/test_row_to_names_polars.py b/tests/polars/functions/test_row_to_names_polars.py
index 372e2e09a..1c81660e0 100644
--- a/tests/polars/functions/test_row_to_names_polars.py
+++ b/tests/polars/functions/test_row_to_names_polars.py
@@ -17,14 +17,6 @@ def df():
     )
 
 
-def test_separator_type(df):
-    """
-    Raise if separator is not a string
-    """
-    with pytest.raises(TypeError, match="separator should be.+"):
-        df.row_to_names([1, 2], separator=1)
-
-
 def test_row_numbers_type(df):
     """
     Raise if row_numbers is not an int/slice/list
@@ -88,8 +80,6 @@ def test_row_to_names_list(df):
 
 def test_row_to_names_delete_this_row(df):
     df = df.row_to_names(2, remove_rows=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 1.234_523_45
     assert df.to_series(1)[0] == 1
     assert df.to_series(2)[0] == "rabbit"
@@ -98,8 +88,6 @@ def test_row_to_names_delete_this_row(df):
 
 def test_row_to_names_list_delete_this_row(df):
     df = df.row_to_names([2], remove_rows=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 1.234_523_45
     assert df.to_series(1)[0] == 1
     assert df.to_series(2)[0] == "rabbit"
@@ -108,8 +96,6 @@ def test_row_to_names_list_delete_this_row(df):
 
 def test_row_to_names_delete_above(df):
     df = df.row_to_names(2, remove_rows_above=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 3.234_612_5
     assert df.to_series(1)[0] == 3
     assert df.to_series(2)[0] == "lion"
@@ -119,8 +105,6 @@ def test_row_to_names_delete_above(df):
 def test_row_to_names_delete_above_list(df):
     "Test output if row_numbers is a list"
     df = df.row_to_names(slice(2, 4), remove_rows_above=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 3.234_612_5
     assert df.to_series(1)[0] == 3
     assert df.to_series(2)[0] == "lion"
@@ -133,8 +117,6 @@ def test_row_to_names_delete_above_delete_rows(df):
     and remove_rows_above=True
     """
     df = df.row_to_names(slice(2, 4), remove_rows=True, remove_rows_above=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 2.456234
     assert df.to_series(1)[0] == 2
     assert df.to_series(2)[0] == "leopard"
@@ -147,8 +129,6 @@ def test_row_to_names_delete_above_delete_rows_scalar(df):
     and remove_rows_above=True
     """
     df = df.row_to_names(2, remove_rows=True, remove_rows_above=True)
-    if isinstance(df, pl.LazyFrame):
-        df = df.collect()
     assert df.to_series(0)[0] == 1.23452345
     assert df.to_series(1)[0] == 1
     assert df.to_series(2)[0] == "rabbit"

From e44c44a28a901072a2afdf3a13027b3296cf4237 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 16:44:11 +1000
Subject: [PATCH 15/17] update polars row to names

---
 janitor/polars/row_to_names.py | 19 ++++++++-----------
 pyproject.toml                 |  2 +-
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/janitor/polars/row_to_names.py b/janitor/polars/row_to_names.py
index a0b16b62d..9c90e8e89 100644
--- a/janitor/polars/row_to_names.py
+++ b/janitor/polars/row_to_names.py
@@ -140,9 +140,8 @@ def _row_to_names(
 def _row_to_names_dispatch(  # noqa: F811
     row_numbers, df, remove_rows, remove_rows_above, separator
 ):
-    expression = pl.col("*").cast(pl.String).gather(row_numbers)
-    expression = pl.struct(expression)
-    headers = df.select(expression).to_series(0).to_list()[0]
+    headers = df.row(row_numbers, named=True)
+    headers = {col: str(repl) for col, repl in headers.items()}
     df = df.rename(mapping=headers)
     if remove_rows_above and remove_rows:
         return df.slice(row_numbers + 1)
@@ -163,10 +162,9 @@ def _row_to_names_dispatch(  # noqa: F811
             "The step argument for slice is not supported in row_to_names."
         )
     headers = df.slice(row_numbers.start, row_numbers.stop - row_numbers.start)
-    headers = headers.cast(pl.String)
     expression = pl.all().str.concat(delimiter=separator)
-    expression = pl.struct(expression)
-    headers = headers.select(expression).to_series(0).to_list()[0]
+    headers = headers.select(expression).row(0, named=True)
+    headers = {col: str(repl) for col, repl in headers.items()}
     df = df.rename(mapping=headers)
     if remove_rows_above and remove_rows:
         return df.slice(row_numbers.stop)
@@ -194,11 +192,10 @@ def _row_to_names_dispatch(  # noqa: F811
     for entry in row_numbers:
         check("entry in the row_numbers argument", entry, [int])
 
-    expression = pl.col("*").gather(row_numbers)
-    headers = df.select(expression).cast(pl.String)
-    expression = pl.all().str.concat(delimiter=separator)
-    expression = pl.struct(expression)
-    headers = headers.select(expression).to_series(0).to_list()[0]
+    expression = pl.all().gather(row_numbers)
+    expression = expression.str.concat(delimiter=separator)
+    headers = df.select(expression).row(0, named=True)
+    headers = {col: str(repl) for col, repl in headers.items()}
     df = df.rename(mapping=headers)
     if remove_rows:
         expression = pl.int_range(pl.len()).is_in(row_numbers)
diff --git a/pyproject.toml b/pyproject.toml
index 0a697589f..85381f28f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 55
+fail-under = 5
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From c345965a9d90b15c47af15b7762e852e228d4e4b Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 16:44:20 +1000
Subject: [PATCH 16/17] update polars row to names

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 85381f28f..0a697589f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38']
 
 [tool.interrogate]
 exclude = ["setup.py", "docs", "nbconvert_config.py"]
-fail-under = 5
+fail-under = 55
 ignore-init-method = true
 ignore-init-module = true
 ignore-module = false

From 7f7afb3eb902d5bac57dda2462e6f1b4146edb33 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Sun, 7 Jul 2024 16:46:27 +1000
Subject: [PATCH 17/17] remove warnings related to typing

---
 janitor/polars/complete.py     | 2 +-
 janitor/polars/pivot_longer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/janitor/polars/complete.py b/janitor/polars/complete.py
index 546f903bc..ddd6f0a2d 100644
--- a/janitor/polars/complete.py
+++ b/janitor/polars/complete.py
@@ -11,7 +11,7 @@
 try:
     import polars as pl
     import polars.selectors as cs
-    from polars.type_aliases import ColumnNameOrSelector
+    from polars._typing import ColumnNameOrSelector
 except ImportError:
     import_message(
         submodule="polars",
diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py
index 9dea2581f..15cce254c 100644
--- a/janitor/polars/pivot_longer.py
+++ b/janitor/polars/pivot_longer.py
@@ -8,7 +8,7 @@
 
 try:
     import polars as pl
-    from polars.type_aliases import ColumnNameOrSelector
+    from polars._typing import ColumnNameOrSelector
 except ImportError:
     import_message(
         submodule="polars",