diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 44c47efd..0940d66a 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -252,7 +252,9 @@ def __init__(
         dtype: np.dtype = np.float64,
     ):
         if pd.isnull(cat_vec).any():
-            raise ValueError("Categorical data can't have missing values.")
+            self.has_missing = True
+        else:
+            self.has_missing = False
 
         if isinstance(cat_vec, pd.Categorical):
             self.cat = cat_vec
@@ -327,9 +329,15 @@ def matvec(
         if out is None:
             out = np.zeros(self.shape[0], dtype=other_m.dtype)
 
-        if self.drop_first:
+        if self.drop_first or self.has_missing:
             matvec_drop_first(
-                self.indices, other_m, self.shape[0], cols, self.shape[1], out
+                self.indices,
+                other_m,
+                self.shape[0],
+                cols,
+                self.shape[1],
+                out,
+                self.drop_first,
             )
         else:
             matvec(self.indices, other_m, self.shape[0], cols, self.shape[1], out)
@@ -394,9 +402,16 @@ def transpose_matvec(
         if cols is not None:
             cols = set_up_rows_or_cols(cols, self.shape[1])
 
-        if self.drop_first:
+        if self.drop_first or self.has_missing:
             transpose_matvec_drop_first(
-                self.indices, vec, self.shape[1], vec.dtype, rows, cols, out
+                self.indices,
+                vec,
+                self.shape[1],
+                vec.dtype,
+                rows,
+                cols,
+                out,
+                self.drop_first,
             )
         else:
             transpose_matvec(
@@ -430,9 +445,9 @@ def sandwich(
         """
         d = np.asarray(d)
         rows = set_up_rows_or_cols(rows, self.shape[0])
-        if self.drop_first:
+        if self.drop_first or self.has_missing:
             res_diag = sandwich_categorical_drop_first(
-                self.indices, d, rows, d.dtype, self.shape[1]
+                self.indices, d, rows, d.dtype, self.shape[1], self.drop_first
             )
         else:
             res_diag = sandwich_categorical(
@@ -474,9 +489,9 @@ def getcol(self, i: int) -> sps.csc_matrix:
 
     def tocsr(self) -> sps.csr_matrix:
         """Return scipy csr representation of matrix."""
-        if self.drop_first:
+        if self.drop_first or self.has_missing:
             nnz, indices, indptr = subset_categorical_drop_first(
-                self.indices, self.shape[1]
+                self.indices, self.shape[1], self.drop_first
             )
             return sps.csr_matrix(
                 (np.ones(nnz, dtype=int), indices, indptr), shape=self.shape
@@ -615,7 +630,7 @@ def multiply(self, other) -> SparseMatrix:
                 f"Shapes do not match. Expected length of {self.shape[0]}. Got {len(other)}."
             )
 
-        if self.drop_first:
+        if self.drop_first or self.has_missing:
             return SparseMatrix(
                 sps.csr_matrix(
                     multiply_drop_first(
@@ -623,6 +638,7 @@ def multiply(self, other) -> SparseMatrix:
                         d=np.squeeze(other),
                         ncols=self.shape[1],
                         dtype=other.dtype,
+                        drop_first=self.drop_first,
                     ),
                     shape=self.shape,
                 )
diff --git a/src/tabmat/ext/cat_split_helpers-tmpl.cpp b/src/tabmat/ext/cat_split_helpers-tmpl.cpp
index c40f851c..bbe9aa72 100644
--- a/src/tabmat/ext/cat_split_helpers-tmpl.cpp
+++ b/src/tabmat/ext/cat_split_helpers-tmpl.cpp
@@ -9,6 +9,9 @@ void _transpose_matvec_${dropfirst}(
     F* other,
     F* res,
     Int res_size
+    % if dropfirst == 'all_rows_drop_first':
+        , bool drop_first
+    % endif
 ) {
     #pragma omp parallel
     {
@@ -16,8 +19,8 @@ void _transpose_matvec_${dropfirst}(
         #pragma omp for
         for (Py_ssize_t i = 0; i < n_rows; i++) {
             % if dropfirst == 'all_rows_drop_first':
-                Py_ssize_t col_idx = indices[i] - 1;
-                if (col_idx != -1) {
+                Py_ssize_t col_idx = indices[i] - drop_first;
+                if (col_idx >= 0) {
                     restemp[col_idx] += other[i];
                 }
             % else:
@@ -54,11 +57,11 @@ void _sandwich_cat_cat(
         for (Py_ssize_t k_idx = 0; k_idx < len_rows; k_idx++) {
             Int k = rows[k_idx];
             Int i = i_indices[k] - i_drop_first;
-            if (i == -1) {
+            if (i < 0) {
                 continue;
             }
             Int j = j_indices[k] - j_drop_first;
-            if (j == -1) {
+            if (j < 0) {
                 continue;
             }
             restemp[(Py_ssize_t) i * res_n_col + j] += d[k];
@@ -99,13 +102,15 @@ void _sandwich_cat_dense${order}(
             // instructions
             // MAYBE TODO: explore whether swapping the loop order for F-ordered mat_j
             // is useful.
-            for (Py_ssize_t j_idx = 0; j_idx < len_j_cols; j_idx++) {
-                Py_ssize_t j = j_cols[j_idx];
-                % if order == 'C':
-                    restemp[i * len_j_cols + j_idx] += d[k] * mat_j[k * mat_j_ncol + j];
-                % else:
-                    restemp[i * len_j_cols + j_idx] += d[k] * mat_j[j * mat_j_nrow + k];
-                % endif
+            if (i >= 0) {
+                for (Py_ssize_t j_idx = 0; j_idx < len_j_cols; j_idx++) {
+                    Py_ssize_t j = j_cols[j_idx];
+                    % if order == 'C':
+                        restemp[i * len_j_cols + j_idx] += d[k] * mat_j[k * mat_j_ncol + j];
+                    % else:
+                        restemp[i * len_j_cols + j_idx] += d[k] * mat_j[j * mat_j_nrow + k];
+                    % endif
+                }
             }
         }
         for (Py_ssize_t i = 0; i < res_size; i++) {
diff --git a/src/tabmat/ext/categorical.pyx b/src/tabmat/ext/categorical.pyx
index 59ea308a..f28a069f 100644
--- a/src/tabmat/ext/categorical.pyx
+++ b/src/tabmat/ext/categorical.pyx
@@ -12,7 +12,7 @@ from libcpp cimport bool
 
 cdef extern from "cat_split_helpers.cpp":
     void _transpose_matvec_all_rows[Int, F](Int, Int*, F*, F*, Int)
-    void _transpose_matvec_all_rows_drop_first[Int, F](Int, Int*, F*, F*, Int)
+    void _transpose_matvec_all_rows_drop_first[Int, F](Int, Int*, F*, F*, Int, bool)
 
 
 def transpose_matvec(
@@ -69,7 +69,8 @@ def transpose_matvec_drop_first(
     dtype,
     rows,
     cols,
-    floating[:] out
+    floating[:] out,
+    bint drop_first
 ):
     cdef int row, row_idx, n_keep_rows, col_idx
     cdef int n_rows = len(indices)
@@ -81,15 +82,15 @@ def transpose_matvec_drop_first(
 
     # Case 1: No row or col restrictions
     if no_row_restrictions and no_col_restrictions:
-        _transpose_matvec_all_rows_drop_first(n_rows, &indices[0], &other[0], &out[0], out_size)
+        _transpose_matvec_all_rows_drop_first(n_rows, &indices[0], &other[0], &out[0], out_size, drop_first)
     # Case 2: row restrictions but no col restrictions
     elif no_col_restrictions:
         rows_view = rows
         n_keep_rows = len(rows_view)
         for row_idx in range(n_keep_rows):
             row = rows_view[row_idx]
-            col_idx = indices[row] - 1
-            if col_idx != -1:
+            col_idx = indices[row] - drop_first
+            if col_idx >= 0:
                 out[col_idx] += other[row]
     # Cases 3 and 4: col restrictions
     else:
@@ -97,8 +98,8 @@ def transpose_matvec_drop_first(
         # Case 3: Col restrictions but no row restrictions
         if no_row_restrictions:
             for row_idx in range(n_rows):
-                col_idx = indices[row_idx] - 1
-                if (col_idx != -1) and (cols_included[col_idx]):
+                col_idx = indices[row_idx] - drop_first
+                if (col_idx >= 0) and (cols_included[col_idx]):
                     out[col_idx] += other[row_idx]
         # Case 4: Both col restrictions and row restrictions
         else:
@@ -106,8 +107,8 @@ def transpose_matvec_drop_first(
             n_keep_rows = len(rows_view)
             for row_idx in range(n_keep_rows):
                 row = rows_view[row_idx]
-                col_idx = indices[row] - 1
-                if (col_idx != -1) and (cols_included[col_idx]):
+                col_idx = indices[row] - drop_first
+                if (col_idx >= 0) and (cols_included[col_idx]):
                     out[col_idx] += other[row]
 
 
@@ -151,7 +152,8 @@ def matvec_drop_first(
     int n_rows, 
     int[:] cols,
     int n_cols, 
-    floating[:] out_vec
+    floating[:] out_vec,
+    bint drop_first
 ):
     """See `matvec`. Here we drop the first category of the
     CategoricalMatrix so the indices refer to the column index + 1.
@@ -161,14 +163,14 @@ def matvec_drop_first(
 
     if cols is None:
         for i in prange(n_rows, nogil=True):
-            col_idx = indices[i] - 1  # reference category is always 0.
-            if col_idx != -1:
+            col_idx = indices[i] - drop_first  # reference category is always 0.
+            if col_idx >= 0:
                 out_vec[i] += other[col_idx]
     else:
         col_included = get_col_included(cols, n_cols)
         for i in prange(n_rows, nogil=True):
-            col_idx = indices[i] - 1
-            if (col_idx != -1) and (col_included[col_idx] == 1):
+            col_idx = indices[i] - drop_first
+            if (col_idx >= 0) and (col_included[col_idx] == 1):
                 out_vec[i] += other[col_idx]
     return
 
@@ -196,7 +198,8 @@ def sandwich_categorical_drop_first(
     floating[:] d,
     int[:] rows,
     dtype,
-    int n_cols
+    int n_cols,
+    bint drop_first
 ):
     cdef floating[:] res = np.zeros(n_cols, dtype=dtype)
     cdef int col_idx, k, k_idx
@@ -204,8 +207,8 @@ def sandwich_categorical_drop_first(
 
     for k_idx in range(n_rows):
         k = rows[k_idx]
-        col_idx = indices[k] - 1  # reference category is always 0.
-        if col_idx != -1:
+        col_idx = indices[k] - drop_first  # reference category is always 0.
+        if col_idx >= 0:
             res[col_idx] += d[k]
     return np.asarray(res)
 
@@ -215,6 +218,7 @@ def multiply_drop_first(
     numeric[:] d,
     int ncols,
     dtype,
+    bint drop_first,
 ):
     """Multiply a CategoricalMatrix by a vector d.
 
@@ -255,9 +259,9 @@ def multiply_drop_first(
 
     for i in range(nrows):
         vnew_indptr[i] = nonref_cnt
-        if indices[i] != 0:
+        if indices[i] >= drop_first:
             vnew_data[nonref_cnt] = d[i]
-            vnew_indices[nonref_cnt] = indices[i] - 1
+            vnew_indices[nonref_cnt] = indices[i] - drop_first
             nonref_cnt += 1
 
     vnew_indptr[i+1] = nonref_cnt
@@ -268,6 +272,7 @@ def multiply_drop_first(
 def subset_categorical_drop_first(
     int[:] indices,
     int ncols,
+    bint drop_first
 ):
     """Construct the inputs to transform a CategoricalMatrix into a csr_matrix.
 
@@ -299,8 +304,8 @@ def subset_categorical_drop_first(
 
     for i in range(nrows):
         vnew_indptr[i] = nonzero_cnt
-        if indices[i] != 0:
-            vnew_indices[nonzero_cnt] = indices[i] - 1
+        if indices[i] >= drop_first:
+            vnew_indices[nonzero_cnt] = indices[i] - drop_first
             nonzero_cnt += 1
 
     vnew_indptr[i+1] = nonzero_cnt