Commit
feat(python): Standardize error message format (#11598)
Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>
TheDataScientistNL and stinodego authored Oct 10, 2023
1 parent 21dddfd commit 7f1fec0
Showing 29 changed files with 94 additions and 96 deletions.
11 changes: 6 additions & 5 deletions py-polars/polars/dataframe/frame.py
@@ -728,7 +728,7 @@ def _read_csv(
  if dtype_slice is not None:
      raise ValueError(
          "cannot use glob patterns and unnamed dtypes as `dtypes` argument"
-         "\n\nUse `dtypes`: Mapping[str, Type[DataType]"
+         "\n\nUse `dtypes`: Mapping[str, Type[DataType]]"
      )
  from polars import scan_csv

@@ -1719,7 +1719,7 @@ def __setitem__(
# df["foo"] = series
if isinstance(key, str):
raise TypeError(
"DataFrame object does not support `Series` assignment by index."
"DataFrame object does not support `Series` assignment by index"
"\n\nUse `DataFrame.with_columns`."
)

@@ -2996,7 +2996,7 @@ def write_excel(
  except ImportError:
      raise ImportError(
          "Excel export requires xlsxwriter"
-         "\n\nPlease run `pip install XlsxWriter`"
+         "\n\nPlease run: pip install XlsxWriter"
      ) from None

  # setup workbook/worksheet
@@ -3427,7 +3427,7 @@ def write_database(
  else:
      raise ValueError(
          f"unexpected value for `if_exists`: {if_exists!r}"
-         f"\n\nChoose one of: {'fail', 'replace', 'append'}"
+         f"\n\nChoose one of {{'fail', 'replace', 'append'}}"
      )
  with _open_adbc_connection(connection) as conn, conn.cursor() as cursor:
      cursor.adbc_ingest(table_name, self.to_arrow(), mode)
@@ -3443,7 +3443,8 @@ def write_database(
      from sqlalchemy import create_engine
  except ModuleNotFoundError as exc:
      raise ModuleNotFoundError(
-         "'sqlalchemy' not found. Install polars with 'pip install polars[sqlalchemy]'"
+         "sqlalchemy not found"
+         "\n\nInstall Polars with: pip install polars[sqlalchemy]"
      ) from exc
  from csv import reader as delimited_read

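The hunks above illustrate the convention this commit standardizes: a lowercase one-line description, a blank line, then an actionable hint, with install commands written as plain "Please run: pip install ..." text. A minimal sketch of what a caller sees for the `__setitem__` case above (illustrative only, assuming a Polars build containing this commit):

    import polars as pl

    df = pl.DataFrame({"a": [1, 2]})
    try:
        df["a"] = pl.Series([3, 4])  # string-key assignment is unsupported
    except TypeError as exc:
        print(exc)
        # DataFrame object does not support `Series` assignment by index
        #
        # Use `DataFrame.with_columns`.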
6 changes: 4 additions & 2 deletions py-polars/polars/datatypes/classes.py
@@ -348,7 +348,8 @@ def __init__(

  if self.time_unit not in ("ms", "us", "ns"):
      raise ValueError(
-         f"invalid time_unit; expected one of {{'ns','us','ms'}}, got {self.time_unit!r}"
+         "invalid `time_unit`"
+         f"\n\nExpected one of {{'ns','us','ms'}}, got {self.time_unit!r}."
      )

  def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
@@ -390,7 +391,8 @@ def __init__(self, time_unit: TimeUnit = "us"):
  self.time_unit = time_unit
  if self.time_unit not in ("ms", "us", "ns"):
      raise ValueError(
-         f"invalid time_unit; expected one of {{'ns','us','ms'}}, got {self.time_unit!r}"
+         "invalid `time_unit`"
+         f"\n\nExpected one of {{'ns','us','ms'}}, got {self.time_unit!r}."
      )

  def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
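Both `Datetime` and `Duration` now share identical wording. A quick illustrative check of the new message (assuming a build with this commit):

    import polars as pl

    try:
        pl.Datetime(time_unit="s")  # only "ms", "us" and "ns" are accepted
    except ValueError as exc:
        print(exc)
        # invalid `time_unit`
        #
        # Expected one of {'ns','us','ms'}, got 's'.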
5 changes: 3 additions & 2 deletions py-polars/polars/datatypes/constructor.py
@@ -110,8 +110,9 @@ def numpy_values_and_dtype(
      values = values.astype(np.int64)
  else:
      raise ValueError(
-         "'D' (datetime only), 'ms', 'us', and 'ns' resolutions are supported when converting from numpy.{datetime64,timedelta64}"
-         "\n\nPlease cast to the closest supported unit before converting"
+         "incorrect NumPy datetime resolution"
+         "\n\n'D' (datetime only), 'ms', 'us', and 'ns' resolutions are supported when converting from numpy.{datetime64,timedelta64}."
+         " Please cast to the closest supported unit before converting."
      )
  return values, dtype

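The hint in the reworded message amounts to an `astype` on the NumPy side before the conversion. A short sketch of the suggested fix (hypothetical data; units other than 'D', 'ms', 'us' and 'ns' trigger the error):

    import numpy as np
    import polars as pl

    arr = np.array(["2023-10-10T12:00"], dtype="datetime64[m]")  # minute resolution: unsupported
    s = pl.Series(arr.astype("datetime64[us]"))  # cast to the closest supported unit first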
4 changes: 2 additions & 2 deletions py-polars/polars/expr/binary.py
@@ -180,7 +180,7 @@ def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr:
      return wrap_expr(self._pyexpr.bin_base64_decode(strict))
  else:
      raise ValueError(
-         f"encoding must be one of {{'hex', 'base64'}}, got {encoding!r}"
+         f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
      )

  def encode(self, encoding: TransferEncoding) -> Expr:
@@ -227,5 +227,5 @@ def encode(self, encoding: TransferEncoding) -> Expr:
      return wrap_expr(self._pyexpr.bin_base64_encode())
  else:
      raise ValueError(
-         f"encoding must be one of {{'hex', 'base64'}}, got {encoding!r}"
+         f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
      )
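Only two transfer encodings exist for the binary namespace, and the parameter name is now backtick-quoted in the error. Illustrative calls (the invalid value is hypothetical; the error is raised when the expression is built):

    import polars as pl

    pl.col("raw").bin.encode("hex")    # ok
    pl.col("raw").bin.encode("rot13")  # ValueError: `encoding` must be one of {'hex', 'base64'}, got 'rot13'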
2 changes: 1 addition & 1 deletion py-polars/polars/expr/datetime.py
@@ -1181,7 +1181,7 @@ def epoch(self, time_unit: EpochTimeUnit = "us") -> Expr:
      return wrap_expr(self._pyexpr).cast(Date).cast(Int32)
  else:
      raise ValueError(
-         f"time_unit must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got {time_unit!r}"
+         f"`time_unit` must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got {time_unit!r}"
      )

  def timestamp(self, time_unit: TimeUnit = "us") -> Expr:
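For reference, the five units accepted by `dt.epoch` are exactly those listed in the message. A sketch (the invalid unit is hypothetical):

    import polars as pl

    pl.col("ts").dt.epoch(time_unit="s")  # ok: seconds since the Unix epoch
    pl.col("ts").dt.epoch(time_unit="h")  # ValueError: `time_unit` must be one of {'ns', 'us', 'ms', 's', 'd'}, got 'h'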
16 changes: 8 additions & 8 deletions py-polars/polars/expr/expr.py
@@ -140,7 +140,7 @@ def __bool__(self) -> NoReturn:
  raise TypeError(
      "the truth value of an Expr is ambiguous"
      "\n\nHint: use '&' or '|' to logically combine Expr, not 'and'/'or', and"
-     " use 'x.is_in([y,z])' instead of 'x in [y,z]' to check membership"
+     " use `x.is_in([y,z])` instead of `x in [y,z]` to check membership."
  )

  def __abs__(self) -> Self:
@@ -9164,7 +9164,7 @@ def _remap_key_or_value_series(
          ) from exc
      else:
          raise ValueError(
-             f"choose a more suitable output dtype for map_dict as remapping value could not be converted to {dtype!r}: {exc!s}"
+             f"choose a more suitable output dtype for `map_dict` as remapping value could not be converted to {dtype!r}: {exc!s}"
          ) from exc

      if is_keys:
@@ -9681,28 +9681,28 @@ def _prepare_alpha(
"""Normalise EWM decay specification in terms of smoothing factor 'alpha'."""
if sum((param is not None) for param in (com, span, half_life, alpha)) > 1:
raise ValueError(
"parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
"parameters `com`, `span`, `half_life`, and `alpha` are mutually exclusive"
)
if com is not None:
if com < 0.0:
raise ValueError(f"require 'com' >= 0 (found {com!r})")
raise ValueError(f"require `com` >= 0 (found {com!r})")
alpha = 1.0 / (1.0 + com)

elif span is not None:
if span < 1.0:
raise ValueError(f"require 'span' >= 1 (found {span!r})")
raise ValueError(f"require `span` >= 1 (found {span!r})")
alpha = 2.0 / (span + 1.0)

elif half_life is not None:
if half_life <= 0.0:
raise ValueError(f"require 'half_life' > 0 (found {half_life!r})")
raise ValueError(f"require `half_life` > 0 (found {half_life!r})")
alpha = 1.0 - math.exp(-math.log(2.0) / half_life)

elif alpha is None:
raise ValueError("one of 'com', 'span', 'half_life', or 'alpha' must be set")
raise ValueError("one of `com`, `span`, `half_life`, or `alpha` must be set")

elif not (0 < alpha <= 1):
raise ValueError(f"require 0 < 'alpha' <= 1 (found {alpha!r})")
raise ValueError(f"require 0 < `alpha` <= 1 (found {alpha!r})")

return alpha

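The `_prepare_alpha` hunk also documents the EWM reparameterisations: exactly one of the four decay parameters may be set, and each of the first three reduces to the smoothing factor alpha. A self-contained check of the formulas shown in the diff (example values chosen so all three give alpha = 0.5):

    import math

    com, span, half_life = 1.0, 3.0, 1.0
    assert 1.0 / (1.0 + com) == 0.5                                        # alpha from com
    assert 2.0 / (span + 1.0) == 0.5                                       # alpha from span
    assert abs(1.0 - math.exp(-math.log(2.0) / half_life) - 0.5) < 1e-12   # alpha from half_life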
4 changes: 2 additions & 2 deletions py-polars/polars/expr/string.py
@@ -1182,7 +1182,7 @@ def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr:
      return wrap_expr(self._pyexpr.str_base64_decode(strict))
  else:
      raise ValueError(
-         f"encoding must be one of {{'hex', 'base64'}}, got {encoding}"
+         f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
      )

  def encode(self, encoding: TransferEncoding) -> Expr:
@@ -1221,7 +1221,7 @@ def encode(self, encoding: TransferEncoding) -> Expr:
      return wrap_expr(self._pyexpr.str_base64_encode())
  else:
      raise ValueError(
-         f"encoding must be one of {{'hex', 'base64'}}, got {encoding}"
+         f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
      )

  def extract(self, pattern: str, group_index: int = 1) -> Expr:
4 changes: 2 additions & 2 deletions py-polars/polars/functions/col.py
@@ -55,12 +55,12 @@ def _create_col(
              raise TypeError(
                  "invalid input for `col`"
                  "\n\nExpected iterable of type `str` or `DataType`,"
-                 f" got iterable of type {type(item).__name__!r}"
+                 f" got iterable of type {type(item).__name__!r}."
              )
      else:
          raise TypeError(
              "invalid input for `col`"
-             f"\n\nExpected `str` or `DataType`, got {type(name).__name__!r}"
+             f"\n\nExpected `str` or `DataType`, got {type(name).__name__!r}."
          )


2 changes: 1 addition & 1 deletion py-polars/polars/functions/lazy.py
@@ -2008,7 +2008,7 @@ def from_epoch(
      return column.cast(Datetime(time_unit))
  else:
      raise ValueError(
-         f"'time_unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got {time_unit!r}"
+         f"`time_unit` must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got {time_unit!r}"
      )


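Usage of `from_epoch` with a valid and an invalid unit, for contrast (illustrative; the column values are hypothetical):

    import polars as pl

    df = pl.DataFrame({"ts": [1_696_896_000]})     # seconds since the Unix epoch
    df.select(pl.from_epoch("ts", time_unit="s"))  # ok: yields a Datetime column
    pl.from_epoch("ts", time_unit="h")             # ValueError: `time_unit` must be one of {'ns', 'us', 'ms', 's', 'd'}, got 'h'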
2 changes: 1 addition & 1 deletion py-polars/polars/io/csv/functions.py
@@ -887,7 +887,7 @@ def scan_csv(
      elif new_columns:
          if with_column_names:
              raise ValueError(
-                 "cannot set both 'with_column_names' and 'new_columns'; mutually exclusive"
+                 "cannot set both `with_column_names` and `new_columns`; mutually exclusive"
              )
          if dtypes and isinstance(dtypes, Sequence):
              dtypes = dict(zip(new_columns, dtypes))
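For contrast, the two renaming mechanisms that are now flagged as mutually exclusive (sketch; `data.csv` and the lambda are hypothetical):

    import polars as pl

    pl.scan_csv("data.csv", new_columns=["id", "value"])                               # ok
    pl.scan_csv("data.csv", with_column_names=lambda cols: [c.lower() for c in cols])  # ok
    # passing both in one call raises:
    # cannot set both `with_column_names` and `new_columns`; mutually exclusive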
9 changes: 4 additions & 5 deletions py-polars/polars/io/database.py
@@ -419,8 +419,8 @@ def read_database( # noqa D417
          import arrow_odbc  # noqa: F401
      except ModuleNotFoundError:
          raise ModuleNotFoundError(
-             "use of an ODBC connection string requires the `arrow-odbc` package."
-             "\n\nPlease run `pip install arrow-odbc`."
+             "use of an ODBC connection string requires the `arrow-odbc` package"
+             "\n\nPlease run: pip install arrow-odbc"
          ) from None

      connection = ODBCCursorProxy(connection)
@@ -605,7 +605,7 @@ def _read_sql_connectorx(
      except ModuleNotFoundError:
          raise ModuleNotFoundError(
              "connectorx is not installed"
-             "\n\nPlease run `pip install connectorx>=0.3.2`."
+             "\n\nPlease run: pip install connectorx>=0.3.2"
          ) from None

      tbl = cx.read_sql(
@@ -644,8 +644,7 @@ def _open_adbc_connection(connection_uri: str) -> Any:
      except ImportError:
          raise ModuleNotFoundError(
              f"ADBC {driver_name} driver not detected"
-             "\n\nIf ADBC supports this database, please run:"
-             " `pip install adbc-driver-{driver_name} pyarrow`"
+             f"\n\nIf ADBC supports this database, please run: pip install adbc-driver-{driver_name} pyarrow"
          ) from None

      # some backends require the driver name to be stripped from the URI
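The ADBC hunk fixes a genuine bug along with the wording: the removed second literal lacked the `f` prefix, so `{driver_name}` was concatenated verbatim instead of being interpolated. A minimal reproduction of the pitfall:

    driver_name = "sqlite"
    old = "please run:" " `pip install adbc-driver-{driver_name} pyarrow`"  # second literal is not an f-string
    new = f"please run: pip install adbc-driver-{driver_name} pyarrow"

    assert "{driver_name}" in old  # the placeholder survives unformatted
    assert "sqlite" in new         # properly interpolated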
3 changes: 1 addition & 2 deletions py-polars/polars/io/delta.py
@@ -316,8 +316,7 @@ def _get_delta_lake_table(
  def _check_if_delta_available() -> None:
      if not _DELTALAKE_AVAILABLE:
          raise ModuleNotFoundError(
-             "deltalake is not installed"
-             "\n\nPlease run: `pip install deltalake>=0.9.0`"
+             "deltalake is not installed\n\nPlease run: pip install deltalake>=0.9.0"
          )


3 changes: 1 addition & 2 deletions py-polars/polars/io/ipc/functions.py
@@ -85,8 +85,7 @@ def read_ipc(
      if use_pyarrow:
          if not _PYARROW_AVAILABLE:
              raise ModuleNotFoundError(
-                 "'pyarrow' is required when using"
-                 " 'read_ipc(..., use_pyarrow=True)'"
+                 "pyarrow is required when using `read_ipc(..., use_pyarrow=True)`"
              )

          import pyarrow as pa
33 changes: 17 additions & 16 deletions py-polars/polars/io/spreadsheet/functions.py
@@ -469,7 +469,7 @@ def _initialise_spreadsheet_parser(
          import xlsx2csv
      except ImportError:
          raise ModuleNotFoundError(
-             "Required package not installed\n\nPlease run: `pip install xlsx2csv`"
+             "required package not installed" "\n\nPlease run: pip install xlsx2csv"
          ) from None
      parser = xlsx2csv.Xlsx2csv(source, **engine_options)
      sheets = parser.workbook.sheets
@@ -480,7 +480,7 @@
          import openpyxl
      except ImportError:
          raise ImportError(
-             "Required package not installed\n\nPlease run `pip install openpyxl`"
+             "required package not installed" "\n\nPlease run: pip install openpyxl"
          ) from None
      parser = openpyxl.load_workbook(source, data_only=True, **engine_options)
      sheets = [{"index": i + 1, "name": ws.title} for i, ws in enumerate(parser)]
@@ -491,13 +491,13 @@
          import pyxlsb
      except ImportError:
          raise ImportError(
-             "Required package not installed\n\nPlease run `pip install pyxlsb`"
+             "required package not installed" "\n\nPlease run: pip install pyxlsb"
          ) from None
      try:
          parser = pyxlsb.open_workbook(source, **engine_options)
      except KeyError as err:
          if "no item named 'xl/_rels/workbook.bin.rels'" in str(err):
-             raise TypeError(f"Invalid Excel Binary Workbook: {source!r}") from None
+             raise TypeError(f"invalid Excel Binary Workbook: {source!r}") from None
          raise
      sheets = [
          {"index": i + 1, "name": name} for i, name in enumerate(parser.sheets)
@@ -509,15 +509,16 @@
          import ezodf
      except ImportError:
          raise ImportError(
-             "Required package not installed\n\nPlease run `pip install ezodf lxml`"
+             "required package not installed"
+             "\n\nPlease run: pip install ezodf lxml"
          ) from None
      parser = ezodf.opendoc(source, **engine_options)
      sheets = [
          {"index": i + 1, "name": ws.name} for i, ws in enumerate(parser.sheets)
      ]
      return _read_spreadsheet_ods, parser, sheets

-     raise NotImplementedError(f"Unrecognised engine: {engine!r}")
+     raise NotImplementedError(f"unrecognized engine: {engine!r}")


  def _csv_buffer_to_frame(
@@ -533,8 +534,8 @@ def _csv_buffer_to_frame(
      if csv.tell() == 0:
          if raise_if_empty:
              raise NoDataError(
-                 "Empty Excel sheet; if you want to read this as "
-                 "an empty DataFrame, set `raise_if_empty=False`"
+                 "empty Excel sheet"
+                 "\n\nIf you want to read this as an empty DataFrame, set `raise_if_empty=False`."
              )
          return pl.DataFrame()

@@ -545,7 +546,7 @@
          csv_dtypes
      ).intersection(schema_overrides):
          raise ParameterCollisionError(
-             "Cannot specify columns in both `schema_overrides` and `read_csv_options['dtypes']`"
+             "cannot specify columns in both `schema_overrides` and `read_csv_options['dtypes']`"
          )
      read_csv_options["dtypes"] = {**csv_dtypes, **schema_overrides}

@@ -586,7 +587,7 @@ def _read_spreadsheet_ods(
      if sheet_name is not None:
          ws = next((s for s in sheets if s.name == sheet_name), None)
          if ws is None:
-             raise ValueError(f"Sheet {sheet_name!r} not found")
+             raise ValueError(f"sheet {sheet_name!r} not found")
      else:
          ws = sheets[0]

@@ -626,8 +627,8 @@ def _read_spreadsheet_ods(

      if raise_if_empty and len(df) == 0 and len(df.columns) == 0:
          raise NoDataError(
-             "Empty Excel sheet; if you want to read this as "
-             "an empty DataFrame, set `raise_if_empty=False`"
+             "empty Excel sheet"
+             "\n\nIf you want to read this as an empty DataFrame, set `raise_if_empty=False`."
          )

      if strptime_cols:
@@ -685,8 +686,8 @@ def _read_spreadsheet_openpyxl(
      )
      if raise_if_empty and len(df) == 0 and len(df.columns) == 0:
          raise NoDataError(
-             "Empty Excel sheet; if you want to read this as "
-             "an empty DataFrame, set `raise_if_empty=False`"
+             "empty Excel sheet"
+             "\n\nIf you want to read this as an empty DataFrame, set `raise_if_empty=False`."
          )
      return _drop_unnamed_null_columns(df)

Expand Down Expand Up @@ -732,8 +733,8 @@ def _read_spreadsheet_pyxlsb(
      )
      if raise_if_empty and len(df) == 0 and len(df.columns) == 0:
          raise NoDataError(
-             "Empty Excel sheet; if you want to read this as "
-             "an empty DataFrame, set `raise_if_empty=False`"
+             "empty Excel sheet"
+             "\n\nIf you want to read this as an empty DataFrame, set `raise_if_empty=False`."
          )
      return _drop_unnamed_null_columns(df)

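The parser branches above correspond to the selectable `engine` values for spreadsheet reading, and each import failure now names the exact package(s) to install. Illustrative calls (file names hypothetical; engine names as suggested by the branches above):

    import polars as pl

    pl.read_excel("report.xlsx", engine="xlsx2csv")  # needs: pip install xlsx2csv
    pl.read_excel("report.xlsx", engine="openpyxl")  # needs: pip install openpyxl
    pl.read_excel("report.xlsb", engine="pyxlsb")    # needs: pip install pyxlsb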
6 changes: 3 additions & 3 deletions py-polars/polars/series/series.py
@@ -486,7 +486,7 @@ def _comp(self, other: Any, op: ComparisonOperator) -> Series:
      time_zone = self.dtype.time_zone  # type: ignore[union-attr]
      if str(other.tzinfo) != str(time_zone):
          raise TypeError(
-             f"Datetime time zone '{other.tzinfo}' does not match Series timezone '{time_zone}'"
+             f"Datetime time zone {other.tzinfo!r} does not match Series timezone {time_zone!r}"
          )
      ts = _datetime_to_pl_timestamp(other, self.dtype.time_unit)  # type: ignore[union-attr]
      f = get_ffi_func(op + "_<>", Int64, self._s)
@@ -736,7 +736,7 @@ def _arithmetic(self, other: Any, op_s: str, op_ffi: str) -> Self:
      f = get_ffi_func(op_ffi, self.dtype, self._s)
      if f is None:
          raise TypeError(
-             f"cannot do arithmetic with series of dtype: {self.dtype} and argument"
+             f"cannot do arithmetic with series of dtype: {self.dtype!r} and argument"
              f" of type: {type(other).__name__!r}"
          )
      return self._from_pyseries(f(other))
@@ -965,7 +965,7 @@ def _pos_idxs(self, size: int) -> Series:
          return self

      if self.dtype not in INTEGER_DTYPES:
-         raise NotImplementedError("unsupported idxs datatype.")
+         raise NotImplementedError("unsupported idxs datatype")

      if self.len() == 0:
          return Series(self.name, [], dtype=idx_type)
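The `_comp` change switches the mismatched zones to `!r` formatting. One way to trigger it is comparing a timezone-aware Series against a naive `datetime` (a sketch, assuming a build with this commit):

    from datetime import datetime
    import polars as pl

    s = pl.Series([datetime(2023, 10, 10)]).dt.replace_time_zone("UTC")
    s > datetime(2023, 10, 10)
    # TypeError: Datetime time zone None does not match Series timezone 'UTC'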