From 0f771b070bbbd7fcdc2f9604632650c8aae8cb8a Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Thu, 12 May 2022 18:02:54 +0200
Subject: [PATCH 1/8] tabular: show number of columns

fixes https://github.com/galaxyproject/galaxy/issues/13258
---
 lib/galaxy/datatypes/tabular.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/galaxy/datatypes/tabular.py b/lib/galaxy/datatypes/tabular.py
index 87d709e75998..d5181188fda1 100644
--- a/lib/galaxy/datatypes/tabular.py
+++ b/lib/galaxy/datatypes/tabular.py
@@ -127,6 +127,7 @@ def set_meta(self, dataset: DatasetProtocol, *, overwrite: bool = True, **kwd) -
     def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
         kwd.setdefault("line_wrap", False)
         super().set_peek(dataset, **kwd)
+        dataset.blurb = f"{dataset.blurb} {dataset.metadata.columns} columns"
         if dataset.metadata.comment_lines:
             dataset.blurb = f"{dataset.blurb}, {util.commaify(str(dataset.metadata.comment_lines))} comments"
 

From 7abb163adf2187eebf9f7eeef4ab799d0c58bb7a Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Fri, 13 May 2022 16:54:01 +0200
Subject: [PATCH 2/8] improve display of estimated number of lines

fixes https://github.com/galaxyproject/galaxy/issues/6506

for large files the number of lines is estimated and shown
as a rounded number (using two significant digits), e.g.
`~8,700,000 lines`.

with this change the trailing zeros are written as a power of ten,
so the same estimate is shown as: `~87 10^5 lines`

this commit also makes roundify really round numbers (as the name
suggests) instead of simply truncating to two significant digits, but
this could be reverted if there are concerns about speed due to the
additional math (log10/floor/round)
---
 lib/galaxy/datatypes/data.py |  2 +-
 lib/galaxy/util/__init__.py  | 49 ++++++++++++++++++++++++++++++++----
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 2c12f640af76..5c82eab1734d 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -1100,7 +1100,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
                     else:
                         est_lines = self.estimate_file_lines(dataset)
                         if est_lines is not None:
-                            dataset.blurb = f"~{util.commaify(util.roundify(str(est_lines)))} {inflector.cond_plural(est_lines, self.line_class)}"
+                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(util.roundify(str(est_lines)))} {inflector.cond_plural(est_lines, self.line_class)}"
                         else:
                             dataset.blurb = "Error: Cannot estimate lines in dataset"
             else:
diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index 6949dbafbb71..6f440f7b7145 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -32,6 +32,10 @@
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from hashlib import md5
+from math import (
+    floor,
+    log10,
+)
 from os.path import relpath
 from typing import (
     Any,
@@ -1135,14 +1139,49 @@ def commaify(amount):
         return commaify(new)
 
 
+def trailing_zeros_to_powerof10(amount):
+    """
+    >>> trailing_zeros_to_powerof10(23000)
+    '23000'
+    >>> trailing_zeros_to_powerof10(2300000)
+    '23 10^5'
+    >>> trailing_zeros_to_powerof10(23000000)
+    '23 10^6'
+    >>> trailing_zeros_to_powerof10(1)
+    '1'
+    >>> trailing_zeros_to_powerof10(0)
+    '0'
+    >>> trailing_zeros_to_powerof10(100)
+    '100'
+    >>> trailing_zeros_to_powerof10(-100)
+    '-100'
+    """
+    amount = str(amount)
+    zeros = 0
+    i = len(amount) - 1
+    while i >= 0 and amount[i] == "0":
+        zeros += 1
+        i -= 1
+    if len(amount) < len(f"{amount[:i+1]} 10^{zeros}"):
+        return amount
+    else:
+        return f"{amount[:i+1]} 10^{zeros}"
+
+
 def roundify(amount, sfs=2):
     """
-    Take a number in string form and truncate to 'sfs' significant figures.
+    Take a number and round it to 'sfs' significant figures.
+
+    >>> roundify(99)
+    99
+    >>> roundify(-99)
+    -99
+    >>> roundify(1111)
+    1100
+    >>> roundify(1999)
+    2000
     """
-    if len(amount) <= sfs:
-        return amount
-    else:
-        return amount[0:sfs] + "0" * (len(amount) - sfs)
+    return round(amount, -int(floor(log10(abs(amount)))) + sfs - 1)
 
 
 @overload

From 911df4984a7b634f53c7b54eae1d3e91bb730ac1 Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Mon, 19 Feb 2024 14:47:11 +0100
Subject: [PATCH 3/8] add a dot operator (U+22C5) for the multiplication

---
 lib/galaxy/util/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index 6f440f7b7145..0039a4099f83 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -1144,9 +1144,9 @@ def trailing_zeros_to_powerof10(amount):
     >>> trailing_zeros_to_powerof10(23000)
     '23000'
     >>> trailing_zeros_to_powerof10(2300000)
-    '23 10^5'
+    '23\u22C510^5'
     >>> trailing_zeros_to_powerof10(23000000)
-    '23 10^6'
+    '23\u22C510^6'
     >>> trailing_zeros_to_powerof10(1)
     '1'
     >>> trailing_zeros_to_powerof10(0)
@@ -1162,10 +1162,10 @@ def trailing_zeros_to_powerof10(amount):
     while i >= 0 and amount[i] == "0":
         zeros += 1
         i -= 1
-    if len(amount) < len(f"{amount[:i+1]} 10^{zeros}"):
+    if len(amount) < len(f"{amount[: i+1]}\u22C510^{zeros}"):
         return amount
     else:
-        return f"{amount[:i+1]} 10^{zeros}"
+        return f"{amount[: i+1]}\u22C510^{zeros}"
 
 
 def roundify(amount, sfs=2):

From 09a57321bbf8c3af8df637b58dbffeb7427bf56a Mon Sep 17 00:00:00 2001
From: M Bernt <m.bernt@ufz.de>
Date: Mon, 19 Feb 2024 18:05:01 +0100
Subject: [PATCH 4/8] roundify takes int

Co-authored-by: Wolfgang Maier <maierw@posteo.de>
---
 lib/galaxy/datatypes/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 5c82eab1734d..c5771e23d2ee 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -1100,7 +1100,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
                     else:
                         est_lines = self.estimate_file_lines(dataset)
                         if est_lines is not None:
-                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(util.roundify(str(est_lines)))} {inflector.cond_plural(est_lines, self.line_class)}"
+                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(util.roundify(est_lines))} {inflector.cond_plural(est_lines, self.line_class)}"
                         else:
                             dataset.blurb = "Error: Cannot estimate lines in dataset"
             else:

From 73708f9e44b67b92cb52556b1ee6bd6647e90938 Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Mon, 19 Feb 2024 18:43:20 +0100
Subject: [PATCH 5/8] add type annotation to utility functions

---
 lib/galaxy/util/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index 0039a4099f83..64ef94c1f10f 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -1139,7 +1139,7 @@ def commaify(amount):
         return commaify(new)
 
 
-def trailing_zeros_to_powerof10(amount):
+def trailing_zeros_to_powerof10(amount: int):
     """
     >>> trailing_zeros_to_powerof10(23000)
     '23000'
@@ -1168,7 +1168,7 @@ def trailing_zeros_to_powerof10(amount):
         return f"{amount[: i+1]}\u22C510^{zeros}"
 
 
-def roundify(amount, sfs=2):
+def roundify(amount: int, sfs: int = 2):
     """
     Take a number and round it to 'sfs' significant figures.
 

From 22f50730a9ac75b947e68a805254f893338486db Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Mon, 19 Feb 2024 18:44:54 +0100
Subject: [PATCH 6/8] use multiplication sign (U+00D7) instead of dot operator

---
 lib/galaxy/util/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index 64ef94c1f10f..c97423b795be 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -1144,9 +1144,9 @@ def trailing_zeros_to_powerof10(amount: int):
     >>> trailing_zeros_to_powerof10(23000)
     '23000'
     >>> trailing_zeros_to_powerof10(2300000)
-    '23\u22C510^5'
+    '23\u00d710^5'
     >>> trailing_zeros_to_powerof10(23000000)
-    '23\u22C510^6'
+    '23\u00d710^6'
     >>> trailing_zeros_to_powerof10(1)
     '1'
     >>> trailing_zeros_to_powerof10(0)
@@ -1162,10 +1162,10 @@ def trailing_zeros_to_powerof10(amount: int):
     while i >= 0 and amount[i] == "0":
         zeros += 1
         i -= 1
-    if len(amount) < len(f"{amount[: i+1]}\u22C510^{zeros}"):
+    if len(amount) < len(f"{amount[: i+1]}\u00d710^{zeros}"):
         return amount
     else:
-        return f"{amount[: i+1]}\u22C510^{zeros}"
+        return f"{amount[: i+1]}\u00d710^{zeros}"
 
 
 def roundify(amount: int, sfs: int = 2):

From 079085445572d7e8d6d1917309833ce0db92d6fa Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Tue, 20 Feb 2024 19:41:43 +0100
Subject: [PATCH 7/8] use metric prefixes K, M, ...

---
 lib/galaxy/datatypes/data.py |   2 +-
 lib/galaxy/util/__init__.py  | 101 ++++++++++++++++++-----------------
 2 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index c5771e23d2ee..02aa1f50922f 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -1100,7 +1100,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
                     else:
                         est_lines = self.estimate_file_lines(dataset)
                         if est_lines is not None:
-                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(util.roundify(est_lines))} {inflector.cond_plural(est_lines, self.line_class)}"
+                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(est_lines)} {inflector.cond_plural(est_lines, self.line_class)}"
                         else:
                             dataset.blurb = "Error: Cannot estimate lines in dataset"
             else:
diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index c97423b795be..e61d84aa1eeb 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -32,10 +32,6 @@
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from hashlib import md5
-from math import (
-    floor,
-    log10,
-)
 from os.path import relpath
 from typing import (
     Any,
@@ -1139,14 +1135,14 @@ def commaify(amount):
         return commaify(new)
 
 
-def trailing_zeros_to_powerof10(amount: int):
+def trailing_zeros_to_powerof10(amount: int) -> str:
     """
     >>> trailing_zeros_to_powerof10(23000)
-    '23000'
+    '23K'
     >>> trailing_zeros_to_powerof10(2300000)
-    '23\u00d710^5'
+    '2.3M'
     >>> trailing_zeros_to_powerof10(23000000)
-    '23\u00d710^6'
+    '23M'
     >>> trailing_zeros_to_powerof10(1)
     '1'
     >>> trailing_zeros_to_powerof10(0)
@@ -1156,32 +1152,13 @@ def trailing_zeros_to_powerof10(amount: int):
     >>> trailing_zeros_to_powerof10(-100)
     '-100'
     """
-    amount = str(amount)
-    zeros = 0
-    i = len(amount) - 1
-    while i >= 0 and amount[i] == "0":
-        zeros += 1
-        i -= 1
-    if len(amount) < len(f"{amount[: i+1]}\u00d710^{zeros}"):
-        return amount
+    m, prefix = metric_prefix(amount, 1000)
+    m_str = str(int(m)) if m.is_integer() else f"{m:.1f}"
+    exp = f"{m_str}{prefix}"
+    if len(exp) <= len(str(amount)):
+        return exp
     else:
-        return f"{amount[: i+1]}\u00d710^{zeros}"
-
-
-def roundify(amount: int, sfs: int = 2):
-    """
-    Take a number and round it to 'sfs' significant figures.
-
-    >>> roundify(99)
-    99
-    >>> roundify(-99)
-    -99
-    >>> roundify(1111)
-    1100
-    >>> roundify(1999)
-    2000
-    """
-    return round(amount, -int(floor(log10(abs(amount)))) + sfs - 1)
+        return str(amount)
 
 
 @overload
@@ -1509,7 +1486,43 @@ def docstring_trim(docstring):
     return "\n".join(trimmed)
 
 
-def nice_size(size):
+def metric_prefix(number: Union[int, float], base: int, text: bool = True) -> Tuple[float, str]:
+    """
+    >>> metric_prefix(100, 1000)
+    (100.0, '')
+    >>> metric_prefix(999, 1000)
+    (999.0, '')
+    >>> metric_prefix(1000, 1000)
+    (1.0, 'K')
+    >>> metric_prefix(999, 1000, False)
+    (999.0, '0')
+    >>> metric_prefix(1000, 1000, False)
+    (1.0, '3')
+    >>> metric_prefix(1001, 1000)
+    (1.001, 'K')
+    >>> metric_prefix(1000000, 1000)
+    (1.0, 'M')
+    >>> metric_prefix(1000**10, 1000)
+    (1.0, 'Q')
+    >>> metric_prefix(1000**11, 1000)
+    (1000.0, 'Q')
+    """
+    prefixes = ["", "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q"]
+    if number < 0:
+        number = abs(number)
+        sign = -1
+    else:
+        sign = 1
+
+    for i, prefix in enumerate(prefixes):
+        if number < base:
+            return sign * float(number), prefix if text else str(i * 3)
+        number /= base
+    else:
+        return sign * float(number) * base, prefix if text else str(i * 3)
+
+
+def nice_size(size: Union[float, int, str]) -> str:
     """
     Returns a readably formatted string with the size
 
@@ -1522,23 +1535,15 @@ def nice_size(size):
     >>> nice_size(100000000)
     '95.4 MB'
     """
-    words = ["bytes", "KB", "MB", "GB", "TB", "PB", "EB"]
-    prefix = ""
     try:
         size = float(size)
-        if size < 0:
-            size = abs(size)
-            prefix = "-"
-    except Exception:
+    except ValueError:
         return "??? bytes"
-    for ind, word in enumerate(words):
-        step = 1024 ** (ind + 1)
-        if step > size:
-            size = size / float(1024**ind)
-            if word == "bytes":  # No decimals for bytes
-                return "%s%d bytes" % (prefix, size)
-            return f"{prefix}{size:.1f} {word}"
-    return "??? bytes"
+    size, prefix = metric_prefix(size, 1024)
+    if prefix == "":
+        return "%d bytes" % size
+    else:
+        return f"{size:.1f} {prefix}B"
 
 
 def size_to_bytes(size):

From cb79e87756c2e1aab93bb68ae5c8adbfc910ad9f Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Wed, 21 Feb 2024 14:18:56 +0100
Subject: [PATCH 8/8] cleanup: rename to shorten_with_metric_prefix, drop text param

---
 lib/galaxy/datatypes/data.py |  2 +-
 lib/galaxy/util/__init__.py  | 64 +++++++++++++++++-------------------
 2 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 02aa1f50922f..5994682784cc 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -1100,7 +1100,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
                     else:
                         est_lines = self.estimate_file_lines(dataset)
                         if est_lines is not None:
-                            dataset.blurb = f"~{util.trailing_zeros_to_powerof10(est_lines)} {inflector.cond_plural(est_lines, self.line_class)}"
+                            dataset.blurb = f"~{util.shorten_with_metric_prefix(est_lines)} {inflector.cond_plural(est_lines, self.line_class)}"
                         else:
                             dataset.blurb = "Error: Cannot estimate lines in dataset"
             else:
diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index e61d84aa1eeb..31e798a0495f 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -1135,32 +1135,6 @@ def commaify(amount):
         return commaify(new)
 
 
-def trailing_zeros_to_powerof10(amount: int) -> str:
-    """
-    >>> trailing_zeros_to_powerof10(23000)
-    '23K'
-    >>> trailing_zeros_to_powerof10(2300000)
-    '2.3M'
-    >>> trailing_zeros_to_powerof10(23000000)
-    '23M'
-    >>> trailing_zeros_to_powerof10(1)
-    '1'
-    >>> trailing_zeros_to_powerof10(0)
-    '0'
-    >>> trailing_zeros_to_powerof10(100)
-    '100'
-    >>> trailing_zeros_to_powerof10(-100)
-    '-100'
-    """
-    m, prefix = metric_prefix(amount, 1000)
-    m_str = str(int(m)) if m.is_integer() else f"{m:.1f}"
-    exp = f"{m_str}{prefix}"
-    if len(exp) <= len(str(amount)):
-        return exp
-    else:
-        return str(amount)
-
-
 @overload
 def unicodify(  # type: ignore[misc]
     value: Literal[None],
@@ -1486,7 +1460,7 @@ def docstring_trim(docstring):
     return "\n".join(trimmed)
 
 
-def metric_prefix(number: Union[int, float], base: int, text: bool = True) -> Tuple[float, str]:
+def metric_prefix(number: Union[int, float], base: int) -> Tuple[float, str]:
     """
     >>> metric_prefix(100, 1000)
     (100.0, '')
@@ -1494,10 +1468,6 @@ def metric_prefix(number: Union[int, float], base: int, text: bool = True) -> Tu
     (999.0, '')
     >>> metric_prefix(1000, 1000)
     (1.0, 'K')
-    >>> metric_prefix(999, 1000, False)
-    (999.0, '0')
-    >>> metric_prefix(1000, 1000, False)
-    (1.0, '3')
     >>> metric_prefix(1001, 1000)
     (1.001, 'K')
     >>> metric_prefix(1000000, 1000)
@@ -1514,12 +1484,38 @@ def metric_prefix(number: Union[int, float], base: int, text: bool = True) -> Tu
     else:
         sign = 1
 
-    for i, prefix in enumerate(prefixes):
+    for prefix in prefixes:
         if number < base:
-            return sign * float(number), prefix if text else str(i * 3)
+            return sign * float(number), prefix
         number /= base
     else:
-        return sign * float(number) * base, prefix if text else str(i * 3)
+        return sign * float(number) * base, prefix
+
+
+def shorten_with_metric_prefix(amount: int) -> str:
+    """
+    >>> shorten_with_metric_prefix(23000)
+    '23K'
+    >>> shorten_with_metric_prefix(2300000)
+    '2.3M'
+    >>> shorten_with_metric_prefix(23000000)
+    '23M'
+    >>> shorten_with_metric_prefix(1)
+    '1'
+    >>> shorten_with_metric_prefix(0)
+    '0'
+    >>> shorten_with_metric_prefix(100)
+    '100'
+    >>> shorten_with_metric_prefix(-100)
+    '-100'
+    """
+    m, prefix = metric_prefix(amount, 1000)
+    m_str = str(int(m)) if m.is_integer() else f"{m:.1f}"
+    exp = f"{m_str}{prefix}"
+    if len(exp) <= len(str(amount)):
+        return exp
+    else:
+        return str(amount)
 
 
 def nice_size(size: Union[float, int, str]) -> str: