diff --git a/docs/source/conf.py b/docs/source/conf.py index 62ccff77..97472b19 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,19 +12,24 @@ # import os import sys + # sys.path.insert(0, os.path.abspath('.')) # -- Project information ----------------------------------------------------- -project = 'SecML' +project = "SecML" import datetime -copyright = '{:}, PRALab - Pattern Recognition and Applications Lab & ' \ - 'Pluribus One s.r.l.'.format(datetime.datetime.now().year) -author = 'PRALab' + +copyright = ( + "{:}, PRALab - Pattern Recognition and Applications Lab & " + "Pluribus One s.r.l.".format(datetime.datetime.now().year) +) +author = "PRALab" # The full version, including alpha/beta/rc tags import secml + release = version = secml.__version__ @@ -34,30 +39,36 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'sphinx.ext.imgmath', - 'matplotlib.sphinxext.plot_directive', - 'numpydoc', - 'm2r', - 'nbsphinx', - 'nbsphinx_link' + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx.ext.imgmath", + "matplotlib.sphinxext.plot_directive", + "numpydoc", + "m2r", + "nbsphinx", + "nbsphinx_link", ] autodoc_default_options = { # 'members': True, - 'member-order': 'alphabetical', + "member-order": "alphabetical", # 'undoc-members': True, # 'show-inheritance': True, - 'exclude-members': '' + "exclude-members": "", } # The following modules should be faked by sphinx (e.g. extras) autodoc_mock_imports = [ - "pytest", "torch", "torchvision", "cleverhans", "tensorflow", - "foolbox", "eagerpy"] + "pytest", + "torch", + "torchvision", + "cleverhans", + "tensorflow", + "foolbox", + "eagerpy", +] # Autosummary pages will be generated by sphinx-autogen instead of sphinx-build # autosummary_generate = True @@ -65,31 +76,30 @@ numpydoc_class_members_toctree = False # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] +exclude_patterns = ["build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] # Always show the source code that generates a plot plot_include_source = True -plot_formats = ['png'] +plot_formats = ["png"] -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] -img_latex_preamble = r'\\usepackage{amsmath}' +img_latex_preamble = r"\\usepackage{amsmath}" intersphinx_mapping = { - 'python': ('https://docs.python.org/{.major}'.format( - sys.version_info), None), - 'numpy': ('https://numpy.org/doc/stable/', None), - 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), - 'sklearn': ("https://scikit-learn.org/stable/", None), - 'matplotlib': ('https://matplotlib.org/', None), - 'pytorch': ('https://pytorch.org/docs/stable/', None), - 'cleverhans': ('https://cleverhans.readthedocs.io/en/latest/', None), - 'foolbox': ('https://foolbox.readthedocs.io/en/stable/', None), + "python": ("https://docs.python.org/{.major}".format(sys.version_info), None), + "numpy": ("https://numpy.org/doc/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "matplotlib": ("https://matplotlib.org/", None), + "pytorch": ("https://pytorch.org/docs/stable/", None), + "cleverhans": ("https://cleverhans.readthedocs.io/en/latest/", None), + "foolbox": ("https://foolbox.readthedocs.io/en/stable/", None), } # -- Options for HTML output ------------------------------------------------- @@ -102,38 +112,38 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # html_logo = '_static/secml.png' # html_favicon = '_static/favicon.png' html_theme_options = { - 'canonical_url': '', - 'analytics_id': 'UA-145155489-1', - 'logo_only': False, - 'display_version': True, - 'prev_next_buttons_location': 'bottom', - 'style_external_links': False, - 'style_nav_header_background': '#2980B9', + "canonical_url": "", + "analytics_id": "UA-145155489-1", + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": False, + "style_nav_header_background": "#2980B9", # Toc options - 'collapse_navigation': False, - 'sticky_navigation': True, - 'navigation_depth': 5, - 'includehidden': True, - 'titles_only': False + "collapse_navigation": False, + "sticky_navigation": True, + "navigation_depth": 5, + "includehidden": True, + "titles_only": False, } html_context = { "display_gitlab": True, "gitlab_host": "gitlab.com", "gitlab_user": "secml", - "gitlab_repo": 'secml', + "gitlab_repo": "secml", "gitlab_version": "HEAD", "conf_py_path": "/docs/source/", - "source_suffix": '.rst' + "source_suffix": ".rst", } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/docs/source/pyplots/bar.py b/docs/source/pyplots/bar.py index fe801d29..f06b0c58 100644 --- a/docs/source/pyplots/bar.py +++ b/docs/source/pyplots/bar.py @@ -9,16 +9,16 @@ Y2 = (1 - X / float(n)) * (1.0 - 0.5) * CArray.rand((n,)) + 0.5 fig.sp.xticks([0.025, 0.025, 0.95, 0.95]) -fig.sp.bar(X, Y1, facecolor='#9999ff', edgecolor='white') -fig.sp.bar(X, -Y2, facecolor='#ff9999', edgecolor='white') +fig.sp.bar(X, Y1, facecolor="#9999ff", edgecolor="white") +fig.sp.bar(X, -Y2, facecolor="#ff9999", edgecolor="white") for x, y in zip(X, Y1): - fig.sp.text(x, y, '%.2f' % y, ha='center', va='bottom') + fig.sp.text(x, y, "%.2f" % y, ha="center", va="bottom") for x, y in zip(X, Y2): - fig.sp.text(x, -y - 0.02, '%.2f' % y, ha='center', va='top') + fig.sp.text(x, -y - 0.02, "%.2f" % y, ha="center", va="top") -fig.sp.xlim(-.5, n-.5) +fig.sp.xlim(-0.5, n - 0.5) fig.sp.xticks(()) fig.sp.ylim(-1.25, 1.25) fig.sp.yticks(()) diff --git a/docs/source/pyplots/clabel.py b/docs/source/pyplots/clabel.py index 531b980b..6aac5cd3 100644 --- a/docs/source/pyplots/clabel.py +++ b/docs/source/pyplots/clabel.py @@ -3,7 +3,7 @@ def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * (-x ** 2 - y ** 2).exp() + return (1 - x / 2 + x**5 + y**3) * (-(x**2) - y**2).exp() fig = CFigure() @@ -13,7 +13,7 @@ def f(x, y): X, Y = CArray.meshgrid((x_linspace, y_linspace)) -C = fig.sp.contour(X, Y, f(X, Y), linewidths=.5, cmap='hot') +C = fig.sp.contour(X, Y, f(X, Y), linewidths=0.5, cmap="hot") fig.sp.clabel(C, inline=1, fontsize=10) fig.sp.xticks(()) diff --git a/docs/source/pyplots/colorbar.py b/docs/source/pyplots/colorbar.py index bc34befd..693c4b6d 100644 --- a/docs/source/pyplots/colorbar.py +++ b/docs/source/pyplots/colorbar.py @@ -3,7 +3,8 @@ def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * (-x ** 2 - y ** 2).exp() + return (1 - x / 2 + x**5 + y**3) * (-(x**2) - y**2).exp() + fig = CFigure(width=10, title="Colorbar Example") fig.subplot(1, 2, 1) @@ -12,18 +13,17 @@ def f(x, y): y_linspace = CArray.linspace(-3, 3, 256) X, Y = CArray.meshgrid((x_linspace, y_linspace)) -c = fig.sp.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='hot') +c = fig.sp.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap="hot") fig.sp.colorbar(c) fig.sp.title("Hot Contourf") fig.sp.xticks(()) fig.sp.yticks(()) fig.subplot(1, 2, 2) -c = fig.sp.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='winter') +c = fig.sp.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap="winter") fig.sp.colorbar(c) fig.sp.title("Cold Contourf") fig.sp.xticks(()) fig.sp.yticks(()) fig.show() - diff --git a/docs/source/pyplots/contour.py b/docs/source/pyplots/contour.py index 5b949213..2a267786 100644 --- a/docs/source/pyplots/contour.py +++ b/docs/source/pyplots/contour.py @@ -3,7 +3,8 @@ def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * (-x ** 2 - y ** 2).exp() + return (1 - x / 2 + x**5 + y**3) * (-(x**2) - y**2).exp() + fig = CFigure() @@ -12,10 +13,9 @@ def f(x, y): X, Y = CArray.meshgrid((x_linspace, y_linspace)) -C = fig.sp.contour(X, Y, f(X, Y), linewidths=.5, cmap='hot') +C = fig.sp.contour(X, Y, f(X, Y), linewidths=0.5, cmap="hot") fig.sp.xticks(()) fig.sp.yticks(()) fig.show() - diff --git a/docs/source/pyplots/contourf.py b/docs/source/pyplots/contourf.py index 0fab767a..2df93bc8 100644 --- a/docs/source/pyplots/contourf.py +++ b/docs/source/pyplots/contourf.py @@ -3,7 +3,8 @@ def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * (-x ** 2 - y ** 2).exp() + return (1 - x / 2 + x**5 + y**3) * (-(x**2) - y**2).exp() + fig = CFigure() @@ -11,7 +12,7 @@ def f(x, y): y_linspace = CArray.linspace(-3, 3, 256) X, Y = CArray.meshgrid((x_linspace, y_linspace)) -fig.sp.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='hot') +fig.sp.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap="hot") fig.sp.xticks(()) fig.sp.yticks(()) diff --git a/docs/source/pyplots/errorbar.py b/docs/source/pyplots/errorbar.py index 7518ca23..7d14423d 100644 --- a/docs/source/pyplots/errorbar.py +++ b/docs/source/pyplots/errorbar.py @@ -2,13 +2,13 @@ from secml.figure import CFigure fig = CFigure(fontsize=16) -fig.title('Errorbars can go negative!') +fig.title("Errorbars can go negative!") -fig.sp.xscale("symlog", nonposx='clip') -fig.sp.yscale("symlog", nonposy='clip') +fig.sp.xscale("symlog", nonposx="clip") +fig.sp.yscale("symlog", nonposy="clip") x = CArray(10.0).pow(CArray.linspace(0.0, 2.0, 20)) -y = x ** 2.0 +y = x**2.0 fig.sp.errorbar(x, y, xerr=0.1 * x, yerr=5.0 + 0.75 * y) diff --git a/docs/source/pyplots/grid.py b/docs/source/pyplots/grid.py index 6f08f3bf..6728aa22 100644 --- a/docs/source/pyplots/grid.py +++ b/docs/source/pyplots/grid.py @@ -6,7 +6,7 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-', label="cosine") +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-", label="cosine") fig.sp.plot(X, S, label="sine") fig.sp.xticks(CArray([-3.14, -3.14 / 2, 0, 3.14 / 2, 3.14])) @@ -15,4 +15,3 @@ fig.sp.legend(loc=0) fig.show() - diff --git a/docs/source/pyplots/hist.py b/docs/source/pyplots/hist.py index 46f40a77..f0565b45 100644 --- a/docs/source/pyplots/hist.py +++ b/docs/source/pyplots/hist.py @@ -9,13 +9,13 @@ x = mu + sigma * CArray.randn((10000,)) num_bins = 50 # the histogram of the data -n, bins, patches = fig.sp.hist(x, num_bins, density=1, facecolor='green', alpha=0.5) +n, bins, patches = fig.sp.hist(x, num_bins, density=1, facecolor="green", alpha=0.5) # add a 'best fit' line y = bins.normpdf(mu, sigma) -fig.sp.plot(bins, y, 'r--') -fig.sp.xlabel('Smarts') -fig.sp.ylabel('Probability') -fig.title(r'Histogram of IQ: $\mu=100$, $\sigma=15$') +fig.sp.plot(bins, y, "r--") +fig.sp.xlabel("Smarts") +fig.sp.ylabel("Probability") +fig.title(r"Histogram of IQ: $\mu=100$, $\sigma=15$") # Tweak spacing to prevent clipping of ylabel fig.subplots_adjust(left=0.15) diff --git a/docs/source/pyplots/legend.py b/docs/source/pyplots/legend.py index b46185b4..a5a92072 100644 --- a/docs/source/pyplots/legend.py +++ b/docs/source/pyplots/legend.py @@ -5,7 +5,7 @@ C, S = X.cos(), X.sin() fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-', label="cosine") +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-", label="cosine") fig.sp.plot(X, S, label="sine") fig.sp.grid() diff --git a/docs/source/pyplots/loglog.py b/docs/source/pyplots/loglog.py index 3aacbf72..44052b70 100644 --- a/docs/source/pyplots/loglog.py +++ b/docs/source/pyplots/loglog.py @@ -2,7 +2,7 @@ from secml.figure import CFigure fig = CFigure(fontsize=14) -fig.title('loglog base 4 on x') +fig.title("loglog base 4 on x") t = CArray.arange(0.01, 20.0, 0.01) fig.sp.loglog(t, 20 * (-t / 10.0).exp(), basex=2) diff --git a/docs/source/pyplots/plot.py b/docs/source/pyplots/plot.py index b3b94920..ab7b0428 100644 --- a/docs/source/pyplots/plot.py +++ b/docs/source/pyplots/plot.py @@ -6,7 +6,7 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) fig.show() diff --git a/docs/source/pyplots/plot_fun.py b/docs/source/pyplots/plot_fun.py index 378c4a87..eca4ccb0 100644 --- a/docs/source/pyplots/plot_fun.py +++ b/docs/source/pyplots/plot_fun.py @@ -14,7 +14,7 @@ def f(array): fig = CFigure() -fig.sp.plot_fun(f, levels=[.5, 1, 1.5]) +fig.sp.plot_fun(f, levels=[0.5, 1, 1.5]) fig.sp.grid() fig.show() diff --git a/docs/source/pyplots/plot_path.py b/docs/source/pyplots/plot_path.py index ba5e5287..5df05937 100644 --- a/docs/source/pyplots/plot_path.py +++ b/docs/source/pyplots/plot_path.py @@ -12,4 +12,3 @@ fig.sp.ylim(1, 8) fig.show() - diff --git a/docs/source/pyplots/scatter.py b/docs/source/pyplots/scatter.py index 945e7bfd..65b7368f 100644 --- a/docs/source/pyplots/scatter.py +++ b/docs/source/pyplots/scatter.py @@ -4,8 +4,8 @@ dataset = CDLRandom().load() fig = CFigure(fontsize=14) -fig.sp.scatter(dataset.X[:, 0].ravel(), - dataset.X[:, 1].ravel(), - s=75, c=dataset.Y, alpha=.7) +fig.sp.scatter( + dataset.X[:, 0].ravel(), dataset.X[:, 1].ravel(), s=75, c=dataset.Y, alpha=0.7 +) fig.show() diff --git a/docs/source/pyplots/semilogx.py b/docs/source/pyplots/semilogx.py index bef9b1a9..8b04e697 100644 --- a/docs/source/pyplots/semilogx.py +++ b/docs/source/pyplots/semilogx.py @@ -7,7 +7,6 @@ fig.sp.semilogx(t, (2 * 3.14 * t).sin()) fig.sp.grid() -fig.sp.title('semilogx') +fig.sp.title("semilogx") fig.show() - diff --git a/docs/source/pyplots/semilogy.py b/docs/source/pyplots/semilogy.py index c3e826ee..9cab7f9d 100644 --- a/docs/source/pyplots/semilogy.py +++ b/docs/source/pyplots/semilogy.py @@ -6,6 +6,6 @@ t = CArray.arange(0.01, 20.0, 0.01) fig.sp.semilogy(t, (-t / 5.0).exp()) -fig.sp.title('semilogy') +fig.sp.title("semilogy") fig.sp.grid() fig.show() diff --git a/docs/source/pyplots/subplot.py b/docs/source/pyplots/subplot.py index 828b8ec0..945e7211 100644 --- a/docs/source/pyplots/subplot.py +++ b/docs/source/pyplots/subplot.py @@ -7,14 +7,14 @@ # create a new subplot fig.subplot(2, 2, 1) x = np.linspace(-np.pi, np.pi, 100) -y = 2*np.sin(x) +y = 2 * np.sin(x) # function `plot` will be applied to the last subplot created fig.sp.plot(x, y) # subplot indices are are the same of the first subplot # so the following function will be run inside the previous plot fig.subplot(2, 2, 1) -y = x +y = x fig.sp.plot(x, y) # create a new subplot @@ -22,7 +22,7 @@ fig.sp.plot(x, y) fig.subplot(2, 2, grid_slot=(1, slice(2))) -y = 2*np.sin(x) +y = 2 * np.sin(x) fig.sp.plot(x, y) plt.show() diff --git a/docs/source/pyplots/subplots_adjust.py b/docs/source/pyplots/subplots_adjust.py index 314d61da..15e09e69 100644 --- a/docs/source/pyplots/subplots_adjust.py +++ b/docs/source/pyplots/subplots_adjust.py @@ -5,7 +5,7 @@ fig = CFigure() x = CArray.arange(100) -y = 3. * CArray.sin(x * 2. * 3.14 / 100.) +y = 3.0 * CArray.sin(x * 2.0 * 3.14 / 100.0) for i in range(n): temp = 510 + i diff --git a/docs/source/pyplots/tick_params.py b/docs/source/pyplots/tick_params.py index 5f16dd39..c3ba27cf 100644 --- a/docs/source/pyplots/tick_params.py +++ b/docs/source/pyplots/tick_params.py @@ -7,12 +7,12 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) fig.sp.xticks(CArray([-pi, -pi / 2, 0, pi / 2, pi])) fig.sp.xticklabels(CArray(["- pi", "-pi/2", "0", "pi/2", "pi"])) -fig.sp.tick_params(direction='out', length=6, width=2, colors='r', right=False) +fig.sp.tick_params(direction="out", length=6, width=2, colors="r", right=False) fig.sp.yticks(CArray([-1, 0, +1])) fig.show() diff --git a/docs/source/pyplots/xlabel.py b/docs/source/pyplots/xlabel.py index a9d03235..886d5196 100644 --- a/docs/source/pyplots/xlabel.py +++ b/docs/source/pyplots/xlabel.py @@ -6,9 +6,9 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) -fig.sp.xlabel("x", color='r', fontsize=10) +fig.sp.xlabel("x", color="r", fontsize=10) fig.show() diff --git a/docs/source/pyplots/xlim.py b/docs/source/pyplots/xlim.py index 971c3cea..18e32320 100644 --- a/docs/source/pyplots/xlim.py +++ b/docs/source/pyplots/xlim.py @@ -6,7 +6,7 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) fig.sp.xlim(-3, 3) diff --git a/docs/source/pyplots/xticklabels.py b/docs/source/pyplots/xticklabels.py index 829d6877..876ec107 100644 --- a/docs/source/pyplots/xticklabels.py +++ b/docs/source/pyplots/xticklabels.py @@ -6,7 +6,7 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) fig.sp.xticks(CArray([-3.14, -3.14 / 2, 0, 3.14 / 2, 3.14])) diff --git a/docs/source/pyplots/xticks.py b/docs/source/pyplots/xticks.py index f0104fde..432bc5d6 100644 --- a/docs/source/pyplots/xticks.py +++ b/docs/source/pyplots/xticks.py @@ -6,7 +6,7 @@ fig = CFigure(fontsize=14) -fig.sp.plot(X, C, color='red', alpha=0.5, linewidth=1.0, linestyle='-') +fig.sp.plot(X, C, color="red", alpha=0.5, linewidth=1.0, linestyle="-") fig.sp.plot(X, S) fig.sp.xticks(CArray([-3.14, -3.14 / 2, 0, 3.14 / 2, 3.14])) diff --git a/setup.py b/setup.py index 92688f6b..f382f4a2 100644 --- a/setup.py +++ b/setup.py @@ -8,18 +8,18 @@ # Check if we want to building a release package is_release = False try: - is_release = bool(os.environ['SECML_ISRELEASE']) + is_release = bool(os.environ["SECML_ISRELEASE"]) except KeyError: pass def read(*path_parts): - with open(os.path.join(here, *path_parts), 'r', encoding='ascii') as fp: + with open(os.path.join(here, *path_parts), "r", encoding="ascii") as fp: return fp.read().strip() def parse_readme(*path_parts): # For README.md we accept utf-8 chars - with open(os.path.join(here, *path_parts), 'r', encoding='utf-8') as fp: + with open(os.path.join(here, *path_parts), "r", encoding="utf-8") as fp: return fp.read().strip() @@ -29,27 +29,27 @@ def git_version(): def _minimal_ext_cmd(cmd): # construct minimal environment env = {} - for k in ['SYSTEMROOT', 'PATH', 'HOME']: + for k in ["SYSTEMROOT", "PATH", "HOME"]: v = os.environ.get(k) if v is not None: env[k] = v # LANGUAGE is used on win32 - env['LANGUAGE'] = 'C' - env['LANG'] = 'C' - env['LC_ALL'] = 'C' + env["LANGUAGE"] = "C" + env["LANG"] = "C" + env["LC_ALL"] = "C" # Execute in the current dir - res = subprocess.Popen(cmd, cwd=here, env=env, - stdout=subprocess.PIPE, - stderr=open(os.devnull, 'w')).communicate()[0] + res = subprocess.Popen( + cmd, cwd=here, env=env, stdout=subprocess.PIPE, stderr=open(os.devnull, "w") + ).communicate()[0] return res try: - out = _minimal_ext_cmd(['git', 'rev-parse', '--short', 'HEAD']) - GIT_REVISION = out.strip().decode('ascii') + out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"]) + GIT_REVISION = out.strip().decode("ascii") if len(GIT_REVISION) == 0: raise OSError except OSError: - GIT_REVISION = 'Unknown' + GIT_REVISION = "Unknown" return GIT_REVISION @@ -60,15 +60,15 @@ def find_version(*path_parts): _v_f = read(*path_parts) # Read main version file if not is_release: # Override for is_release checks _v_git = git_version() - if _v_git == 'Unknown': + if _v_git == "Unknown": try: # Try to read rev from file. May not exists - _v_git = read('src', 'secml', 'VERSION_REV') + _v_git = read("src", "secml", "VERSION_REV") except: pass # _v_git will stay "Unknown" else: - write_rev(_v_git, 'src', 'secml', 'VERSION_REV') + write_rev(_v_git, "src", "secml", "VERSION_REV") # Append rev number only if available - _v = _v_f if _v_git == 'Unknown' else _v_f + '+' + _v_git + _v = _v_f if _v_git == "Unknown" else _v_f + "+" + _v_git else: _v = _v_f # release package _v = parse_version(_v) # Integrity checks @@ -81,7 +81,7 @@ def find_version(*path_parts): def write_rev(v, *path_parts): """Write revision id to file.""" - a = open(os.path.join(here, *path_parts), 'w') + a = open(os.path.join(here, *path_parts), "w") try: a.write(v) finally: @@ -116,15 +116,16 @@ def install_deps(): https://github.com/pypa/pip/issues/3610#issuecomment-356687173 """ - default = open(os.path.join(here, 'requirements.txt'), - 'r', encoding='ascii').readlines() + default = open( + os.path.join(here, "requirements.txt"), "r", encoding="ascii" + ).readlines() new_pkgs = [] links = [] for resource in default: - if 'git+ssh' in resource: - pkg = resource.split('#')[-1] - links.append(resource.strip() + '-9876543210') - new_pkgs.append(pkg.replace('egg=', '').rstrip()) + if "git+ssh" in resource: + pkg = resource.split("#")[-1] + links.append(resource.strip() + "-9876543210") + new_pkgs.append(pkg.replace("egg=", "").rstrip()) else: new_pkgs.append(resource.strip()) return new_pkgs, links @@ -132,7 +133,7 @@ def install_deps(): REQ_PKGS, DEP_LINKS = install_deps() -LONG_DESCRIPTION = parse_readme('README.md') +LONG_DESCRIPTION = parse_readme("README.md") # List of classifiers: https://pypi.org/pypi?%3Aaction=list_classifiers CLASSIFIERS = """\ @@ -157,36 +158,40 @@ def install_deps(): """ setup( - name='secml', + name="secml", version=find_version("src", "secml", "VERSION"), - description='A library for Secure and Explainable Machine Learning', + description="A library for Secure and Explainable Machine Learning", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", - license='Apache License 2.0', - classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f], + license="Apache License 2.0", + classifiers=[_f for _f in CLASSIFIERS.split("\n") if _f], platforms=["Linux", "Mac OS-X", "Unix", "Windows"], - url='https://secml.readthedocs.io', + url="https://secml.readthedocs.io", download_url="https://pypi.python.org/pypi/secml#files", project_urls={ "Bug Tracker": "https://github.com/pralab/secml/issues", "Source Code": "https://github.com/pralab/secml", }, - maintainer='Maura Pintor, Luca Demetrio', - maintainer_email='maura.pintor@unica.it, luca.demetrio@unige.it', - packages=find_packages('src', exclude=[ - "*.tests", "*.tests.*", "tests.*", "tests"]), - package_dir={'': 'src'}, + maintainer="Maura Pintor, Luca Demetrio", + maintainer_email="maura.pintor@unica.it, luca.demetrio@unige.it", + packages=find_packages("src", exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), + package_dir={"": "src"}, include_package_data=True, - python_requires='>=3.5, <3.11', + python_requires=">=3.5, <3.11", install_requires=REQ_PKGS, extras_require={ - 'pytorch': ["torch>=1.4,!=1.5.*", "torchvision>=0.5,!=0.6.*"], - 'cleverhans': ["tensorflow>=1.14,<2", "cleverhans<3.1"], - 'tf-gpu': ["tensorflow-gpu>=1.14,<2"], - 'foolbox': ["foolbox>=3.3.0", "torch>=1.4,!=1.5.*", "torchvision>=0.5,!=0.6.*"], - 'unittests': ['pytest>=5', - 'pytest-cov>=2.9', 'coverage', - 'jupyter', 'nbval', 'requests-mock'] + "pytorch": ["torch>=1.4,!=1.5.*", "torchvision>=0.5,!=0.6.*"], + "cleverhans": ["tensorflow>=1.14,<2", "cleverhans<3.1"], + "tf-gpu": ["tensorflow-gpu>=1.14,<2"], + "foolbox": ["foolbox>=3.3.0", "torch>=1.4,!=1.5.*", "torchvision>=0.5,!=0.6.*"], + "unittests": [ + "pytest>=5", + "pytest-cov>=2.9", + "coverage", + "jupyter", + "nbval", + "requests-mock", + ], }, - zip_safe=False + zip_safe=False, ) diff --git a/src/secml/__init__.py b/src/secml/__init__.py index d1fa5178..9a16345f 100644 --- a/src/secml/__init__.py +++ b/src/secml/__init__.py @@ -7,26 +7,28 @@ # Logger for this module only. Use `secml.utils.CLog` elsewhere import logging + _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) _logger_handle = logging.StreamHandler(sys.stdout) -_logger_handle.setFormatter(logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s")) +_logger_handle.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +) _logger.addHandler(_logger_handle) -__all__ = ['_NoValue', '__version__', 'global_filterwarnings'] +__all__ = ["_NoValue", "__version__", "global_filterwarnings"] _here = os.path.abspath(os.path.dirname(__file__)) def _read(*path_parts): - with open(os.path.join(_here, *path_parts), 'r', encoding='ascii') as fp: + with open(os.path.join(_here, *path_parts), "r", encoding="ascii") as fp: return fp.read().strip() def _write_rev(v, *path_parts): """Write revision id to file.""" - a = open(os.path.join(_here, *path_parts), 'w', encoding='ascii') + a = open(os.path.join(_here, *path_parts), "w", encoding="ascii") try: a.write(v) finally: @@ -39,27 +41,31 @@ def git_version(): def _minimal_ext_cmd(cmd): # construct minimal environment env = {} - for k in ['SYSTEMROOT', 'PATH', 'HOME']: + for k in ["SYSTEMROOT", "PATH", "HOME"]: v = os.environ.get(k) if v is not None: env[k] = v # LANGUAGE is used on win32 - env['LANGUAGE'] = 'C' - env['LANG'] = 'C' - env['LC_ALL'] = 'C' + env["LANGUAGE"] = "C" + env["LANG"] = "C" + env["LC_ALL"] = "C" # Execute in the current dir - out = subprocess.Popen(cmd, cwd=_here, env=env, - stdout=subprocess.PIPE, - stderr=open(os.devnull, 'w')).communicate()[0] + out = subprocess.Popen( + cmd, + cwd=_here, + env=env, + stdout=subprocess.PIPE, + stderr=open(os.devnull, "w"), + ).communicate()[0] return out try: - out = _minimal_ext_cmd(['git', 'rev-parse', '--short', 'HEAD']) - GIT_REVISION = out.strip().decode('ascii') + out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"]) + GIT_REVISION = out.strip().decode("ascii") if len(GIT_REVISION) == 0: raise OSError except OSError: - GIT_REVISION = 'Unknown' + GIT_REVISION = "Unknown" return GIT_REVISION @@ -67,24 +73,24 @@ def _minimal_ext_cmd(cmd): # Check if we want to building a release package is_release = False try: - is_release = bool(os.environ['SECML_ISRELEASE']) + is_release = bool(os.environ["SECML_ISRELEASE"]) except KeyError: pass # For version string format see: https://packaging.pypa.io/en/latest/version/ try: - _v_f = _read('VERSION') # Read main version file + _v_f = _read("VERSION") # Read main version file if not is_release: # Override for is_release checks _v_git = git_version() - if _v_git == 'Unknown': + if _v_git == "Unknown": try: # Try to read rev from file. May not exists - _v_git = _read('VERSION_REV') + _v_git = _read("VERSION_REV") except: pass # _v_git will stay "Unknown" else: - _write_rev(_v_git, 'VERSION_REV') + _write_rev(_v_git, "VERSION_REV") # Append rev number only if available - _v = _v_f if _v_git == 'Unknown' else _v_f + '+' + _v_git + _v = _v_f if _v_git == "Unknown" else _v_f + "+" + _v_git else: _v = _v_f # release package _v = parse_version(_v) # Integrity checks @@ -111,42 +117,56 @@ def global_filterwarnings(): # TODO: check after upgrading to tensorflow 2 warnings.filterwarnings( - "ignore", category=DeprecationWarning, + "ignore", + category=DeprecationWarning, message="Using or importing the ABCs from 'collections' instead of " - "from 'collections.abc' is deprecated*") + "from 'collections.abc' is deprecated*", + ) # TODO: check after upgrading to tensorflow 2 warnings.filterwarnings( - "ignore", category=PendingDeprecationWarning, - message="the imp module is deprecated in favour of importlib*") + "ignore", + category=PendingDeprecationWarning, + message="the imp module is deprecated in favour of importlib*", + ) warnings.filterwarnings( - "ignore", category=DeprecationWarning, - message="the imp module is deprecated in favour of importlib*") + "ignore", + category=DeprecationWarning, + message="the imp module is deprecated in favour of importlib*", + ) # TODO: check after upgrading to tensorflow 2 warnings.filterwarnings( - "ignore", category=FutureWarning, message="Passing (type, 1)*") + "ignore", category=FutureWarning, message="Passing (type, 1)*" + ) # TODO: check after cleverhans fix this (post 3.0.1) try: # For some reason we are not able to filter tf warnings import tensorflow as tf + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) except ImportError: pass # TODO: check after upgrading to tensorflow 2 (related to numpy v0.19) warnings.filterwarnings( - "ignore", category=DeprecationWarning, - message=r"tostring\(\) is deprecated\. Use tobytes\(\) instead\.") + "ignore", + category=DeprecationWarning, + message=r"tostring\(\) is deprecated\. Use tobytes\(\) instead\.", + ) # TODO: warning raised by torchvision mnist loader first time you download warnings.filterwarnings( - "ignore", category=UserWarning, module="torchvision.datasets.mnist", - message=r"The given NumPy array is not writeable") + "ignore", + category=UserWarning, + module="torchvision.datasets.mnist", + message=r"The given NumPy array is not writeable", + ) # TODO: cures https://github.com/pytorch/pytorch/issues/47038 warnings.filterwarnings( - "ignore", category=UserWarning, message=r"CUDA initialization") + "ignore", category=UserWarning, message=r"CUDA initialization" + ) # Call the filterwarnings method to make it active project-wide diff --git a/src/secml/_globals.py b/src/secml/_globals.py index 1f713ba3..cbe9fbbc 100644 --- a/src/secml/_globals.py +++ b/src/secml/_globals.py @@ -12,15 +12,13 @@ def foo(arg=np._NoValue): """ -__all__ = [ - '_NoValue' - ] +__all__ = ["_NoValue"] # Disallow reloading this module so as to preserve the identities of the # classes defined here. -if '_is_loaded' in globals(): - raise RuntimeError('Reloading secml._globals is not allowed') +if "_is_loaded" in globals(): + raise RuntimeError("Reloading secml._globals is not allowed") _is_loaded = True @@ -33,6 +31,7 @@ class _NoValueType: Inspired by np._globals module implementation. """ + __instance = None def __new__(cls): diff --git a/src/secml/adv/attacks/c_attack.py b/src/secml/adv/attacks/c_attack.py index 5b95858e..facc5115 100644 --- a/src/secml/adv/attacks/c_attack.py +++ b/src/secml/adv/attacks/c_attack.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod, abstractproperty from secml.core import CCreator @@ -23,7 +24,7 @@ class CAttack(CCreator, metaclass=ABCMeta): """ - __super__ = 'CAttack' + __super__ = "CAttack" def __init__(self, classifier): # set the classifier to be attacked @@ -112,17 +113,13 @@ def x_seq(self): @property @abstractmethod def f_eval(self): - """Returns the number of function evaluations made during the attack. - - """ + """Returns the number of function evaluations made during the attack.""" raise NotImplementedError @property @abstractmethod def grad_eval(self): - """Returns the number of gradient evaluations made during the attack. - - """ + """Returns the number of gradient evaluations made during the attack.""" raise NotImplementedError @abstractmethod diff --git a/src/secml/adv/attacks/c_attack_mixin.py b/src/secml/adv/attacks/c_attack_mixin.py index 238d05aa..b643cc22 100644 --- a/src/secml/adv/attacks/c_attack_mixin.py +++ b/src/secml/adv/attacks/c_attack_mixin.py @@ -38,13 +38,16 @@ class CAttackMixin(CAttack): """ - def __init__(self, classifier, - distance=None, - dmax=None, - lb=None, - ub=None, - solver_type=None, - solver_params=None): + def __init__( + self, + classifier, + distance=None, + dmax=None, + lb=None, + ub=None, + solver_type=None, + solver_params=None, + ): super(CAttackMixin, self).__init__(classifier) diff --git a/src/secml/adv/attacks/evasion/__init__.py b/src/secml/adv/attacks/evasion/__init__.py index 6cf1f628..a119219a 100644 --- a/src/secml/adv/attacks/evasion/__init__.py +++ b/src/secml/adv/attacks/evasion/__init__.py @@ -9,8 +9,7 @@ except ImportError: pass # cleverhans is an extra component and requires tensorflow else: - from .cleverhans.c_attack_evasion_cleverhans import \ - CAttackEvasionCleverhans + from .cleverhans.c_attack_evasion_cleverhans import CAttackEvasionCleverhans try: import foolbox @@ -19,18 +18,30 @@ pass # foolbox is an extra component and requires pytorch else: from .foolbox.secml_autograd import SecmlLayer - from .foolbox.c_attack_evasion_foolbox import \ - CAttackEvasionFoolbox - from .foolbox.fb_attacks.fb_basic_iterative_attack import \ - CFoolboxBasicIterative, CFoolboxBasicIterativeL1, \ - CFoolboxBasicIterativeL2, CFoolboxBasicIterativeLinf - from .foolbox.fb_attacks.fb_cw_attack import \ - CFoolboxL2CarliniWagner + from .foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox + from .foolbox.fb_attacks.fb_basic_iterative_attack import ( + CFoolboxBasicIterative, + CFoolboxBasicIterativeL1, + CFoolboxBasicIterativeL2, + CFoolboxBasicIterativeLinf, + ) + from .foolbox.fb_attacks.fb_cw_attack import CFoolboxL2CarliniWagner from .foolbox.fb_attacks.fb_ddn_attack import CFoolboxL2DDN - from .foolbox.fb_attacks.fb_deepfool_attack import \ - CFoolboxDeepfool, CFoolboxDeepfoolL2, CFoolboxDeepfoolLinf + from .foolbox.fb_attacks.fb_deepfool_attack import ( + CFoolboxDeepfool, + CFoolboxDeepfoolL2, + CFoolboxDeepfoolLinf, + ) from .foolbox.fb_attacks.fb_ead_attack import CFoolboxEAD - from .foolbox.fb_attacks.fb_fgm_attack import \ - CFoolboxFGM, CFoolboxFGML1, CFoolboxFGML2, CFoolboxFGMLinf - from .foolbox.fb_attacks.fb_pgd_attack import \ - CFoolboxPGD, CFoolboxPGDL1, CFoolboxPGDL2, CFoolboxPGDLinf + from .foolbox.fb_attacks.fb_fgm_attack import ( + CFoolboxFGM, + CFoolboxFGML1, + CFoolboxFGML2, + CFoolboxFGMLinf, + ) + from .foolbox.fb_attacks.fb_pgd_attack import ( + CFoolboxPGD, + CFoolboxPGDL1, + CFoolboxPGDL2, + CFoolboxPGDLinf, + ) diff --git a/src/secml/adv/attacks/evasion/c_attack_evasion.py b/src/secml/adv/attacks/evasion/c_attack_evasion.py index 590b3a35..daf9f805 100644 --- a/src/secml/adv/attacks/evasion/c_attack_evasion.py +++ b/src/secml/adv/attacks/evasion/c_attack_evasion.py @@ -7,6 +7,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.adv.attacks import CAttack @@ -32,11 +33,10 @@ class CAttackEvasion(CAttack, metaclass=ABCMeta): 'all' (default) if all classes can be manipulated. """ - __super__ = 'CAttackEvasion' - def __init__(self, classifier, - y_target=None, - attack_classes='all'): + __super__ = "CAttackEvasion" + + def __init__(self, classifier, y_target=None, attack_classes="all"): super(CAttackEvasion, self).__init__(classifier) @@ -58,7 +58,7 @@ def attack_classes(self): @attack_classes.setter def attack_classes(self, values): - if not (values == 'all' or isinstance(values, CArray)): + if not (values == "all" or isinstance(values, CArray)): raise ValueError("`attack_classes` can be 'all' or a CArray") self._attack_classes = values @@ -79,7 +79,7 @@ def is_attack_class(self, y): """ if is_int(y): - if self._attack_classes == 'all': + if self._attack_classes == "all": return True # all classes can be manipulated elif CArray(y == self._attack_classes).any(): return True # y can be manipulated @@ -87,7 +87,7 @@ def is_attack_class(self, y): return False elif isinstance(y, CArray): v = CArray.zeros(shape=y.shape, dtype=bool) - if self.attack_classes == 'all': + if self.attack_classes == "all": v[:] = True # all classes can be manipulated return v for i in range(self.attack_classes.size): @@ -181,7 +181,9 @@ def run(self, x, y, ds_init=None): adv_ds = CDataset(x.deepcopy(), y.deepcopy()) # array in which the value of the optimization function are stored - fs_opt = CArray.zeros(n_mod_samples, ) + fs_opt = CArray.zeros( + n_mod_samples, + ) for i in range(n_mod_samples): k = idx[i].item() # idx of sample that can be modified @@ -189,14 +191,14 @@ def run(self, x, y, ds_init=None): xi = x[k, :] if x_init is None else x_init[k, :] x_opt, f_opt = self._run(x[k, :], y[k], x_init=xi) - self.logger.info( - "Point: {:}/{:}, f(x):{:}".format(k, x.shape[0], f_opt)) + self.logger.info("Point: {:}/{:}, f(x):{:}".format(k, x.shape[0], f_opt)) adv_ds.X[k, :] = x_opt fs_opt[i] = f_opt y_pred, scores = self.classifier.predict( - adv_ds.X, return_decision_function=True) + adv_ds.X, return_decision_function=True + ) y_pred = CArray(y_pred) diff --git a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd.py b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd.py index 45535bf9..5efa8081 100644 --- a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd.py +++ b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd.py @@ -7,6 +7,7 @@ .. moduleauthor:: Marco Melis """ + from secml.adv.attacks.evasion import CAttackEvasionPGDLS @@ -68,18 +69,22 @@ class CAttackEvasionPGD(CAttackEvasionPGDLS): class_type : 'e-pgd' """ - __class_type = 'e-pgd' - - def __init__(self, classifier, - double_init_ds=None, - double_init=True, - distance='l1', - dmax=0, - lb=0, - ub=1, - y_target=None, - attack_classes='all', - solver_params=None): + + __class_type = "e-pgd" + + def __init__( + self, + classifier, + double_init_ds=None, + double_init=True, + distance="l1", + dmax=0, + lb=0, + ub=1, + y_target=None, + attack_classes="all", + solver_params=None, + ): # INTERNALS self._x0 = None @@ -100,6 +105,7 @@ def __init__(self, classifier, ub=ub, y_target=y_target, attack_classes=attack_classes, - solver_params=solver_params) + solver_params=solver_params, + ) - self.solver_type = 'pgd' + self.solver_type = "pgd" diff --git a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_exp.py b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_exp.py index 0aad6389..4d4159ae 100644 --- a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_exp.py +++ b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_exp.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.adv.attacks.evasion import CAttackEvasionPGDLS @@ -66,18 +67,22 @@ class onto the feasible domain and try again. class_type : 'e-pgd-exp' """ - __class_type = 'e-pgd-exp' - - def __init__(self, classifier, - double_init_ds=None, - double_init=True, - distance='l1', - dmax=0, - lb=0, - ub=1, - y_target=None, - attack_classes='all', - solver_params=None): + + __class_type = "e-pgd-exp" + + def __init__( + self, + classifier, + double_init_ds=None, + double_init=True, + distance="l1", + dmax=0, + lb=0, + ub=1, + y_target=None, + attack_classes="all", + solver_params=None, + ): # INTERNALS self._x0 = None @@ -98,6 +103,7 @@ def __init__(self, classifier, ub=ub, y_target=y_target, attack_classes=attack_classes, - solver_params=solver_params) + solver_params=solver_params, + ) - self.solver_type = 'pgd-exp' + self.solver_type = "pgd-exp" diff --git a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_ls.py b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_ls.py index d2f0beb3..42e3b4eb 100644 --- a/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_ls.py +++ b/src/secml/adv/attacks/evasion/c_attack_evasion_pgd_ls.py @@ -7,6 +7,7 @@ .. moduleauthor:: Marco Melis """ + from secml.adv.attacks import CAttackMixin from secml.adv.attacks.evasion import CAttackEvasion from secml.optim.optimizers import COptimizer @@ -34,7 +35,7 @@ class CAttackEvasionPGDLS(CAttackEvasion, CAttackMixin): It can also be used on sparse, high-dimensional feature spaces, using an L1 constraint on the manipulation of samples to preserve sparsity, as we did for crafting adversarial Android malware in: - + - https://arxiv.org/abs/1704.08996, IEEE TDSC 2017. For more on evasion attacks, see also: @@ -77,18 +78,22 @@ class CAttackEvasionPGDLS(CAttackEvasion, CAttackMixin): class_type : 'e-pgd-ls' """ - __class_type = 'e-pgd-ls' - - def __init__(self, classifier, - double_init_ds=None, - double_init=True, - distance='l1', - dmax=0, - lb=0, - ub=1, - y_target=None, - attack_classes='all', - solver_params=None): + + __class_type = "e-pgd-ls" + + def __init__( + self, + classifier, + double_init_ds=None, + double_init=True, + distance="l1", + dmax=0, + lb=0, + ub=1, + y_target=None, + attack_classes="all", + solver_params=None, + ): # INTERNALS self._x0 = None @@ -106,19 +111,23 @@ def __init__(self, classifier, self._double_init_labels = None self._double_init_scores = None - CAttackEvasion.__init__(self, - classifier=classifier, - y_target=y_target, - attack_classes=attack_classes) - - CAttackMixin.__init__(self, - classifier=classifier, - distance=distance, - dmax=dmax, - lb=lb, - ub=ub, - solver_type='pgd-ls', - solver_params=solver_params) + CAttackEvasion.__init__( + self, + classifier=classifier, + y_target=y_target, + attack_classes=attack_classes, + ) + + CAttackMixin.__init__( + self, + classifier=classifier, + distance=distance, + dmax=dmax, + lb=lb, + ub=ub, + solver_type="pgd-ls", + solver_params=solver_params, + ) ########################################################################### # READ-WRITE ATTRIBUTES @@ -177,8 +186,7 @@ def _find_k_c(self, y_pred, scores): # the successive choice of the competing classes scores[[smpls_idx, k.tolist()]] = nan - if issubclass( - self.classifier.__class__, CClassifierReject): + if issubclass(self.classifier.__class__, CClassifierReject): # set to nan the score of the reject classes to exclude it by # the successive choice of the competing classes scores[:, -1] = nan @@ -220,28 +228,32 @@ def _objective_function_pred_scores(self, y_pred, scores): def _init_solver(self): """Create solver instance.""" if self.classifier is None or self.distance is None: - raise ValueError('Solver not set properly!') + raise ValueError("Solver not set properly!") # map attributes to fun, constr, box - fun = CFunction(fun=self.objective_function, - gradient=self.objective_function_gradient, - n_dim=self.classifier.n_features) + fun = CFunction( + fun=self.objective_function, + gradient=self.objective_function_gradient, + n_dim=self.classifier.n_features, + ) constr = CConstraint.create(self._distance) constr.center = self._x0 constr.radius = self.dmax # only feature increments or decrements are allowed - lb = self._x0.todense() if self.lb == 'x0' else self.lb - ub = self._x0.todense() if self.ub == 'x0' else self.ub + lb = self._x0.todense() if self.lb == "x0" else self.lb + ub = self._x0.todense() if self.ub == "x0" else self.ub - bounds = CConstraint.create('box', lb=lb, ub=ub) + bounds = CConstraint.create("box", lb=lb, ub=ub) self._solver = COptimizer.create( self._solver_type, - fun=fun, constr=constr, + fun=fun, + constr=constr, bounds=bounds, - **self._solver_params) + **self._solver_params + ) # TODO: fix this verbose level propagation self._solver.verbose = self.verbose @@ -249,7 +261,7 @@ def _init_solver(self): # TODO: add probability as in c_attack_poisoning # (we could also move this directly in c_attack) def _get_point_with_min_f_obj(self, y_pred, scores): - """Returns the alternative init sample having the minimum value + """Returns the alternative init sample having the minimum value of objective function. Parameters @@ -270,14 +282,14 @@ def _get_point_with_min_f_obj(self, y_pred, scores): return self._double_init_ds.X[k, :].ravel() def _set_solver_alternative_predictions(self): - """Compute predictions on double init data using solver classifier. - """ + """Compute predictions on double init data using solver classifier.""" if self.double_init_ds is None: raise ValueError("double_init_ds is not defined") # Compute the new predictions y, score = self.classifier.predict( - self.double_init_ds.X, return_decision_function=True) + self.double_init_ds.X, return_decision_function=True + ) self._double_init_labels = y self._double_init_scores = score @@ -286,8 +298,7 @@ def _set_alternative_init(self): self.logger.info("Computing an alternative init point...") # Compute predictions on double init data if necessary - if self._double_init_labels is None or \ - self._double_init_scores is None: + if self._double_init_labels is None or self._double_init_scores is None: self._set_solver_alternative_predictions() y_pred = self._double_init_labels @@ -295,23 +306,25 @@ def _set_alternative_init(self): # for targeted evasion, this does not depend on the data label y0 if self.y_target is not None: - self._xk = self._get_point_with_min_f_obj( - y_pred, scores.deepcopy()) + self._xk = self._get_point_with_min_f_obj(y_pred, scores.deepcopy()) return # for indiscriminate evasion, this depends on y0 # so, we compute xk for all classes - n_classes = self.classifier.n_classes - 1 \ - if issubclass(self.classifier.__class__, CClassifierReject) \ + n_classes = ( + self.classifier.n_classes - 1 + if issubclass(self.classifier.__class__, CClassifierReject) else self.classifier.n_classes - self._xk = CArray.zeros(shape=(n_classes, self.classifier.n_features), - sparse=self.double_init_ds.issparse, - dtype=self.double_init_ds.X.dtype) + ) + self._xk = CArray.zeros( + shape=(n_classes, self.classifier.n_features), + sparse=self.double_init_ds.issparse, + dtype=self.double_init_ds.X.dtype, + ) y0 = self._y0 # Backup last y0 for i in range(n_classes): self._y0 = i - self._xk[i, :] = self._get_point_with_min_f_obj( - y_pred, scores.deepcopy()) + self._xk[i, :] = self._get_point_with_min_f_obj(y_pred, scores.deepcopy()) self._y0 = y0 # Restore last y0 def _run(self, x0, y0, x_init=None): @@ -458,8 +471,7 @@ def objective_function(self, x): """ - y_pred, scores = self.classifier.predict( - x, return_decision_function=True) + y_pred, scores = self.classifier.predict(x, return_decision_function=True) f_obj = self._objective_function_pred_scores(y_pred, scores) @@ -474,8 +486,7 @@ def objective_function_gradient(self, x): A single point. """ - y_pred, scores = self.classifier.predict( - x, return_decision_function=True) + y_pred, scores = self.classifier.predict(x, return_decision_function=True) k, c = self._find_k_c(y_pred, scores) diff --git a/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans.py b/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans.py index 0b13fa71..3d344d2f 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans.py +++ b/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans.py @@ -7,18 +7,29 @@ .. moduleauthor:: Maura Pintor """ + import numpy as np import tensorflow as tf -from cleverhans.attacks import \ - FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS, \ - ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod, \ - MadryEtAl, BasicIterativeMethod, DeepFool +from cleverhans.attacks import ( + FastGradientMethod, + CarliniWagnerL2, + ElasticNetMethod, + SPSA, + LBFGS, + ProjectedGradientDescent, + SaliencyMapMethod, + MomentumIterativeMethod, + MadryEtAl, + BasicIterativeMethod, + DeepFool, +) from cleverhans.model import Model from secml.adv.attacks import CAttack from secml.adv.attacks.evasion import CAttackEvasion -from secml.adv.attacks.evasion.cleverhans.c_attack_evasion_cleverhans_losses \ - import CAttackEvasionCleverhansLossesMixin +from secml.adv.attacks.evasion.cleverhans.c_attack_evasion_cleverhans_losses import ( + CAttackEvasionCleverhansLossesMixin, +) from secml.array import CArray from secml.core import CCreator from secml.core.constants import nan @@ -27,17 +38,24 @@ from secml.optim.function import CFunction SUPPORTED_ATTACKS = [ - FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS, - ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod, - MadryEtAl, BasicIterativeMethod, DeepFool + FastGradientMethod, + CarliniWagnerL2, + ElasticNetMethod, + SPSA, + LBFGS, + ProjectedGradientDescent, + SaliencyMapMethod, + MomentumIterativeMethod, + MadryEtAl, + BasicIterativeMethod, + DeepFool, ] -class CAttackEvasionCleverhans(CAttackEvasion, - CAttackEvasionCleverhansLossesMixin): +class CAttackEvasionCleverhans(CAttackEvasion, CAttackEvasionCleverhansLossesMixin): """This class is a wrapper of the attacks implemented in the Cleverhans library. - + Credits: https://github.com/tensorflow/cleverhans. Parameters @@ -63,11 +81,17 @@ class CAttackEvasionCleverhans(CAttackEvasion, The current Tensorflow default graph will be used. """ - __class_type = 'e-cleverhans' - def __init__(self, classifier, y_target=None, - clvh_attack_class=CarliniWagnerL2, - store_var_list=None, **kwargs): + __class_type = "e-cleverhans" + + def __init__( + self, + classifier, + y_target=None, + clvh_attack_class=CarliniWagnerL2, + store_var_list=None, + **kwargs + ): self._tfsess = tf.compat.v1.Session() @@ -90,17 +114,21 @@ def __init__(self, classifier, y_target=None, if store_var_list is not None: # first, check if the user has set stored variables self._stored_vars = {k: [] for k in store_var_list} - elif any([self._clvrh_attack_class == CarliniWagnerL2, - self._clvrh_attack_class == ElasticNetMethod, ]): + elif any( + [ + self._clvrh_attack_class == CarliniWagnerL2, + self._clvrh_attack_class == ElasticNetMethod, + ] + ): # store `const` by default for these attacks as it # is needed in the `objective_function` computation - self._stored_vars = {'const': []} + self._stored_vars = {"const": []} else: self._stored_vars = None super(CAttackEvasionCleverhans, self).__init__( - classifier=classifier, - y_target=y_target) + classifier=classifier, y_target=y_target + ) self._n_classes = self._classifier.n_classes self._n_feats = self._classifier.n_features @@ -111,16 +139,15 @@ def set(self, param_name, param_value, copy=False): # we need the possibility of running the attack for eps==0, # this is not allowed in standard cleverhans - if 'eps' in param_name: + if "eps" in param_name: if param_value == 0: param_value = 1 self._eps_0 = True else: self._eps_0 = False - if param_name.startswith('attack_params'): - super(CAttackEvasionCleverhans, self).set(param_name, param_value, - copy) + if param_name.startswith("attack_params"): + super(CAttackEvasionCleverhans, self).set(param_name, param_value, copy) # re-initialize the Tensorflow operations self._initialize_tf_ops() @@ -131,8 +158,7 @@ def run(self, x, y, ds_init=None): if self._stored_vars is not None: for key in self._stored_vars: self._stored_vars[key] = [] - return super(CAttackEvasionCleverhans, self).run( - x, y, ds_init=ds_init) + return super(CAttackEvasionCleverhans, self).run(x, y, ds_init=ds_init) ########################################################################### # READ-ONLY ATTRIBUTES @@ -154,23 +180,17 @@ def grad_eval(self): @property def stored_vars(self): - """Variables extracted from the graph during execution of the attack. - - """ + """Variables extracted from the graph during execution of the attack.""" return self._stored_vars @property def attack_params(self): - """Object containing all Cleverhans parameters - - """ + """Object containing all Cleverhans parameters""" return self._attack_params @attack_params.setter def attack_params(self, value): - """Object containing all Cleverhans parameters - - """ + """Object containing all Cleverhans parameters""" self._attack_params = _CClvrh_params(value) ########################################################################### @@ -196,8 +216,13 @@ def objective_function(self, x): elif self._clvrh_attack_class == SPSA: return self._objective_function_SPSA(x) elif self._clvrh_attack_class in [ - FastGradientMethod, ProjectedGradientDescent, LBFGS, - MomentumIterativeMethod, MadryEtAl, BasicIterativeMethod]: + FastGradientMethod, + ProjectedGradientDescent, + LBFGS, + MomentumIterativeMethod, + MadryEtAl, + BasicIterativeMethod, + ]: return self._objective_function_cross_entropy(x) else: raise NotImplementedError @@ -213,21 +238,24 @@ def _create_tf_operations(self): """ if self.y_target is None: - if 'y' in self._clvrh_attack.feedable_kwargs: + if "y" in self._clvrh_attack.feedable_kwargs: self._adv_x_T = self._clvrh_attack.generate( - self._initial_x_P, y=self._y_P, - **self.attack_params.__dict__) + self._initial_x_P, y=self._y_P, **self.attack_params.__dict__ + ) else: # 'y' not required by attack self._adv_x_T = self._clvrh_attack.generate( - self._initial_x_P, **self.attack_params.__dict__) + self._initial_x_P, **self.attack_params.__dict__ + ) else: - if 'y_target' not in self._clvrh_attack.feedable_kwargs: + if "y_target" not in self._clvrh_attack.feedable_kwargs: raise RuntimeError( "cannot perform a targeted {:} attack".format( - self._clvrh_attack.__class__.__name__)) + self._clvrh_attack.__class__.__name__ + ) + ) self._adv_x_T = self._clvrh_attack.generate( - self._initial_x_P, y_target=self._y_P, - **self._attack_params.__dict__) + self._initial_x_P, y_target=self._y_P, **self._attack_params.__dict__ + ) def _initialize_tf_ops(self): @@ -236,27 +264,27 @@ def _initialize_tf_ops(self): self._tfsess.close() session_conf = tf.compat.v1.ConfigProto( inter_op_parallelism_threads=-1, # Perform in caller's thread - use_per_session_threads=False # Per-session thread pools + use_per_session_threads=False, # Per-session thread pools ) self._tfsess = tf.compat.v1.Session(config=session_conf) # wrap the surrogate classifier into a cleverhans classifier - self._clvrh_clf = _CModelCleverhans( - self.classifier, out_dims=self._n_classes) + self._clvrh_clf = _CModelCleverhans(self.classifier, out_dims=self._n_classes) # create an instance of the chosen cleverhans attack self._clvrh_attack = self._clvrh_attack_class( - self._clvrh_clf, sess=self._tfsess) + self._clvrh_clf, sess=self._tfsess + ) # create the placeholder to feed into the attack the initial evasion # samples self._initial_x_P = tf.compat.v1.placeholder( - tf.float32, shape=(None, self._n_feats)) + tf.float32, shape=(None, self._n_feats) + ) # placeholder used to feed the true or the target label (it is a # one-hot encoded vector) - self._y_P = tf.compat.v1.placeholder( - tf.float32, shape=(1, self._n_classes)) + self._y_P = tf.compat.v1.placeholder(tf.float32, shape=(1, self._n_classes)) # call the function of the cleverhans attack called `generate` that # constucts the Tensorflow operation needed to perform the attack @@ -266,12 +294,12 @@ def _define_warning_filter(self): # We filter few warnings raised by numpy, caused by cleverhans self.logger.filterwarnings( - "ignore", category=RuntimeWarning, - message="invalid value encountered in double_scalars*" + "ignore", + category=RuntimeWarning, + message="invalid value encountered in double_scalars*", ) self.logger.filterwarnings( - "ignore", category=RuntimeWarning, - message="Mean of empty slice*" + "ignore", category=RuntimeWarning, message="Mean of empty slice*" ) def _create_one_hot_y(self): @@ -281,8 +309,7 @@ def _create_one_hot_y(self): equal to the true class of the attack sample. """ - one_hot_y = CArray.zeros(shape=(1, self._n_classes), - dtype=np.float32) + one_hot_y = CArray.zeros(shape=(1, self._n_classes), dtype=np.float32) if self.y_target is not None: one_hot_y[0, self.y_target] = 1 @@ -348,8 +375,7 @@ def _run(self, x0, y0, x_init=None): # create a one-hot-encoded vector to feed the true or # the y_target label - one_hot_y = CArray.zeros(shape=(1, self._n_classes), - dtype=np.float32) + one_hot_y = CArray.zeros(shape=(1, self._n_classes), dtype=np.float32) if self.y_target is not None: one_hot_y[0, self.y_target] = 1 @@ -367,8 +393,9 @@ def _run(self, x0, y0, x_init=None): one_hot_y = self._create_one_hot_y() self._x_opt = self._tfsess.run( - self._adv_x_T, feed_dict={self._initial_x_P: x, - self._y_P: one_hot_y.tondarray()}) + self._adv_x_T, + feed_dict={self._initial_x_P: x, self._y_P: one_hot_y.tondarray()}, + ) self._x_opt = CArray(self._x_opt) self._x_seq = self._clvrh_clf._x_seq @@ -428,7 +455,7 @@ def _decision_function(self, x): because the output of the CFunction should be either a scalar or a CArray whereas the predict function returns a tuple. """ - if hasattr(self, '_x_seq') and self._x_seq is not None: + if hasattr(self, "_x_seq") and self._x_seq is not None: if self._is_init is True: # avoid storing twice the initial value self._is_init = False else: # Cache intermediate values @@ -440,8 +467,9 @@ def __init__(self, clf, out_dims=None): self._clf = clf if isinstance(clf, CClassifierReject): - raise ValueError("classifier with reject cannot be " - "converted to a tensorflow model") + raise ValueError( + "classifier with reject cannot be " "converted to a tensorflow model" + ) if not clf.is_fitted(): raise NotFittedError("The classifier should be already trained!") @@ -449,12 +477,11 @@ def __init__(self, clf, out_dims=None): self._out_dims = out_dims # classifier output tensor name. Either "probs" or "logits". - self._output_layer = 'logits' + self._output_layer = "logits" # Given a trained CClassifier, creates a tensorflow node for the # network output and one for its gradient - self._fun = CFunction(fun=self._decision_function, - gradient=clf.gradient) + self._fun = CFunction(fun=self._decision_function, gradient=clf.gradient) self._callable_fn = _CClassifierToTF(self._fun, self._out_dims) super(_CModelCleverhans, self).__init__(nb_classes=clf.n_classes) @@ -489,8 +516,7 @@ def reset_caching(self, x=None): self._x_seq = None def get_variable_value(self, variable_name): - return tf.get_default_graph().get_tensor_by_name( - "{:}:0".format(variable_name)) + return tf.get_default_graph().get_tensor_by_name("{:}:0".format(variable_name)) class _CClassifierToTF: @@ -532,10 +558,13 @@ def __call__(self, x_op): used to have its gradient. """ - out = _py_func_with_gradient(self._fprop_fn, [x_op], - Tout=[tf.float32], - stateful=True, - grad_func=self._tf_gradient_fn)[0] + out = _py_func_with_gradient( + self._fprop_fn, + [x_op], + Tout=[tf.float32], + stateful=True, + grad_func=self._tf_gradient_fn, + )[0] out.set_shape([None, self.out_dims]) return out @@ -584,8 +613,10 @@ def _np_grad_fn(self, x_np, grads_in_np=None): n_samples = x_carray.shape[0] if n_samples > 1: - raise ValueError("The gradient of CCleverhansAttack can be " - "computed only for one sample at time") + raise ValueError( + "The gradient of CCleverhansAttack can be " + "computed only for one sample at time" + ) grad_f_x = self.fun.gradient grads = grad_f_x(x_carray, w=grads_in_np).atleast_2d() @@ -608,12 +639,14 @@ def _tf_gradient_fn(self, op, grads_in): """ pyfun = tf.compat.v1.py_func( - self._np_grad_fn, [op.inputs[0], grads_in], Tout=[tf.float32]) + self._np_grad_fn, [op.inputs[0], grads_in], Tout=[tf.float32] + ) return pyfun def _py_func_with_gradient( - func, inp, Tout, stateful=True, pyfun_name=None, grad_func=None): + func, inp, Tout, stateful=True, pyfun_name=None, grad_func=None +): """ Given a function that returns as output a numpy array, and optionally a function that computes its gradient, this function returns a pyfunction. @@ -640,7 +673,8 @@ def _py_func_with_gradient( """ # Generate random name in order to avoid conflicts with inbuilt names from random import getrandbits - rnd_name = 'PyFuncGrad-' + '%0x' % getrandbits(30 * 4) + + rnd_name = "PyFuncGrad-" + "%0x" % getrandbits(30 * 4) # Register Tensorflow Gradient tf.RegisterGradient(rnd_name)(grad_func) @@ -649,10 +683,8 @@ def _py_func_with_gradient( g = tf.compat.v1.get_default_graph() # Add gradient override map - with g.gradient_override_map( - {"PyFunc": rnd_name, "PyFuncStateless": rnd_name}): - return tf.compat.v1.py_func( - func, inp, Tout, stateful=stateful, name=pyfun_name) + with g.gradient_override_map({"PyFunc": rnd_name, "PyFuncStateless": rnd_name}): + return tf.compat.v1.py_func(func, inp, Tout, stateful=stateful, name=pyfun_name) class _CClvrh_params(CCreator): diff --git a/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans_losses.py b/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans_losses.py index 8a98d4de..9738d20e 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans_losses.py +++ b/src/secml/adv/attacks/evasion/cleverhans/c_attack_evasion_cleverhans_losses.py @@ -6,6 +6,7 @@ .. moduleauthor:: Maura Pintor """ + import numpy as np from secml.array import CArray @@ -17,9 +18,8 @@ class CAttackEvasionCleverhansLossesMixin(object): Cleverhans attacks.""" def _objective_function_cw(self, x): - if self._stored_vars is not None and \ - 'const' in self._stored_vars: - stored_const = self._stored_vars['const'][0] + if self._stored_vars is not None and "const" in self._stored_vars: + stored_const = self._stored_vars["const"][0] if self._x0.shape[0] == 1: # use same const for all points c_weight = stored_const.item() @@ -29,19 +29,21 @@ def _objective_function_cw(self, x): for i, c in enumerate(stored_const): c_weight[i] = c else: - self.logger.warning('Constant value not stored during run. Using ' - 'initial_const value. For computing the loss ' - 'with the actual value of const set ' - '`store_var_list=["const"]` in ' - 'CAttackEvasionCleverhans.__init__().') + self.logger.warning( + "Constant value not stored during run. Using " + "initial_const value. For computing the loss " + "with the actual value of const set " + '`store_var_list=["const"]` in ' + "CAttackEvasionCleverhans.__init__()." + ) c_weight = self._clvrh_attack.initial_const l2dist = ((self._x0 - x) ** 2).sum(axis=1).ravel() z_labels, z_predicted = self.classifier.predict( - x, return_decision_function=True) - y_target = CArray.zeros(shape=(1, self._n_classes), - dtype=np.float32) + x, return_decision_function=True + ) + y_target = CArray.zeros(shape=(1, self._n_classes), dtype=np.float32) # destination point label if self.y_target is not None: y_target[0, self.y_target] = 1 @@ -49,8 +51,9 @@ def _objective_function_cw(self, x): y_target[0, self._y0] = 1 z_target = (z_predicted * y_target).sum(axis=1).ravel() - z_other = ((z_predicted * (1 - y_target) + - (z_predicted.min(axis=1) - 1) * y_target)).max(axis=1) + z_other = ( + (z_predicted * (1 - y_target) + (z_predicted.min(axis=1) - 1) * y_target) + ).max(axis=1) z_other = z_other.ravel() # The following differs from the exact definition given in Carlini @@ -65,17 +68,18 @@ def _objective_function_cw(self, x): if self.y_target is not None: # if targeted, optimize for making the target class most likely - loss = CArray.maximum(z_other - z_target + self.confidence, - CArray.zeros(x.shape[0])) + loss = CArray.maximum( + z_other - z_target + self.confidence, CArray.zeros(x.shape[0]) + ) else: # if untargeted, optimize for making any other class most likely - loss = CArray.maximum(z_target - z_other + self.confidence, - CArray.zeros(x.shape[0])) + loss = CArray.maximum( + z_target - z_other + self.confidence, CArray.zeros(x.shape[0]) + ) return c_weight * loss + l2dist def _objective_function_cross_entropy(self, x): - preds, scores = self.classifier.predict( - x, return_decision_function=True) + preds, scores = self.classifier.predict(x, return_decision_function=True) if self.y_target is None: target = self._y0 else: @@ -86,9 +90,8 @@ def _objective_function_cross_entropy(self, x): def _objective_function_elastic_net(self, x): - if self._stored_vars is not None and \ - 'const' in self._stored_vars: - stored_const = self._stored_vars['const'][0] + if self._stored_vars is not None and "const" in self._stored_vars: + stored_const = self._stored_vars["const"][0] if self._x0.shape[0] == 1: # use same const for all points c_weight = stored_const.item() @@ -98,19 +101,21 @@ def _objective_function_elastic_net(self, x): for i, c in enumerate(stored_const): c_weight[i] = c else: - self.logger.warning('Constant value not stored during run. Using ' - 'initial_const value. For computing the loss ' - 'with the actual value of const set ' - '`store_var_list=["const"]` in ' - 'CAttackEvasionCleverhans.__init__().') + self.logger.warning( + "Constant value not stored during run. Using " + "initial_const value. For computing the loss " + "with the actual value of const set " + '`store_var_list=["const"]` in ' + "CAttackEvasionCleverhans.__init__()." + ) c_weight = self._clvrh_attack.initial_const - if self._clvrh_attack.decision_rule == 'L1': + if self._clvrh_attack.decision_rule == "L1": d = ((self._x0 - x).abs()).sum(axis=1).ravel() - elif self._clvrh_attack.decision_rule == 'L2': + elif self._clvrh_attack.decision_rule == "L2": d = ((self._x0 - x) ** 2).sum(axis=1).ravel() - elif self._clvrh_attack.decision_rule == 'END': + elif self._clvrh_attack.decision_rule == "END": l1dist = ((self._x0 - x).abs()).sum(axis=1).ravel() l2dist = ((self._x0 - x) ** 2).sum(axis=1).ravel() d = self._clvrh_attack.beta * l1dist + l2dist @@ -118,9 +123,9 @@ def _objective_function_elastic_net(self, x): raise ValueError("The decision rule only supports `EN`, `L1`, `L2`.") z_labels, z_predicted = self.classifier.predict( - x, return_decision_function=True) - y_target = CArray.zeros(shape=(1, self._n_classes), - dtype=np.float32) + x, return_decision_function=True + ) + y_target = CArray.zeros(shape=(1, self._n_classes), dtype=np.float32) # destination point label if self.y_target is not None: y_target[0, self.y_target] = 1 @@ -128,8 +133,9 @@ def _objective_function_elastic_net(self, x): y_target[0, self._y0] = 1 z_target = (z_predicted * y_target).sum(axis=1).ravel() - z_other = ((z_predicted * (1 - y_target) + - (z_predicted.min(axis=1) - 1) * y_target)).max(axis=1) + z_other = ( + (z_predicted * (1 - y_target) + (z_predicted.min(axis=1) - 1) * y_target) + ).max(axis=1) z_other = z_other.ravel() # The following differs from the exact definition given in Carlini @@ -143,12 +149,14 @@ def _objective_function_elastic_net(self, x): if self.y_target is not None: # if targeted, optimize for making the target class most likely - loss = CArray.maximum(z_other - z_target + self.confidence, - CArray.zeros(x.shape[0])) + loss = CArray.maximum( + z_other - z_target + self.confidence, CArray.zeros(x.shape[0]) + ) else: # if untargeted, optimize for making any other class most likely - loss = CArray.maximum(z_target - z_other + self.confidence, - CArray.zeros(x.shape[0])) + loss = CArray.maximum( + z_target - z_other + self.confidence, CArray.zeros(x.shape[0]) + ) return d + loss * c_weight diff --git a/src/secml/adv/attacks/evasion/cleverhans/tests/__init__.py b/src/secml/adv/attacks/evasion/cleverhans/tests/__init__.py index 32d9a645..a5616073 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/tests/__init__.py +++ b/src/secml/adv/attacks/evasion/cleverhans/tests/__init__.py @@ -1,2 +1 @@ -from .c_attack_evasion_cleverhans_testcases import \ - CAttackEvasionCleverhansTestCases +from .c_attack_evasion_cleverhans_testcases import CAttackEvasionCleverhansTestCases diff --git a/src/secml/adv/attacks/evasion/cleverhans/tests/c_attack_evasion_cleverhans_testcases.py b/src/secml/adv/attacks/evasion/cleverhans/tests/c_attack_evasion_cleverhans_testcases.py index 3ac139d2..16f4441c 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/tests/c_attack_evasion_cleverhans_testcases.py +++ b/src/secml/adv/attacks/evasion/cleverhans/tests/c_attack_evasion_cleverhans_testcases.py @@ -2,13 +2,14 @@ from secml.utils import fm -IMAGES_FOLDER = fm.join(fm.abspath(__file__), 'test_images') +IMAGES_FOLDER = fm.join(fm.abspath(__file__), "test_images") if not fm.folder_exist(IMAGES_FOLDER): fm.make_folder(IMAGES_FOLDER) class CAttackEvasionCleverhansTestCases(CAttackEvasionTestCases): """Unittests interface for CAttackEvasionCleverhans.""" + images_folder = IMAGES_FOLDER def _test_confidence(self, x0, y0, x_opt, clf, y_target): @@ -29,13 +30,12 @@ def _test_confidence(self, x0, y0, x_opt, clf, y_target): y_target : int """ - init_pred, init_score = clf.predict( - x0, return_decision_function=True) - final_pred, final_score = clf.predict( - x_opt, return_decision_function=True) + init_pred, init_score = clf.predict(x0, return_decision_function=True) + final_pred, final_score = clf.predict(x_opt, return_decision_function=True) if y_target is not None: - self.assertGreater(final_score[:, y_target].item(), - init_score[:, y_target].item()) + self.assertGreater( + final_score[:, y_target].item(), init_score[:, y_target].item() + ) self.assertLess(final_score[y0].item(), init_score[y0].item()) diff --git a/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans.py b/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans.py index 9326e6a7..03ee087c 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans.py +++ b/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans.py @@ -1,15 +1,21 @@ -from secml.adv.attacks.evasion.cleverhans.tests import \ - CAttackEvasionCleverhansTestCases +from secml.adv.attacks.evasion.cleverhans.tests import CAttackEvasionCleverhansTestCases try: import cleverhans except ImportError: CAttackEvasionCleverhansTestCases.importskip("cleverhans") -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + CAttackEvasionCleverhansTestCases.importskip("tensorflow") -from cleverhans.attacks import ElasticNetMethod, CarliniWagnerL2, \ - ProjectedGradientDescent, SPSA +from cleverhans.attacks import ( + ElasticNetMethod, + CarliniWagnerL2, + ProjectedGradientDescent, + SPSA, +) from secml.array import CArray from secml.data.loader import CDLRandomBlobs @@ -24,11 +30,11 @@ class TestCAttackEvasionCleverhans(CAttackEvasionCleverhansTestCases): """Unittests for CAttackEvasionCleverhans.""" + make_figures = True # Set as True to produce figures @classmethod def setUpClass(cls): - CAttackEvasionCleverhansTestCases.setUpClass() cls.seed = 0 @@ -36,14 +42,19 @@ def setUpClass(cls): cls.y_target = None cls.clf = CClassifierMulticlassOVA( - CClassifierSVM, kernel=CKernelRBF(gamma=10), - C=0.1, preprocess=CNormalizerMinMax()) - - cls.ds = CDLRandomBlobs(n_features=0, - centers=[[0.1, 0.1], [0.5, 0], [0.8, 0.8]], - cluster_std=0.01, - n_samples=100, - random_state=cls.seed).load() + CClassifierSVM, + kernel=CKernelRBF(gamma=10), + C=0.1, + preprocess=CNormalizerMinMax(), + ) + + cls.ds = CDLRandomBlobs( + n_features=0, + centers=[[0.1, 0.1], [0.5, 0], [0.8, 0.8]], + cluster_std=0.01, + n_samples=100, + random_state=cls.seed, + ).load() cls.clf.fit(cls.ds.X, cls.ds.Y) @@ -56,24 +67,24 @@ def test_SPSA(self): tf.set_random_seed(self.seed) attack_params = { - 'eps': 0.5, - 'delta': 0.1, - 'clip_min': 0.0, - 'clip_max': 1.0, - 'nb_iter': 50, - 'learning_rate': 0.03, + "eps": 0.5, + "delta": 0.1, + "clip_min": 0.0, + "clip_max": 1.0, + "nb_iter": 50, + "learning_rate": 0.03, } evas = CAttackEvasionCleverhans( classifier=self.clf, y_target=self.y_target, clvh_attack_class=SPSA, - **attack_params) + **attack_params, + ) # FIXME: random seed not working for SPSA? self._run_evasion(evas, self.x0, self.y0, expected_x=None) - self._test_confidence( - self.x0, self.y0, evas.x_opt, self.clf, self.y_target) + self._test_confidence(self.x0, self.y0, evas.x_opt, self.clf, self.y_target) self._test_plot(evas) def test_PGD(self): @@ -82,25 +93,25 @@ def test_PGD(self): tf.set_random_seed(self.seed) attack_params = { - 'eps': 0.5, - 'eps_iter': 0.1, - 'ord': 2, - 'rand_init': False, - 'nb_iter': 20 + "eps": 0.5, + "eps_iter": 0.1, + "ord": 2, + "rand_init": False, + "nb_iter": 20, } evas = CAttackEvasionCleverhans( classifier=self.clf, y_target=self.y_target, clvh_attack_class=ProjectedGradientDescent, - **attack_params) + **attack_params, + ) # Expected final optimal point expected_x = CArray([0.7643, 0.6722]) self._run_evasion(evas, self.x0, self.y0, expected_x=expected_x) - self._test_confidence( - self.x0, self.y0, evas.x_opt, self.clf, self.y_target) + self._test_confidence(self.x0, self.y0, evas.x_opt, self.clf, self.y_target) self._test_plot(evas) def test_CWL2(self): @@ -109,28 +120,28 @@ def test_CWL2(self): tf.set_random_seed(self.seed) attack_params = { - 'binary_search_steps': 4, - 'initial_const': 0.01, - 'confidence': 10, - 'abort_early': True, - 'clip_min': 0.0, - 'clip_max': 1.0, - 'max_iterations': 30, - 'learning_rate': 0.03, + "binary_search_steps": 4, + "initial_const": 0.01, + "confidence": 10, + "abort_early": True, + "clip_min": 0.0, + "clip_max": 1.0, + "max_iterations": 30, + "learning_rate": 0.03, } evas = CAttackEvasionCleverhans( classifier=self.clf, y_target=self.y_target, clvh_attack_class=CarliniWagnerL2, - **attack_params) + **attack_params, + ) # Expected final optimal point expected_x = CArray([0.8316, 0.5823]) self._run_evasion(evas, self.x0, self.y0, expected_x=expected_x) - self._test_confidence( - self.x0, self.y0, evas.x_opt, self.clf, self.y_target) + self._test_confidence(self.x0, self.y0, evas.x_opt, self.clf, self.y_target) self._test_stored_consts(evas) self._test_plot(evas) @@ -140,21 +151,22 @@ def test_ENM(self): tf.set_random_seed(self.seed) attack_params = { - 'binary_search_steps': 3, - 'initial_const': 0.01, - 'confidence': 10, - 'abort_early': True, - 'clip_min': 0.0, - 'clip_max': 1.0, - 'max_iterations': 30, - 'learning_rate': 0.03, + "binary_search_steps": 3, + "initial_const": 0.01, + "confidence": 10, + "abort_early": True, + "clip_min": 0.0, + "clip_max": 1.0, + "max_iterations": 30, + "learning_rate": 0.03, } evas = CAttackEvasionCleverhans( classifier=self.clf, y_target=self.y_target, - decision_rule='END', + decision_rule="END", clvh_attack_class=ElasticNetMethod, - **attack_params) + **attack_params, + ) # Expected final optimal point # expected_x = CArray([0.7885, 0.7719]) @@ -162,8 +174,7 @@ def test_ENM(self): self._run_evasion(evas, self.x0, self.y0, expected_x=expected_x) - self._test_confidence( - self.x0, self.y0, evas.x_opt, self.clf, self.y_target) + self._test_confidence(self.x0, self.y0, evas.x_opt, self.clf, self.y_target) self._test_stored_consts(evas) self._test_plot(evas) @@ -193,20 +204,25 @@ def _test_plot(self, evas): fig = CFigure() fig.sp.plot_path(evas.x_seq) - fig.sp.plot_fun(evas.objective_function, - plot_levels=False, multipoint=True, - n_grid_points=50) - fig.sp.plot_decision_regions(self.clf, - plot_background=False, - n_grid_points=100) - - fig.title("ATTACK: {}, y_target: {}".format( - evas._clvrh_attack_class.__name__, self.y_target)) - - name_file = '{}_evasion2D_target_{}.pdf'.format( - evas._clvrh_attack_class.__name__, self.y_target) - fig.savefig(fm.join(self.images_folder, name_file), file_format='pdf') - - -if __name__ == '__main__': + fig.sp.plot_fun( + evas.objective_function, + plot_levels=False, + multipoint=True, + n_grid_points=50, + ) + fig.sp.plot_decision_regions(self.clf, plot_background=False, n_grid_points=100) + + fig.title( + "ATTACK: {}, y_target: {}".format( + evas._clvrh_attack_class.__name__, self.y_target + ) + ) + + name_file = "{}_evasion2D_target_{}.pdf".format( + evas._clvrh_attack_class.__name__, self.y_target + ) + fig.savefig(fm.join(self.images_folder, name_file), file_format="pdf") + + +if __name__ == "__main__": CAttackEvasionCleverhansTestCases.main() diff --git a/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans_mnist.py b/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans_mnist.py index b7c983cf..bd5f234d 100644 --- a/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans_mnist.py +++ b/src/secml/adv/attacks/evasion/cleverhans/tests/test_c_attack_evasion_cleverhans_mnist.py @@ -1,17 +1,28 @@ -from secml.adv.attacks.evasion.cleverhans.tests import \ - CAttackEvasionCleverhansTestCases +from secml.adv.attacks.evasion.cleverhans.tests import CAttackEvasionCleverhansTestCases try: import cleverhans except ImportError: CAttackEvasionCleverhansTestCases.importskip("cleverhans") -import tensorflow as tf - -from cleverhans.attacks import FastGradientMethod, CarliniWagnerL2, \ - ElasticNetMethod, SPSA, LBFGS, \ - ProjectedGradientDescent, SaliencyMapMethod, \ - MomentumIterativeMethod, MadryEtAl, BasicIterativeMethod, DeepFool +try: + import tensorflow as tf +except ImportError: + CAttackEvasionCleverhansTestCases.importskip("tensorflow") + +from cleverhans.attacks import ( + FastGradientMethod, + CarliniWagnerL2, + ElasticNetMethod, + SPSA, + LBFGS, + ProjectedGradientDescent, + SaliencyMapMethod, + MomentumIterativeMethod, + MadryEtAl, + BasicIterativeMethod, + DeepFool, +) from secml.array import CArray from secml.data.loader import CDataLoaderMNIST @@ -26,21 +37,20 @@ class TestCAttackEvasionCleverhansMNIST(CAttackEvasionCleverhansTestCases): """Unittests for CAttackEvasionCleverhans on MNIST dataset""" + make_figures = False # Set as True to produce figures @classmethod def setUpClass(cls): - CAttackEvasionCleverhansTestCases.setUpClass() cls.seed = 0 - cls.tr, cls.val, cls.ts, cls.digits, \ - cls.img_w, cls.img_h = cls._load_mnist() + cls.tr, cls.val, cls.ts, cls.digits, cls.img_w, cls.img_h = cls._load_mnist() cls.clf = CClassifierMulticlassOVA(CClassifierSVM) cls.clf.fit(cls.tr.X, cls.tr.Y) - + cls.x0_img_class = 1 cls.y_target = 2 # Target class for targeted tests @@ -48,27 +58,25 @@ def setUpClass(cls): def _load_mnist(): """Load MNIST 4971 dataset.""" digits = [4, 9, 7, 1] - digits_str = "".join(['{:}-'.format(i) for i in digits[:-1]]) - digits_str += '{:}'.format(digits[-1]) + digits_str = "".join(["{:}-".format(i) for i in digits[:-1]]) + digits_str += "{:}".format(digits[-1]) # FIXME: REMOVE THIS AFTER CDATALOADERS AUTOMATICALLY STORE DS - tr_file = fm.join( - fm.abspath(__file__), 'mnist_tr_{:}.gz'.format(digits_str)) + tr_file = fm.join(fm.abspath(__file__), "mnist_tr_{:}.gz".format(digits_str)) if not fm.file_exist(tr_file): loader = CDataLoaderMNIST() - tr = loader.load('training', digits=digits) + tr = loader.load("training", digits=digits) pickle_utils.save(tr_file, tr) else: - tr = pickle_utils.load(tr_file, encoding='latin1') + tr = pickle_utils.load(tr_file, encoding="latin1") - ts_file = fm.join( - fm.abspath(__file__), 'mnist_ts_{:}.gz'.format(digits_str)) + ts_file = fm.join(fm.abspath(__file__), "mnist_ts_{:}.gz".format(digits_str)) if not fm.file_exist(ts_file): loader = CDataLoaderMNIST() - ts = loader.load('testing', digits=digits) + ts = loader.load("testing", digits=digits) pickle_utils.save(ts_file, ts) else: - ts = pickle_utils.load(ts_file, encoding='latin1') + ts = pickle_utils.load(ts_file, encoding="latin1") idx = CArray.arange(tr.num_samples) val_dts_idx = CArray.randsample(idx, 200, random_state=0) @@ -88,7 +96,7 @@ def _load_mnist(): def _choose_x0_2c(self, x0_img_class): """Find a sample of that belong to the required class. - + Parameters ---------- x0_img_class : int @@ -97,10 +105,9 @@ def _choose_x0_2c(self, x0_img_class): ------- x0 : CArray y0 : CArray - + """ - adv_img_idx = \ - CArray(self.ts.Y.find(self.ts.Y == x0_img_class))[0] + adv_img_idx = CArray(self.ts.Y.find(self.ts.Y == x0_img_class))[0] x0 = self.ts.X[adv_img_idx, :] y0 = self.ts.Y[adv_img_idx] @@ -109,56 +116,68 @@ def _choose_x0_2c(self, x0_img_class): def test_DF(self): """Test of DeepFool algorithm.""" - attack = {'class': DeepFool, - 'params': {'nb_candidate': 2, - 'max_iter': 5, - 'clip_min': 0., - 'clip_max': 1.0}} + attack = { + "class": DeepFool, + "params": { + "nb_candidate": 2, + "max_iter": 5, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_indiscriminate(attack) def test_FGM(self): """Test of FastGradientMethod algorithm.""" - attack = {'class': FastGradientMethod, - 'params': {'eps': 0.3, - 'clip_min': 0., - 'clip_max': 1.0}} + attack = { + "class": FastGradientMethod, + "params": {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0}, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_ENM(self): """Test of ElasticNetMethod algorithm.""" - attack = {'class': ElasticNetMethod, - 'params': {'max_iterations': 5, - 'abort_early': True, - 'learning_rate': 1e-3}} + attack = { + "class": ElasticNetMethod, + "params": {"max_iterations": 5, "abort_early": True, "learning_rate": 1e-3}, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_CWL2(self): """Test of CarliniWagnerL2 algorithm.""" - attack = {'class': CarliniWagnerL2, - 'params': {'max_iterations': 5, - 'learning_rate': 0.3, - 'clip_min': 0., - 'clip_max': 1.0}} + attack = { + "class": CarliniWagnerL2, + "params": { + "max_iterations": 5, + "learning_rate": 0.3, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_SPSA(self): """Test of SPSA algorithm.""" - attack = {'class': SPSA, - 'params': {'eps': 0.5, - 'nb_iter': 10, - 'early_stop_loss_threshold': -1., - 'spsa_samples': 32, - 'spsa_iters': 5, - 'learning_rate': 0.03, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": SPSA, + "params": { + "eps": 0.5, + "nb_iter": 10, + "early_stop_loss_threshold": -1.0, + "spsa_samples": 32, + "spsa_iters": 5, + "learning_rate": 0.03, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) # FIXME: random seed not working for SPSA? @@ -166,57 +185,81 @@ def test_SPSA(self): def test_LBFGS(self): """Test of LBFGS algorithm.""" - attack = {'class': LBFGS, - 'params': {'max_iterations': 5, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": LBFGS, + "params": { + "max_iterations": 5, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) def test_PGD(self): """Test of ProjectedGradientDescent algorithm.""" - attack = {'class': ProjectedGradientDescent, - 'params': {'eps': 0.3, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": ProjectedGradientDescent, + "params": { + "eps": 0.3, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_SMM(self): """Test of SaliencyMapMethod algorithm.""" - attack = {'class': SaliencyMapMethod, - 'params': {'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": SaliencyMapMethod, + "params": { + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) def test_MIM(self): """Test of MomentumIterativeMethod algorithm.""" - attack = {'class': MomentumIterativeMethod, - 'params': {'eps': 0.3, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": MomentumIterativeMethod, + "params": { + "eps": 0.3, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_Madry(self): """Test of MadryEtAl algorithm.""" - attack = {'class': MadryEtAl, - 'params': {'eps': 0.3, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": MadryEtAl, + "params": { + "eps": 0.3, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) self._test_indiscriminate(attack) def test_BIM(self): """Test of BasicIterativeMethod algorithm.""" - attack = {'class': BasicIterativeMethod, - 'params': {'eps': 0.3, - 'clip_min': 0., - 'clip_max': 1., }} + attack = { + "class": BasicIterativeMethod, + "params": { + "eps": 0.3, + "clip_min": 0.0, + "clip_max": 1.0, + }, + } self._test_targeted(attack) self._test_indiscriminate(attack) @@ -265,7 +308,7 @@ def _run(self, attack, y_target=None, expected_y=None): Label of the expected final optimal point. """ - attack_idx = attack['class'].__name__ + attack_idx = attack["class"].__name__ self.logger.info("Running algorithm: {:} ".format(attack_idx)) tf.set_random_seed(self.seed) @@ -273,19 +316,20 @@ def _run(self, attack, y_target=None, expected_y=None): evas = CAttackEvasionCleverhans( classifier=self.clf, y_target=y_target, - clvh_attack_class=attack['class'], - **attack['params'] + clvh_attack_class=attack["class"], + **attack["params"], ) evas.verbose = 2 x0, y0 = self._choose_x0_2c(self.x0_img_class) - + with self.logger.timer(): y_pred, scores, adv_ds, f_obj = evas.run(x0, y0) - self.logger.info("Starting score: " + str( - evas.classifier.decision_function(x0, y=1).item())) + self.logger.info( + "Starting score: " + str(evas.classifier.decision_function(x0, y=1).item()) + ) self.logger.info("Final score: " + str(evas.f_opt)) self.logger.info("x*:\n" + str(evas.x_opt)) @@ -336,19 +380,18 @@ def _show_adv(self, x0, y0, x_opt, y_pred, attack_idx, y_target): fig.subplot(1, 3, 1) fig.sp.title(self.digits[y0.item()]) - fig.sp.imshow(x0.reshape((self.img_h, self.img_w)), cmap='gray') + fig.sp.imshow(x0.reshape((self.img_h, self.img_w)), cmap="gray") fig.subplot(1, 3, 2) - fig.sp.imshow( - added_noise.reshape((self.img_h, self.img_w)), cmap='gray') + fig.sp.imshow(added_noise.reshape((self.img_h, self.img_w)), cmap="gray") fig.subplot(1, 3, 3) fig.sp.title(self.digits[y_pred.item()]) - fig.sp.imshow(x_opt.reshape((self.img_h, self.img_w)), cmap='gray') + fig.sp.imshow(x_opt.reshape((self.img_h, self.img_w)), cmap="gray") name_file = "{:}_MNIST_target-{:}.pdf".format(attack_idx, y_target) - fig.savefig(fm.join(self.images_folder, name_file), file_format='pdf') + fig.savefig(fm.join(self.images_folder, name_file), file_format="pdf") -if __name__ == '__main__': +if __name__ == "__main__": CAttackEvasionCleverhansTestCases.main() diff --git a/src/secml/adv/attacks/evasion/foolbox/c_attack_evasion_foolbox.py b/src/secml/adv/attacks/evasion/foolbox/c_attack_evasion_foolbox.py index 37a12af4..b35d7112 100644 --- a/src/secml/adv/attacks/evasion/foolbox/c_attack_evasion_foolbox.py +++ b/src/secml/adv/attacks/evasion/foolbox/c_attack_evasion_foolbox.py @@ -7,6 +7,7 @@ .. moduleauthor:: Maura Pintor """ + import eagerpy as ep import foolbox as fb import torch @@ -14,8 +15,11 @@ from numpy import NaN from secml.adv.attacks.evasion import CAttackEvasion -from secml.adv.attacks.evasion.foolbox.secml_autograd import \ - SecmlLayer, as_tensor, as_carray +from secml.adv.attacks.evasion.foolbox.secml_autograd import ( + SecmlLayer, + as_tensor, + as_carray, +) from secml.array import CArray from secml.core.constants import inf from secml.settings import SECML_PYTORCH_USE_CUDA @@ -51,14 +55,22 @@ class CAttackEvasionFoolbox(CAttackEvasion): Init parameters for creating the attack, as kwargs. """ - __class_type = 'e-foolbox' + __class_type = "e-foolbox" - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=None, fb_attack_class=None, **attack_params): + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=None, + fb_attack_class=None, + **attack_params + ): super(CAttackEvasionFoolbox, self).__init__( - classifier=classifier, - y_target=y_target) + classifier=classifier, y_target=y_target + ) self.attack_params = attack_params self.attack_class = fb_attack_class @@ -69,8 +81,7 @@ def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, # wraps secml classifier in a pytorch layer self._pytorch_model_wrapper = SecmlLayer(classifier) # wraps the pytorch model in the foolbox pytorch wrapper - self.f_model = _FoolboxModel(self._pytorch_model_wrapper, - bounds=(lb, ub)) + self.f_model = _FoolboxModel(self._pytorch_model_wrapper, bounds=(lb, ub)) self._last_f_eval = None self._last_grad_eval = None @@ -87,14 +98,17 @@ def _run(self, x, y, x_init=None): self.f_model.reset() if self.y_target is None: criterion = fb.criteria.Misclassification( - as_tensor(y.ravel().astype('int64'))) + as_tensor(y.ravel().astype("int64")) + ) else: criterion = fb.criteria.TargetedMisclassification( - torch.tensor([self.y_target])) + torch.tensor([self.y_target]) + ) x_t = as_tensor(x, requires_grad=False) advx, clipped, is_adv = self.attack( - self.f_model, x_t, criterion, epsilons=self.epsilon) + self.f_model, x_t, criterion, epsilons=self.epsilon + ) if isinstance(clipped, list): if len(clipped) == 1: @@ -102,7 +116,8 @@ def _run(self, x, y, x_init=None): else: raise ValueError( "This attack is returning a list. Please," - "use a single value of epsilon.") + "use a single value of epsilon." + ) # f_opt is computed only in class-specific wrappers f_opt = NaN @@ -131,7 +146,8 @@ def _adv_objective_function(self, x): raise NotImplementedError( "Objective Function and Objective Function Gradient " "are not supported with this constructor. Please, " - "use one of our wrapper-supported attacks.") + "use one of our wrapper-supported attacks." + ) @property def x_seq(self): @@ -162,12 +178,14 @@ def __init__(self, model, bounds, store_path=True): self._store_path = store_path self._x_path = [] if not isinstance(model, torch.nn.Module): - raise ValueError( - "expected model to be a torch.nn.Module instance") + raise ValueError("expected model to be a torch.nn.Module instance") - device = 'cuda' if use_cuda else 'cpu' + device = "cuda" if use_cuda else "cpu" super().__init__( - model, bounds=bounds, preprocessing=None, device=device, + model, + bounds=bounds, + preprocessing=None, + device=device, ) self.data_format = "channels_first" diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_basic_iterative_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_basic_iterative_attack.py index e8517ac0..476be1ed 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_basic_iterative_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_basic_iterative_attack.py @@ -8,14 +8,19 @@ """ -from foolbox.attacks.basic_iterative_method import L1BasicIterativeAttack, L2BasicIterativeAttack, \ - LinfBasicIterativeAttack - -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from foolbox.attacks.basic_iterative_method import ( + L1BasicIterativeAttack, + L2BasicIterativeAttack, + LinfBasicIterativeAttack, +) + +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.ce_loss import CELoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor -DISTANCES = ['l1', 'l2', 'linf'] +DISTANCES = ["l1", "l2", "linf"] class CFoolboxBasicIterative(CELoss, CAttackEvasionFoolbox): @@ -56,32 +61,48 @@ class CFoolboxBasicIterative(CELoss, CAttackEvasionFoolbox): "Adversarial examples in the physical world" https://arxiv.org/abs/1607.02533 """ - __class_type = 'e-foolbox-basiciterative' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, distance='l2', - rel_stepsize=0.025, abs_stepsize=None, steps=50, - random_start=True): - if distance == 'l1': + __class_type = "e-foolbox-basiciterative" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + distance="l2", + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + + if distance == "l1": attack = L1BasicIterativeAttack - elif distance == 'l2': + elif distance == "l2": attack = L2BasicIterativeAttack - elif distance == 'linf': + elif distance == "linf": attack = LinfBasicIterativeAttack else: - raise ValueError('Distance {} is not supported for this attack. Only {} are supported'.format( - distance, DISTANCES - )) - - super(CFoolboxBasicIterative, self).__init__(classifier, y_target, - lb=lb, ub=ub, - fb_attack_class=attack, - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + raise ValueError( + "Distance {} is not supported for this attack. Only {} are supported".format( + distance, DISTANCES + ) + ) + + super(CFoolboxBasicIterative, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=attack, + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) self._x0 = None self._y0 = None @@ -97,48 +118,87 @@ def _run(self, x, y, x_init=None): class CFoolboxBasicIterativeL1(CFoolboxBasicIterative): - __class_type = 'e-foolbox-basiciterative-l1' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, - steps=50, random_start=True): - super(CFoolboxBasicIterativeL1, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l1', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-basiciterative-l1" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxBasicIterativeL1, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l1", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) class CFoolboxBasicIterativeL2(CFoolboxBasicIterative): - __class_type = 'e-foolbox-basiciterative-l2' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, - steps=50, random_start=True): - super(CFoolboxBasicIterativeL2, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l2', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-basiciterative-l2" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxBasicIterativeL2, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l2", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) class CFoolboxBasicIterativeLinf(CFoolboxBasicIterative): - __class_type = 'e-foolbox-basiciterative-linf' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, - steps=50, random_start=True): - super(CFoolboxBasicIterativeLinf, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='linf', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-basiciterative-linf" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxBasicIterativeLinf, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="linf", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_cw_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_cw_attack.py index 86b72044..23d560b1 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_cw_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_cw_attack.py @@ -14,10 +14,17 @@ import numpy as np from foolbox import Misclassification, TargetedMisclassification from foolbox.attacks.base import raise_if_kwargs, get_criterion -from foolbox.attacks.carlini_wagner import _to_attack_space, _to_model_space, best_other_classes, AdamOptimizer +from foolbox.attacks.carlini_wagner import ( + _to_attack_space, + _to_model_space, + best_other_classes, + AdamOptimizer, +) from foolbox.devutils import flatten, atleast_kd -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.cw_loss import CWLoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor from secml.array import CArray @@ -62,25 +69,41 @@ class CFoolboxL2CarliniWagner(CWLoss, CAttackEvasionFoolbox): neural networks. In 2017 ieee symposium on security and privacy" https://arxiv.org/abs/1608.04644 """ - __class_type = 'e-foolbox-cw' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - binary_search_steps=9, steps=10000, stepsize=1e-2, - confidence=0, initial_const=1e-3, abort_early=True): - super(CFoolboxL2CarliniWagner, self).__init__(classifier, y_target, - lb=lb, ub=ub, - fb_attack_class=_L2CarliniWagnerAttack, - epsilons=None, - binary_search_steps=binary_search_steps, - steps=steps, stepsize=stepsize, - confidence=confidence, - initial_const=initial_const, - abort_early=abort_early) + + __class_type = "e-foolbox-cw" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + binary_search_steps=9, + steps=10000, + stepsize=1e-2, + confidence=0, + initial_const=1e-3, + abort_early=True, + ): + super(CFoolboxL2CarliniWagner, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=_L2CarliniWagnerAttack, + epsilons=None, + binary_search_steps=binary_search_steps, + steps=steps, + stepsize=stepsize, + confidence=confidence, + initial_const=initial_const, + abort_early=abort_early, + ) self.confidence = confidence self.c = initial_const self._x0 = None self._y0 = None - self.distance = 'l2' + self.distance = "l2" self._step_per_iter = None self.best_c_ = self.c @@ -104,7 +127,7 @@ def _slice_path(self): divided_paths = [] for i, s in enumerate(self.attack._steps_per_iter): cumulative_sum = sum(self.attack._steps_per_iter[:i]) - divided_paths.append(all_paths[cumulative_sum: cumulative_sum + s, :]) + divided_paths.append(all_paths[cumulative_sum : cumulative_sum + s, :]) return divided_paths @property @@ -193,8 +216,8 @@ def loss_fun(delta, consts): # the binary search searches for the smallest consts that produce adversarials for binary_search_step in range(self.binary_search_steps): if ( - binary_search_step == self.binary_search_steps - 1 - and self.binary_search_steps >= 10 + binary_search_step == self.binary_search_steps - 1 + and self.binary_search_steps >= 10 ): # in the last binary search step, repeat the search once consts = np.minimum(upper_bounds, 1e10) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ddn_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ddn_attack.py index 7f5512a8..29f0f9f6 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ddn_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ddn_attack.py @@ -11,7 +11,9 @@ from foolbox.attacks.ddn import DDNAttack -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.ce_loss import CELoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor @@ -51,22 +53,38 @@ class CFoolboxL2DDN(CELoss, CAttackEvasionFoolbox): Robert Sabourin, Eric Granger, "Decoupling Direction and Norm for Efficient Gradient-Based L2 Adversarial Attacks and Defenses", https://arxiv.org/abs/1811.09600 - """ - __class_type = 'e-foolbox-ddn' + """ + + __class_type = "e-foolbox-ddn" - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=None, init_epsilon=1.0, steps=10, - gamma=0.05, ): - attack_params = {'init_epsilon': init_epsilon, - 'gamma': gamma, - 'steps': steps, 'epsilons': epsilons} + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=None, + init_epsilon=1.0, + steps=10, + gamma=0.05, + ): + attack_params = { + "init_epsilon": init_epsilon, + "gamma": gamma, + "steps": steps, + "epsilons": epsilons, + } - super(CFoolboxL2DDN, self).__init__(classifier, y_target, - lb=lb, ub=ub, - fb_attack_class=DDNAttack, - **attack_params) + super(CFoolboxL2DDN, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=DDNAttack, + **attack_params + ) self._y0 = None - self.distance = 'l2' + self.distance = "l2" def _run(self, x, y, x_init=None): self._y0 = as_tensor(y) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_deepfool_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_deepfool_attack.py index 98b9e949..95a6c980 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_deepfool_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_deepfool_attack.py @@ -8,16 +8,16 @@ from foolbox.attacks.deepfool import L2DeepFoolAttack, LinfDeepFoolAttack -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import \ - CAttackEvasionFoolbox -from secml.adv.attacks.evasion.foolbox.losses.deepfool_loss import \ - DeepfoolLoss +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) +from secml.adv.attacks.evasion.foolbox.losses.deepfool_loss import DeepfoolLoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor from secml.array import CArray -CELOSS = 'crossentropy' -LOGITLOSS = 'logits' -DISTANCES = ['l2', 'linf'] +CELOSS = "crossentropy" +LOGITLOSS = "logits" +DISTANCES = ["l2", "linf"] class CFoolboxDeepfool(DeepfoolLoss, CAttackEvasionFoolbox): @@ -61,33 +61,48 @@ class CFoolboxDeepfool(DeepfoolLoss, CAttackEvasionFoolbox): "DeepFool: a simple and accurate method to fool deep neural networks", https://arxiv.org/abs/1511.04599 """ - __class_type = 'e-foolbox-deepfool' - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, distance='l2', steps=50, - candidates=10, overshoot=0.02, loss="logits"): + __class_type = "e-foolbox-deepfool" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + distance="l2", + steps=50, + candidates=10, + overshoot=0.02, + loss="logits", + ): if y_target != None: raise ValueError( "Unsupported criterion. Deepfool only " - "supports the untargeted version.") - if distance == 'l2': + "supports the untargeted version." + ) + if distance == "l2": attack = L2DeepFoolAttack - elif distance == 'linf': + elif distance == "linf": attack = LinfDeepFoolAttack else: raise ValueError( - 'Distance {} is not supported for this attack. Only {} ' - 'are supported'.format( - distance, DISTANCES - )) + "Distance {} is not supported for this attack. Only {} " + "are supported".format(distance, DISTANCES) + ) super(CFoolboxDeepfool, self).__init__( - classifier, y_target, - lb=lb, ub=ub, + classifier, + y_target, + lb=lb, + ub=ub, fb_attack_class=attack, - epsilons=epsilons, steps=steps, + epsilons=epsilons, + steps=steps, candidates=candidates, overshoot=overshoot, - loss=loss) + loss=loss, + ) self._x0 = None self._y0 = None self.distance = distance @@ -102,8 +117,9 @@ def _run(self, x, y, x_init=None): # always the same length as the number of steps num_effective_steps = self.x_seq.shape[0] if num_effective_steps < self.attack.steps: - added_vals = CArray.zeros((self.attack.steps - num_effective_steps, - *self.x_seq.shape[1:])) + added_vals = CArray.zeros( + (self.attack.steps - num_effective_steps, *self.x_seq.shape[1:]) + ) added_vals += self.x_seq[-1, :] self._x_seq = self._x_seq.append(added_vals, axis=0) self.num_effective_steps = num_effective_steps # keep in case we need it @@ -114,32 +130,58 @@ def _run(self, x, y, x_init=None): class CFoolboxDeepfoolL2(CFoolboxDeepfool): - __class_type = 'e-foolbox-deepfool-l2' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, steps=50, candidates=10, overshoot=0.02, - loss="logits"): + __class_type = "e-foolbox-deepfool-l2" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + steps=50, + candidates=10, + overshoot=0.02, + loss="logits", + ): super(CFoolboxDeepfoolL2, self).__init__( - classifier, y_target, - lb=lb, ub=ub, - distance='l2', - epsilons=epsilons, steps=steps, + classifier, + y_target, + lb=lb, + ub=ub, + distance="l2", + epsilons=epsilons, + steps=steps, candidates=candidates, overshoot=overshoot, - loss=loss) + loss=loss, + ) class CFoolboxDeepfoolLinf(CFoolboxDeepfool): - __class_type = 'e-foolbox-deepfool-linf' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, steps=50, candidates=10, overshoot=0.02, - loss="logits"): + __class_type = "e-foolbox-deepfool-linf" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + steps=50, + candidates=10, + overshoot=0.02, + loss="logits", + ): super(CFoolboxDeepfoolLinf, self).__init__( - classifier, y_target, - lb=lb, ub=ub, - distance='linf', - epsilons=epsilons, steps=steps, + classifier, + y_target, + lb=lb, + ub=ub, + distance="linf", + epsilons=epsilons, + steps=steps, candidates=candidates, overshoot=overshoot, - loss=loss) + loss=loss, + ) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ead_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ead_attack.py index 978b3d53..9c234a75 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ead_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_ead_attack.py @@ -6,15 +6,23 @@ .. moduleauthor:: Maura Pintor """ + import math from typing import Any, Tuple import eagerpy as ep from foolbox import Misclassification, TargetedMisclassification from foolbox.attacks.base import raise_if_kwargs, get_criterion -from foolbox.attacks.ead import EADAttack, _best_other_classes, _project_shrinkage_thresholding, _apply_decision_rule - -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from foolbox.attacks.ead import ( + EADAttack, + _best_other_classes, + _project_shrinkage_thresholding, + _apply_decision_rule, +) + +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.ead_loss import EADLoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor from secml.array import CArray @@ -70,44 +78,51 @@ class CFoolboxEAD(EADLoss, CAttackEvasionFoolbox): "Ead: elastic-net attacks to deep neural networks via adversarial examples." Proceedings of the AAAI Conference on Artificial Intelligence. Vol. 32. No. 1. 2018. """ - __class_type = 'e-foolbox-ead' - - def __init__(self, - classifier: CClassifier, - y_target: Any = None, - lb=0.0, - ub=1.0, - epsilons=None, - binary_search_steps=9, - steps=50, - initial_stepsize=1e-2, - confidence=0., - initial_const=1e-3, - regularization=1e-2, - decision_rule: str = EN, - abort_early=False, - ): + + __class_type = "e-foolbox-ead" + + def __init__( + self, + classifier: CClassifier, + y_target: Any = None, + lb=0.0, + ub=1.0, + epsilons=None, + binary_search_steps=9, + steps=50, + initial_stepsize=1e-2, + confidence=0.0, + initial_const=1e-3, + regularization=1e-2, + decision_rule: str = EN, + abort_early=False, + ): if decision_rule != L1 and decision_rule != EN: - raise ValueError(f"decision_rule param can be ony {EN} or {L1}, not {decision_rule}") - super(CFoolboxEAD, self).__init__(classifier, - y_target, - lb=lb, ub=ub, - fb_attack_class=_EADAttack, - epsilons=epsilons, - initial_const=initial_const, - binary_search_steps=binary_search_steps, - steps=steps, - confidence=confidence, - initial_stepsize=initial_stepsize, - regularization=regularization, - decision_rule=decision_rule, - abort_early=abort_early) + raise ValueError( + f"decision_rule param can be ony {EN} or {L1}, not {decision_rule}" + ) + super(CFoolboxEAD, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=_EADAttack, + epsilons=epsilons, + initial_const=initial_const, + binary_search_steps=binary_search_steps, + steps=steps, + confidence=confidence, + initial_stepsize=initial_stepsize, + regularization=regularization, + decision_rule=decision_rule, + abort_early=abort_early, + ) self.regularization = regularization self.confidence = confidence self.c = initial_const self._x0 = None self._y0 = None - self.distance = 'l1' + self.distance = "l1" self._step_per_iter = None self.best_c_ = self.c @@ -131,7 +146,7 @@ def _slice_path(self): divided_paths = [] for i, s in enumerate(self.attack._steps_per_iter): cumulative_sum = sum(self.attack._steps_per_iter[:i]) - divided_paths.append(all_paths[cumulative_sum: cumulative_sum + s, :]) + divided_paths.append(all_paths[cumulative_sum : cumulative_sum + s, :]) return divided_paths @property @@ -142,13 +157,13 @@ def x_seq(self): class _EADAttack(EADAttack): def run( - self, - model, - inputs, - criterion, - *, - early_stop=None, - **kwargs: Any, + self, + model, + inputs, + criterion, + *, + early_stop=None, + **kwargs: Any, ): raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) @@ -223,8 +238,8 @@ def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]: # the binary search searches for the smallest consts that produce adversarials for binary_search_step in range(self.binary_search_steps): if ( - binary_search_step == self.binary_search_steps - 1 - and self.binary_search_steps >= 10 + binary_search_step == self.binary_search_steps - 1 + and self.binary_search_steps >= 10 ): # in the last iteration, repeat the search once consts = ep.minimum(upper_bounds, 1e10) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_fgm_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_fgm_attack.py index 16a93643..682be886 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_fgm_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_fgm_attack.py @@ -8,13 +8,19 @@ """ -from foolbox.attacks.fast_gradient_method import L1FastGradientAttack, L2FastGradientAttack, LinfFastGradientAttack - -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from foolbox.attacks.fast_gradient_method import ( + L1FastGradientAttack, + L2FastGradientAttack, + LinfFastGradientAttack, +) + +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.ce_loss import CELoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor -DISTANCES = ['l1', 'l2', 'linf'] +DISTANCES = ["l1", "l2", "linf"] class CFoolboxFGM(CELoss, CAttackEvasionFoolbox): @@ -49,28 +55,42 @@ class CFoolboxFGM(CELoss, CAttackEvasionFoolbox): "Explaining and Harnessing Adversarial Examples" https://arxiv.org/abs/1412.6572 """ - __class_type = 'e-foolbox-fgm' - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, distance='l2', - random_start=True): + __class_type = "e-foolbox-fgm" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + distance="l2", + random_start=True, + ): - if distance == 'l1': + if distance == "l1": attack = L1FastGradientAttack - elif distance == 'l2': + elif distance == "l2": attack = L2FastGradientAttack - elif distance == 'linf': + elif distance == "linf": attack = LinfFastGradientAttack else: - raise ValueError('Distance {} is not supported for this attack. Only {} are supported'.format( - distance, DISTANCES - )) - - super(CFoolboxFGM, self).__init__(classifier, y_target, - lb=lb, ub=ub, - fb_attack_class=attack, - epsilons=epsilons, - random_start=random_start) + raise ValueError( + "Distance {} is not supported for this attack. Only {} are supported".format( + distance, DISTANCES + ) + ) + + super(CFoolboxFGM, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=attack, + epsilons=epsilons, + random_start=random_start, + ) self._y0 = None self.distance = distance @@ -85,36 +105,51 @@ def _run(self, x, y, x_init=None): class CFoolboxFGML1(CFoolboxFGM): - __class_type = 'e-foolbox-fgm-l1' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, random_start=True): - super(CFoolboxFGML1, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l1', - epsilons=epsilons, - random_start=random_start) + __class_type = "e-foolbox-fgm-l1" + + def __init__( + self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, random_start=True + ): + super(CFoolboxFGML1, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l1", + epsilons=epsilons, + random_start=random_start, + ) class CFoolboxFGML2(CFoolboxFGM): - __class_type = 'e-foolbox-fgm-l2' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilon=0.2, random_start=True): - super(CFoolboxFGML2, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l2', - epsilons=epsilon, - random_start=random_start) + __class_type = "e-foolbox-fgm-l2" + + def __init__( + self, classifier, y_target=None, lb=0.0, ub=1.0, epsilon=0.2, random_start=True + ): + super(CFoolboxFGML2, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l2", + epsilons=epsilon, + random_start=random_start, + ) class CFoolboxFGMLinf(CFoolboxFGM): - __class_type = 'e-foolbox-fgm-linf' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilon=0.2, random_start=True): - super(CFoolboxFGMLinf, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='linf', - epsilons=epsilon, - random_start=random_start) + __class_type = "e-foolbox-fgm-linf" + + def __init__( + self, classifier, y_target=None, lb=0.0, ub=1.0, epsilon=0.2, random_start=True + ): + super(CFoolboxFGMLinf, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="linf", + epsilons=epsilon, + random_start=random_start, + ) diff --git a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_pgd_attack.py b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_pgd_attack.py index e15ebe48..a62c1f86 100644 --- a/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_pgd_attack.py +++ b/src/secml/adv/attacks/evasion/foolbox/fb_attacks/fb_pgd_attack.py @@ -8,14 +8,19 @@ """ -from foolbox.attacks.projected_gradient_descent import L1ProjectedGradientDescentAttack, \ - L2ProjectedGradientDescentAttack, LinfProjectedGradientDescentAttack - -from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox +from foolbox.attacks.projected_gradient_descent import ( + L1ProjectedGradientDescentAttack, + L2ProjectedGradientDescentAttack, + LinfProjectedGradientDescentAttack, +) + +from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import ( + CAttackEvasionFoolbox, +) from secml.adv.attacks.evasion.foolbox.losses.ce_loss import CELoss from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor -DISTANCES = ['l1', 'l2', 'linf'] +DISTANCES = ["l1", "l2", "linf"] class CFoolboxPGD(CELoss, CAttackEvasionFoolbox): @@ -57,32 +62,48 @@ class CFoolboxPGD(CELoss, CAttackEvasionFoolbox): adversarial attacks", https://arxiv.org/abs/1706.06083 """ - __class_type = 'e-foolbox-pgd' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, - epsilons=0.2, distance='l2', - rel_stepsize=0.025, abs_stepsize=None, steps=50, - random_start=True): - if distance == 'l1': + __class_type = "e-foolbox-pgd" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + distance="l2", + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + + if distance == "l1": attack = L1ProjectedGradientDescentAttack - elif distance == 'l2': + elif distance == "l2": attack = L2ProjectedGradientDescentAttack - elif distance == 'linf': + elif distance == "linf": attack = LinfProjectedGradientDescentAttack else: - raise ValueError('Distance {} is not supported for this attack. Only {} are supported'.format( - distance, DISTANCES - )) - - super(CFoolboxPGD, self).__init__(classifier, y_target, - lb=lb, ub=ub, - fb_attack_class=attack, - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + raise ValueError( + "Distance {} is not supported for this attack. Only {} are supported".format( + distance, DISTANCES + ) + ) + + super(CFoolboxPGD, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + fb_attack_class=attack, + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) self._x0 = None self._y0 = None self.distance = distance @@ -97,45 +118,87 @@ def _run(self, x, y, x_init=None): class CFoolboxPGDL1(CFoolboxPGD): - __class_type = 'e-foolbox-pgd-l1' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, steps=50, random_start=True): - super(CFoolboxPGDL1, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l1', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-pgd-l1" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxPGDL1, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l1", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) class CFoolboxPGDL2(CFoolboxPGD): - __class_type = 'e-foolbox-pgd-l2' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, steps=50, random_start=True): - super(CFoolboxPGDL2, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='l2', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-pgd-l2" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxPGDL2, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="l2", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) class CFoolboxPGDLinf(CFoolboxPGD): - __class_type = 'e-foolbox-pgd-linf' - - def __init__(self, classifier, y_target=None, lb=0.0, ub=1.0, epsilons=0.2, - rel_stepsize=0.025, abs_stepsize=None, steps=50, random_start=True): - super(CFoolboxPGDLinf, self).__init__(classifier, y_target, - lb=lb, ub=ub, - distance='linf', - epsilons=epsilons, - rel_stepsize=rel_stepsize, - abs_stepsize=abs_stepsize, - steps=steps, - random_start=random_start) + __class_type = "e-foolbox-pgd-linf" + + def __init__( + self, + classifier, + y_target=None, + lb=0.0, + ub=1.0, + epsilons=0.2, + rel_stepsize=0.025, + abs_stepsize=None, + steps=50, + random_start=True, + ): + super(CFoolboxPGDLinf, self).__init__( + classifier, + y_target, + lb=lb, + ub=ub, + distance="linf", + epsilons=epsilons, + rel_stepsize=rel_stepsize, + abs_stepsize=abs_stepsize, + steps=steps, + random_start=random_start, + ) diff --git a/src/secml/adv/attacks/evasion/foolbox/losses/ce_loss.py b/src/secml/adv/attacks/evasion/foolbox/losses/ce_loss.py index 7abfe3b8..615cd080 100644 --- a/src/secml/adv/attacks/evasion/foolbox/losses/ce_loss.py +++ b/src/secml/adv/attacks/evasion/foolbox/losses/ce_loss.py @@ -4,7 +4,7 @@ class CELoss: def _adv_objective_function(self, x): - loss = CrossEntropyLoss(reduction='none') + loss = CrossEntropyLoss(reduction="none") scores = self._pytorch_model_wrapper(x) target = torch.empty(scores.shape[0], dtype=torch.long) @@ -15,4 +15,3 @@ def _adv_objective_function(self, x): total_loss = loss(scores, target) return total_loss if self.y_target is not None else -total_loss - diff --git a/src/secml/adv/attacks/evasion/foolbox/losses/cw_loss.py b/src/secml/adv/attacks/evasion/foolbox/losses/cw_loss.py index 52bc6b05..8a4cd171 100644 --- a/src/secml/adv/attacks/evasion/foolbox/losses/cw_loss.py +++ b/src/secml/adv/attacks/evasion/foolbox/losses/cw_loss.py @@ -8,13 +8,13 @@ class CWLoss(LogitsLoss): def _adv_objective_function(self, x): if self._x0 is None: - raise Exception('Attack not run yet') + raise Exception("Attack not run yet") l2dist = torch.norm(self._x0 - x.flatten(start_dim=1), dim=1, p=2) ** 2 - + loss = super(CWLoss, self)._adv_objective_function(x) if x.shape[0] == self._consts.shape[0]: c = as_tensor(self._consts) else: c = self._consts[-1].item() total_loss = c * loss + l2dist - return total_loss \ No newline at end of file + return total_loss diff --git a/src/secml/adv/attacks/evasion/foolbox/losses/deepfool_loss.py b/src/secml/adv/attacks/evasion/foolbox/losses/deepfool_loss.py index e8f6dcc6..5d8cce83 100644 --- a/src/secml/adv/attacks/evasion/foolbox/losses/deepfool_loss.py +++ b/src/secml/adv/attacks/evasion/foolbox/losses/deepfool_loss.py @@ -52,24 +52,25 @@ def ce_diff(self, x, k): classes = logits.argsort(dim=-1).flip(dims=(-1,)) i0 = classes[:, 0] ik = classes[:, k] - l0 = -CrossEntropyLoss(reduction='none')(logits, i0) - lk = -CrossEntropyLoss(reduction='none')(logits, ik) + l0 = -CrossEntropyLoss(reduction="none")(logits, i0) + lk = -CrossEntropyLoss(reduction="none")(logits, ik) loss = lk - l0 loss.backward() grad = x.grad return loss, grad - def get_distances(self, losses, grads): - if self.distance == 'l2': - return abs(losses) / ((grads.view(grads.shape[0], -1)).norm(p=2, dim=-1) + 1e-8) - elif self.distance == 'linf': - return abs(losses) / ((grads.view(grads.shape[0], -1)).abs().sum(dim=-1) + 1e-8) + if self.distance == "l2": + return abs(losses) / ( + (grads.view(grads.shape[0], -1)).norm(p=2, dim=-1) + 1e-8 + ) + elif self.distance == "linf": + return abs(losses) / ( + (grads.view(grads.shape[0], -1)).abs().sum(dim=-1) + 1e-8 + ) else: raise NotImplementedError - - def objective_function_gradient(self, x): """ Deepfool uses the gradient to find the closest class. @@ -88,4 +89,4 @@ def objective_function(self, x): Accordingly, we should also return only the first returned value of the function. """ - return as_carray(self._adv_objective_function(as_tensor(x))[0]) \ No newline at end of file + return as_carray(self._adv_objective_function(as_tensor(x))[0]) diff --git a/src/secml/adv/attacks/evasion/foolbox/losses/logits_loss.py b/src/secml/adv/attacks/evasion/foolbox/losses/logits_loss.py index 3b3aaf8e..38b60613 100644 --- a/src/secml/adv/attacks/evasion/foolbox/losses/logits_loss.py +++ b/src/secml/adv/attacks/evasion/foolbox/losses/logits_loss.py @@ -1,10 +1,11 @@ import torch + class LogitsLoss: def _adv_objective_function(self, x): if self._y0 is None: - raise Exception('Attack not run yet') + raise Exception("Attack not run yet") z_predicted = self._pytorch_model_wrapper(x) y_target = torch.zeros((z_predicted.shape)) @@ -15,14 +16,24 @@ def _adv_objective_function(self, x): y_target[:, self._y0.long()] = 1 z_target = (z_predicted * y_target).sum(dim=1) - second_best_labels = torch.argmax(y_target * torch.min(z_predicted) + z_predicted * (1 - y_target), dim=1) - z_other = z_predicted[torch.arange(z_predicted.size(0)).long(), second_best_labels] + second_best_labels = torch.argmax( + y_target * torch.min(z_predicted) + z_predicted * (1 - y_target), dim=1 + ) + z_other = z_predicted[ + torch.arange(z_predicted.size(0)).long(), second_best_labels + ] if self.y_target is not None: # if targeted, optimize for making the target class most likely - loss = torch.max(z_other - z_target + self.confidence, torch.zeros(x.shape[0], dtype=x.dtype)) + loss = torch.max( + z_other - z_target + self.confidence, + torch.zeros(x.shape[0], dtype=x.dtype), + ) else: # if untargeted, optimize for making any other class most likely - loss = torch.max(z_target - z_other + self.confidence, torch.zeros(x.shape[0], dtype=x.dtype)) + loss = torch.max( + z_target - z_other + self.confidence, + torch.zeros(x.shape[0], dtype=x.dtype), + ) - return loss \ No newline at end of file + return loss diff --git a/src/secml/adv/attacks/evasion/foolbox/secml_autograd.py b/src/secml/adv/attacks/evasion/foolbox/secml_autograd.py index 4894d645..d59658a7 100644 --- a/src/secml/adv/attacks/evasion/foolbox/secml_autograd.py +++ b/src/secml/adv/attacks/evasion/foolbox/secml_autograd.py @@ -7,6 +7,7 @@ .. moduleauthor:: Maura Pintor """ + import torch from torch import nn @@ -37,8 +38,7 @@ def backward(ctx, grad_output): input, grad_calls = ctx.saved_tensors # https://github.com/pytorch/pytorch/issues/1776#issuecomment-372150869 with torch.enable_grad(): - grad_input = clf.gradient(x=as_carray(input), - w=as_carray(grad_output)) + grad_input = clf.gradient(x=as_carray(input), w=as_carray(grad_output)) grad_calls += clf._cached_x.shape[0] grad_input = as_tensor(grad_input, True) @@ -75,6 +75,7 @@ class SecmlLayer(nn.Module): Function and Gradient call counts will be tracked, however they must be reset externally before the call. """ + def __init__(self, model): super(SecmlLayer, self).__init__() self._clf = model @@ -84,8 +85,7 @@ def __init__(self, model): self.grad_counter = torch.tensor(0) def forward(self, x): - x = self.secml_autograd(x, self._clf, self.func_counter, - self.grad_counter) + x = self.secml_autograd(x, self._clf, self.func_counter, self.grad_counter) return x def extra_repr(self) -> str: diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/c_attack_evasion_foolbox_testcases.py b/src/secml/adv/attacks/evasion/foolbox/tests/c_attack_evasion_foolbox_testcases.py index e9929a6b..1ff1d45f 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/c_attack_evasion_foolbox_testcases.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/c_attack_evasion_foolbox_testcases.py @@ -41,7 +41,8 @@ def _setup_attack(self, targeted=False): evas = self.attack_class( classifier=self.clf, y_target=y_target, - lb=self.lb, ub=self.ub, + lb=self.lb, + ub=self.ub, **self.attack_params ) return evas @@ -50,24 +51,35 @@ def _test_run_targeted(self): if self.has_targeted: evas = self._setup_attack(targeted=True) self._run_evasion(evas, self.x0, self.y0) - self._plot_2d_evasion(evas, self.ds, self.x0, - filename="{}_target_{}.pdf" - "".format(self.attack_class.__name__, - evas.y_target)) + self._plot_2d_evasion( + evas, + self.ds, + self.x0, + filename="{}_target_{}.pdf" + "".format(self.attack_class.__name__, evas.y_target), + ) else: - self.logger.debug("Targeted version not defined for {}, skipping test" - "".format(self.attack_class.__name__)) + self.logger.debug( + "Targeted version not defined for {}, skipping test" + "".format(self.attack_class.__name__) + ) return def _test_run_untargeted(self): if self.has_untargeted: evas = self._setup_attack(targeted=False) self._run_evasion(evas, self.x0, self.y0) - self._plot_2d_evasion(evas, self.ds, self.x0, "{}_target_{}.pdf" - "".format(self.attack_class.__name__, evas.y_target)) + self._plot_2d_evasion( + evas, + self.ds, + self.x0, + "{}_target_{}.pdf" "".format(self.attack_class.__name__, evas.y_target), + ) else: - self.logger.debug("Untargeted version not defined for {}, skipping test" - "".format(self.attack_class.__name__)) + self.logger.debug( + "Untargeted version not defined for {}, skipping test" + "".format(self.attack_class.__name__) + ) return def _test_check_foolbox_equal_targeted(self): @@ -75,14 +87,16 @@ def _test_check_foolbox_equal_targeted(self): evas = self._setup_attack(targeted=True) foolbox_class = evas.attack_class init_params = self.attack_params - if 'epsilons' in init_params: - init_params.pop('epsilons') + if "epsilons" in init_params: + init_params.pop("epsilons") fb_evas = foolbox_class(**init_params) adv_ds, adv_fb = self._check_adv_example(evas, fb_evas) self.assert_array_almost_equal(adv_ds.X, adv_fb, decimal=3) else: - self.logger.debug("Targeted version not defined for {}, skipping test" - "".format(self.attack_class.__name__)) + self.logger.debug( + "Targeted version not defined for {}, skipping test" + "".format(self.attack_class.__name__) + ) return def _test_check_foolbox_equal_untargeted(self): @@ -90,14 +104,16 @@ def _test_check_foolbox_equal_untargeted(self): evas = self._setup_attack(targeted=False) foolbox_class = evas.attack_class init_params = self.attack_params - if 'epsilons' in init_params: - init_params.pop('epsilons') + if "epsilons" in init_params: + init_params.pop("epsilons") fb_evas = foolbox_class(**init_params) adv_ds, adv_fb = self._check_adv_example(evas, fb_evas) self.assert_array_almost_equal(adv_ds.X, adv_fb, decimal=3) else: - self.logger.debug("Untargeted version not defined for {}, skipping test" - "".format(self.attack_class.__name__)) + self.logger.debug( + "Untargeted version not defined for {}, skipping test" + "".format(self.attack_class.__name__) + ) return def _test_shapes(self): @@ -106,7 +122,9 @@ def _test_shapes(self): elif self.has_targeted: evas = self._setup_attack(targeted=False) else: - self.logger.debug("Nor targeted or untargeted versions are defined. Skipping test.") + self.logger.debug( + "Nor targeted or untargeted versions are defined. Skipping test." + ) y_pred, scores, adv_ds, f_obj = evas.run(self.x0, self.y0) self.assert_array_equal(self.x0.shape, adv_ds.X.shape) self.assert_array_equal(self.y0.shape, adv_ds.Y.shape) @@ -125,12 +143,16 @@ def _check_adv_example(self, secml_attack, fb_attack): criterion = fb.criteria.TargetedMisclassification(torch.tensor([y_target])) y_pred, scores, adv_ds, f_obj = secml_attack.run(self.x0, self.y0) - _, adv_fb, _ = fb_attack(secml_attack.f_model, x0_tensor, criterion, epsilons=secml_attack.epsilon) + _, adv_fb, _ = fb_attack( + secml_attack.f_model, x0_tensor, criterion, epsilons=secml_attack.epsilon + ) adv_fb = CArray(adv_fb.numpy()) return adv_ds, adv_fb def _check_obj_function_and_grad(self): - for is_targeted, check in zip((True, False), (self.has_targeted, self.has_untargeted)): + for is_targeted, check in zip( + (True, False), (self.has_targeted, self.has_untargeted) + ): if check is True: evas = self._setup_attack(targeted=is_targeted) # some attacks require to run the attack before computing @@ -141,4 +163,3 @@ def _check_obj_function_and_grad(self): self.assertEqual(obj_function.shape, (self.x0.shape[0],)) self.assertEqual(obj_function_grad.shape, self.x0.shape) return - diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_basic_iterative.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_basic_iterative.py index b4177925..f2cad415 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_basic_iterative.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_basic_iterative.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -6,9 +8,11 @@ except ImportError: CAttackEvasionFoolboxTestCases.importskip("foolbox") -from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_basic_iterative_attack \ - import CFoolboxBasicIterativeL1, \ - CFoolboxBasicIterativeL2, CFoolboxBasicIterativeLinf +from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_basic_iterative_attack import ( + CFoolboxBasicIterativeL1, + CFoolboxBasicIterativeL2, + CFoolboxBasicIterativeLinf, +) class TestCAttackEvasionFoolboxBasicIterativeL1(CAttackEvasionFoolboxTestCases): @@ -20,7 +24,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxBasicIterativeL1, self).setUp() self.attack_class = CFoolboxBasicIterativeL1 - self.attack_params = {'rel_stepsize': 0.03, 'steps': 25, 'abs_stepsize': 0.1, 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.03, + "steps": 25, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True @@ -53,7 +62,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxBasicIterativeL2, self).setUp() self.attack_class = CFoolboxBasicIterativeL2 - self.attack_params = {'rel_stepsize': 0.03, 'steps': 100, 'abs_stepsize': 0.1, 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.03, + "steps": 100, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True @@ -86,8 +100,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxBasicIterativeLinf, self).setUp() self.attack_class = CFoolboxBasicIterativeLinf - self.attack_params = {'rel_stepsize': 0.03, 'steps': self.default_steps, 'abs_stepsize': 0.1, - 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.03, + "steps": self.default_steps, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True @@ -108,4 +126,4 @@ def test_shapes(self): self._test_shapes() def test_obj_fun_and_grad(self): - self._check_obj_function_and_grad() \ No newline at end of file + self._check_obj_function_and_grad() diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_cw.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_cw.py index fd2a81f7..695bab15 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_cw.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_cw.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -6,7 +8,9 @@ except ImportError: CAttackEvasionFoolboxTestCases.importskip("foolbox") -from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_cw_attack import CFoolboxL2CarliniWagner +from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_cw_attack import ( + CFoolboxL2CarliniWagner, +) class TestCAttackEvasionFoolboxCW(CAttackEvasionFoolboxTestCases): @@ -18,7 +22,7 @@ def setUp(self): super(TestCAttackEvasionFoolboxCW, self).setUp() self.attack_class = CFoolboxL2CarliniWagner - self.attack_params = {'steps': self.default_steps, 'abort_early': False} + self.attack_params = {"steps": self.default_steps, "abort_early": False} self.has_targeted = True self.has_untargeted = True diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ddn.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ddn.py index 4452cfc6..88d78810 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ddn.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ddn.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -18,7 +20,7 @@ def setUp(self): super(TestCAttackEvasionFoolboxDDN, self).setUp() self.attack_class = CFoolboxL2DDN - self.attack_params = {'steps': self.default_steps, 'epsilons': None} + self.attack_params = {"steps": self.default_steps, "epsilons": None} self.has_targeted = True self.has_untargeted = True @@ -39,4 +41,4 @@ def test_shapes(self): self._test_shapes() def test_obj_fun_and_grad(self): - self._check_obj_function_and_grad() \ No newline at end of file + self._check_obj_function_and_grad() diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_deepfool.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_deepfool.py index 088f7192..df741e9f 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_deepfool.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_deepfool.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -6,8 +8,10 @@ except ImportError: CAttackEvasionFoolboxTestCases.importskip("foolbox") -from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_deepfool_attack \ - import CFoolboxDeepfoolL2, CFoolboxDeepfoolLinf +from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_deepfool_attack import ( + CFoolboxDeepfoolL2, + CFoolboxDeepfoolLinf, +) class TestCAttackEvasionFoolboxDeepfoolL2Logits(CAttackEvasionFoolboxTestCases): @@ -19,9 +23,13 @@ def setUp(self): super(TestCAttackEvasionFoolboxDeepfoolL2Logits, self).setUp() self.attack_class = CFoolboxDeepfoolL2 - self.attack_params = {'steps': 25, 'epsilons': None, - 'loss': 'logits', 'candidates': 2, - 'overshoot': 0.01} + self.attack_params = { + "steps": 25, + "epsilons": None, + "loss": "logits", + "candidates": 2, + "overshoot": 0.01, + } self.has_targeted = False self.has_untargeted = True @@ -47,9 +55,13 @@ def setUp(self): super(TestCAttackEvasionFoolboxDeepfoolLInfLogits, self).setUp() self.attack_class = CFoolboxDeepfoolLinf - self.attack_params = {'steps': 100, 'epsilons': None, - 'loss': 'logits', 'candidates': 2, - 'overshoot': 0.01} + self.attack_params = { + "steps": 100, + "epsilons": None, + "loss": "logits", + "candidates": 2, + "overshoot": 0.01, + } self.has_targeted = False self.has_untargeted = True @@ -75,9 +87,13 @@ def setUp(self): super(TestCAttackEvasionFoolboxDeepfoolL2CELoss, self).setUp() self.attack_class = CFoolboxDeepfoolL2 - self.attack_params = {'steps': 100, 'epsilons': None, - 'loss': 'crossentropy', 'candidates': 2, - 'overshoot': 0.01} + self.attack_params = { + "steps": 100, + "epsilons": None, + "loss": "crossentropy", + "candidates": 2, + "overshoot": 0.01, + } self.has_targeted = False self.has_untargeted = True @@ -93,6 +109,7 @@ def test_shapes(self): def test_obj_fun_and_grad(self): self._check_obj_function_and_grad() + class TestCAttackEvasionFoolboxDeepfoolLInfCELoss(CAttackEvasionFoolboxTestCases): """Unit test for CAttackEvasionFoolboxDeepfoolLInf with difference of cross-entropies.""" @@ -102,9 +119,13 @@ def setUp(self): super(TestCAttackEvasionFoolboxDeepfoolLInfCELoss, self).setUp() self.attack_class = CFoolboxDeepfoolLinf - self.attack_params = {'steps': self.default_steps, 'epsilons': None, - 'loss': 'crossentropy', 'candidates': 2, - 'overshoot': 0.01} + self.attack_params = { + "steps": self.default_steps, + "epsilons": None, + "loss": "crossentropy", + "candidates": 2, + "overshoot": 0.01, + } self.has_targeted = False self.has_untargeted = True diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ead.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ead.py index b12158b0..ada420c8 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ead.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_ead.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -18,9 +20,14 @@ def setUp(self): super(TestCAttackEvasionFoolboxEAD, self).setUp() self.attack_class = CFoolboxEAD - self.attack_params = {'steps': self.default_steps, 'binary_search_steps': 9, - 'confidence': 0.1, 'initial_stepsize': 1e-1, - 'epsilons': None, 'abort_early': False} + self.attack_params = { + "steps": self.default_steps, + "binary_search_steps": 9, + "confidence": 0.1, + "initial_stepsize": 1e-1, + "epsilons": None, + "abort_early": False, + } self.has_targeted = True self.has_untargeted = True diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_fgm.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_fgm.py index b9d310f8..7659e9b1 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_fgm.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_fgm.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -6,8 +8,11 @@ except ImportError: CAttackEvasionFoolboxTestCases.importskip("foolbox") -from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_fgm_attack \ - import CFoolboxFGML1, CFoolboxFGML2, CFoolboxFGMLinf +from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_fgm_attack import ( + CFoolboxFGML1, + CFoolboxFGML2, + CFoolboxFGMLinf, +) class TestCAttackEvasionFoolboxFGML1(CAttackEvasionFoolboxTestCases): @@ -19,7 +24,7 @@ def setUp(self): super(TestCAttackEvasionFoolboxFGML1, self).setUp() self.attack_class = CFoolboxFGML1 - self.attack_params = {'random_start': False} + self.attack_params = {"random_start": False} self.has_targeted = False self.has_untargeted = True @@ -46,7 +51,7 @@ def setUp(self): super(TestCAttackEvasionFoolboxFGML2, self).setUp() self.attack_class = CFoolboxFGML2 - self.attack_params = {'random_start': False} + self.attack_params = {"random_start": False} self.has_targeted = False self.has_untargeted = True @@ -73,7 +78,7 @@ def setUp(self): super(TestCAttackEvasionFoolboxFGMLinf, self).setUp() self.attack_class = CFoolboxFGMLinf - self.attack_params = {'random_start': False} + self.attack_params = {"random_start": False} self.has_targeted = False self.has_untargeted = True @@ -88,4 +93,4 @@ def test_shapes(self): self._test_shapes() def test_obj_fun_and_grad(self): - self._check_obj_function_and_grad() \ No newline at end of file + self._check_obj_function_and_grad() diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_pgd.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_pgd.py index ce6eec35..80e099cd 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_pgd.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_c_attack_evasion_foolbox_pgd.py @@ -1,4 +1,6 @@ -from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import CAttackEvasionFoolboxTestCases +from secml.adv.attacks.evasion.foolbox.tests.c_attack_evasion_foolbox_testcases import ( + CAttackEvasionFoolboxTestCases, +) try: import foolbox @@ -6,8 +8,11 @@ except ImportError: CAttackEvasionFoolboxTestCases.importskip("foolbox") -from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_pgd_attack \ - import CFoolboxPGDL1, CFoolboxPGDL2, CFoolboxPGDLinf +from secml.adv.attacks.evasion.foolbox.fb_attacks.fb_pgd_attack import ( + CFoolboxPGDL1, + CFoolboxPGDL2, + CFoolboxPGDLinf, +) class TestCAttackEvasionFoolboxPGDL1(CAttackEvasionFoolboxTestCases): @@ -19,8 +24,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxPGDL1, self).setUp() self.attack_class = CFoolboxPGDL1 - self.attack_params = {'rel_stepsize': 0.025, 'steps': self.default_steps, 'abs_stepsize': 0.1, - 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.025, + "steps": self.default_steps, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True @@ -53,8 +62,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxPGDL2, self).setUp() self.attack_class = CFoolboxPGDL2 - self.attack_params = {'rel_stepsize': 0.025, 'steps': self.default_steps, 'abs_stepsize': 0.1, - 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.025, + "steps": self.default_steps, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True @@ -87,8 +100,12 @@ def setUp(self): super(TestCAttackEvasionFoolboxPGDLinf, self).setUp() self.attack_class = CFoolboxPGDLinf - self.attack_params = {'rel_stepsize': 0.025, 'steps': self.default_steps, 'abs_stepsize': 0.1, - 'random_start': False} + self.attack_params = { + "rel_stepsize": 0.025, + "steps": self.default_steps, + "abs_stepsize": 0.1, + "random_start": False, + } self.has_targeted = True self.has_untargeted = True diff --git a/src/secml/adv/attacks/evasion/foolbox/tests/test_secml_autograd.py b/src/secml/adv/attacks/evasion/foolbox/tests/test_secml_autograd.py index 4bbbd4dd..44f8f4d6 100644 --- a/src/secml/adv/attacks/evasion/foolbox/tests/test_secml_autograd.py +++ b/src/secml/adv/attacks/evasion/foolbox/tests/test_secml_autograd.py @@ -27,7 +27,10 @@ def test_grads_svm(self): random_op.backward() torch_grad = x.grad - secml_grad = self.secml_model.backward(as_carray(torch.ones(size=(self.D_out,)))) * self.N + secml_grad = ( + self.secml_model.backward(as_carray(torch.ones(size=(self.D_out,)))) + * self.N + ) self.assertAlmostEqual(torch_grad.sum().item(), secml_grad.sum(), places=3) diff --git a/src/secml/adv/attacks/evasion/tests/c_attack_evasion_testcases.py b/src/secml/adv/attacks/evasion/tests/c_attack_evasion_testcases.py index 04a3c081..4660f5a3 100644 --- a/src/secml/adv/attacks/evasion/tests/c_attack_evasion_testcases.py +++ b/src/secml/adv/attacks/evasion/tests/c_attack_evasion_testcases.py @@ -5,23 +5,23 @@ from secml.array import CArray from secml.data.loader import CDLRandomBlobs -from secml.optim.constraints import \ - CConstraintBox, CConstraintL1, CConstraintL2 +from secml.optim.constraints import CConstraintBox, CConstraintL1, CConstraintL2 from secml.ml.features.normalization import CNormalizerMinMax from secml.ml.classifiers import CClassifierSVM, CClassifierDecisionTree from secml.core.type_utils import is_list, is_float from secml.figure import CFigure from secml.utils import fm -IMAGES_FOLDER = fm.join(fm.abspath(__file__), 'test_images') +IMAGES_FOLDER = fm.join(fm.abspath(__file__), "test_images") if not fm.folder_exist(IMAGES_FOLDER): fm.make_folder(IMAGES_FOLDER) class CAttackEvasionTestCases(CUnitTest): """Unittests interface for CAttackEvasion.""" + images_folder = IMAGES_FOLDER - make_figures = os.getenv('MAKE_FIGURES', False) # True to produce figures + make_figures = os.getenv("MAKE_FIGURES", False) # True to produce figures def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None): """Load Random Blobs dataset. @@ -44,10 +44,10 @@ def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None): centers=n_clusters, center_box=(-0.5, 0.5), cluster_std=0.5, - random_state=seed) + random_state=seed, + ) - self.logger.info( - "Loading `random_blobs` with seed: {:}".format(seed)) + self.logger.info("Loading `random_blobs` with seed: {:}".format(seed)) ds = loader.load() if sparse is True: @@ -67,7 +67,7 @@ def _discretize_data(ds, eta): """ if is_list(eta): if len(eta) != ds.n_features: - raise ValueError('len(eta) != n_features') + raise ValueError("len(eta) != n_features") for i in range(len(eta)): ds.X[:, i] = (ds.X[:, i] / eta[i]).round() * eta[i] else: # eta is a single value @@ -101,7 +101,7 @@ def _prepare_linear_svm(self, sparse, seed): n_feats=2, # Number of dataset features n_clusters=2, # Number of dataset clusters sparse=sparse, - seed=seed + seed=seed, ) normalizer = CNormalizerMinMax(feature_range=(-1, 1)) @@ -130,7 +130,7 @@ def _prepare_linear_svm_10d(self, sparse, seed): n_feats=10, # Number of dataset features n_clusters=2, # Number of dataset clusters sparse=sparse, - seed=seed + seed=seed, ) normalizer = CNormalizerMinMax(feature_range=(-1, 1)) @@ -159,11 +159,11 @@ def _prepare_nonlinear_svm(self, sparse, seed): n_feats=2, # Number of dataset features n_clusters=2, # Number of dataset clusters sparse=sparse, - seed=seed + seed=seed, ) normalizer = CNormalizerMinMax(feature_range=(-1, 1)) - clf = CClassifierSVM(kernel='rbf', C=1, preprocess=normalizer) + clf = CClassifierSVM(kernel="rbf", C=1, preprocess=normalizer) return ds, clf @@ -191,11 +191,11 @@ def _prepare_tree_nonlinear_svm(self, sparse, seed): n_feats=2, # Number of dataset features n_clusters=2, # Number of dataset clusters sparse=sparse, - seed=seed + seed=seed, ) clf = CClassifierDecisionTree(random_state=seed) - clf_surr = CClassifierSVM(kernel='rbf', C=1) + clf_surr = CClassifierSVM(kernel="rbf", C=1) return ds, clf, clf_surr @@ -249,8 +249,9 @@ def _run_evasion(self, evas, x0, y0, expected_x=None, expected_y=None): with self.logger.timer(): y_pred, scores, adv_ds, f_obj = evas.run(x0, y0) - self.logger.info("Starting score: " + str( - evas.classifier.decision_function(x0, y=1).item())) + self.logger.info( + "Starting score: " + str(evas.classifier.decision_function(x0, y=1).item()) + ) self.logger.info("Final score: " + str(evas.f_opt)) self.logger.info("x*:\n" + str(evas.x_opt)) @@ -269,7 +270,8 @@ def _run_evasion(self, evas, x0, y0, expected_x=None, expected_y=None): # Compare optimal point with expected if expected_x is not None: self.assert_array_almost_equal( - evas.x_opt.todense().ravel(), expected_x, decimal=4) + evas.x_opt.todense().ravel(), expected_x, decimal=4 + ) if expected_y is not None: self.assert_array_almost_equal(y_pred.item(), expected_y) @@ -338,26 +340,32 @@ def _plot_2d_evasion(self, evas, ds, x0, filename, th=0, grid_limits=None): fig.sp.plot_ds(ds) fig.sp.plot_fun( func=evas.objective_function, - grid_limits=grid_limits, colorbar=False, - n_grid_points=50, plot_levels=False) + grid_limits=grid_limits, + colorbar=False, + n_grid_points=50, + plot_levels=False, + ) fig.sp.plot_decision_regions( - clf=evas.classifier, plot_background=False, + clf=evas.classifier, + plot_background=False, grid_limits=grid_limits, - n_grid_points=50) + n_grid_points=50, + ) - fig.sp.plot_constraint(self._box(evas), - n_grid_points=20, - grid_limits=grid_limits) + fig.sp.plot_constraint( + self._box(evas), n_grid_points=20, grid_limits=grid_limits + ) - fig.sp.plot_fun(func=lambda z: self._constr(evas, x0).constraint(z), - plot_background=False, - n_grid_points=50, - grid_limits=grid_limits, - levels=[0], - colorbar=False) + fig.sp.plot_fun( + func=lambda z: self._constr(evas, x0).constraint(z), + plot_background=False, + n_grid_points=50, + grid_limits=grid_limits, + levels=[0], + colorbar=False, + ) fig.sp.plot_path(evas.x_seq) - fig.savefig(fm.join(self.images_folder, filename), file_format='pdf') - + fig.savefig(fm.join(self.images_folder, filename), file_format="pdf") diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd.py index 2f25007a..cc06e6b4 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd.py @@ -55,15 +55,13 @@ def test_linear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.05, "lb": -1.05, "ub": 1.05, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -74,7 +72,7 @@ def test_linear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pdg_linear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pdg_linear_L1.pdf") def test_linear_l2(self): """Test evasion of a linear classifier using L2 distance.""" @@ -88,15 +86,13 @@ def test_linear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.05, "lb": -0.67, "ub": 0.67, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -107,7 +103,7 @@ def test_linear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pdg_linear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pdg_linear_L2.pdf") def test_nonlinear_l1(self): """Test evasion of a nonlinear classifier using L1 distance.""" @@ -121,15 +117,13 @@ def test_nonlinear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.0, "lb": -1.0, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -140,7 +134,7 @@ def test_nonlinear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pdg_nonlinear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pdg_nonlinear_L1.pdf") def test_nonlinear_l2(self): """Test evasion of a nonlinear classifier using L2 distance.""" @@ -154,15 +148,13 @@ def test_nonlinear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.25, "lb": -0.65, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -173,7 +165,7 @@ def test_nonlinear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pdg_nonlinear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pdg_nonlinear_L2.pdf") def test_tree_l1(self): """Test evasion of a tree classifier using L1 distance.""" @@ -187,15 +179,13 @@ def test_tree_l1(self): evasion_params = { "classifier": clf_surr, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 2.0, "lb": -1.5, "ub": 1.5, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -206,5 +196,4 @@ def test_tree_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion( - evas, ds, x0, th=0.5, filename='pdg_tree_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, th=0.5, filename="pdg_tree_L1.pdf") diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_exp.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_exp.py index ddbdfece..271231b9 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_exp.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_exp.py @@ -56,17 +56,13 @@ def test_linear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.05, "lb": -1.05, "ub": 1.05, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -77,7 +73,7 @@ def test_linear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_exp_linear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_exp_linear_L1.pdf") def test_linear_l1_discrete(self): """Test evasion of a linear classifier using L1 distance (discrete).""" @@ -93,17 +89,13 @@ def test_linear_l1_discrete(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 2, "lb": -1, "ub": 1, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -114,7 +106,7 @@ def test_linear_l1_discrete(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_exp_linear_L1_discrete.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_exp_linear_L1_discrete.pdf") def test_linear_l1_discrete_10d(self): """Test evasion of a linear classifier (10 features) @@ -136,32 +128,26 @@ def test_linear_l1_discrete_10d(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 5, "lb": -2, "ub": 2, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) # Set few features to the same max value w_new = clf.w.deepcopy() - w_new[CArray.randint( - clf.w.size, shape=3, random_state=seed)] = clf.w.max() + w_new[CArray.randint(clf.w.size, shape=3, random_state=seed)] = clf.w.max() clf._w = w_new # Expected final optimal point # CAttackEvasionPGDExp uses CLineSearchBisectProj # which brings the point outside of the grid - expected_x = \ - CArray([-1.8333, -1.8333, 1.8333, 0, -0.5, 0, 0.5, -0.5, 1, 0.5]) + expected_x = CArray([-1.8333, -1.8333, 1.8333, 0, -0.5, 0, 0.5, -0.5, 1, 0.5]) expected_y = 0 self._run_evasion(evas, x0, y0, expected_x, expected_y) @@ -178,17 +164,13 @@ def test_linear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.05, "lb": -0.67, "ub": 0.67, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -199,7 +181,7 @@ def test_linear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_exp_linear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_exp_linear_L2.pdf") def test_nonlinear_l1(self): """Test evasion of a nonlinear classifier using L1 distance.""" @@ -213,17 +195,13 @@ def test_nonlinear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.0, "lb": -1.0, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": 0.1, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": 0.1, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -234,7 +212,7 @@ def test_nonlinear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_exp_nonlinear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_exp_nonlinear_L1.pdf") def test_nonlinear_l2(self): """Test evasion of a nonlinear classifier using L2 distance.""" @@ -248,17 +226,13 @@ def test_nonlinear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.25, "lb": -0.65, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": 0.01, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": 0.01, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -269,7 +243,7 @@ def test_nonlinear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_exp_nonlinear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_exp_nonlinear_L2.pdf") def test_tree_l1(self): """Test evasion of a tree classifier using L1 distance.""" @@ -283,17 +257,13 @@ def test_tree_l1(self): evasion_params = { "classifier": clf_surr, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 2.0, "lb": -1.5, "ub": 1.5, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -304,5 +274,4 @@ def test_tree_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion( - evas, ds, x0, th=0.5, filename='pgd_exp_tree_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, th=0.5, filename="pgd_exp_tree_L1.pdf") diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls.py index ba401f97..75e85d5f 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls.py @@ -56,17 +56,13 @@ def test_linear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.05, "lb": -1.05, "ub": 1.05, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -77,7 +73,7 @@ def test_linear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_ls_linear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_ls_linear_L1.pdf") def test_linear_l1_discrete(self): """Test evasion of a linear classifier using L1 distance (discrete).""" @@ -93,15 +89,13 @@ def test_linear_l1_discrete(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 2, "lb": -1, "ub": 1, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta - } + "solver_params": {"eta": eta}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -112,7 +106,7 @@ def test_linear_l1_discrete(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_ls_linear_L1_discrete.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_ls_linear_L1_discrete.pdf") def test_linear_l1_discrete_10d(self): """Test evasion of a linear classifier (10 features) @@ -134,29 +128,24 @@ def test_linear_l1_discrete_10d(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 5, "lb": -2, "ub": 2, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) # Set few features to the same max value w_new = clf.w.deepcopy() - w_new[CArray.randint( - clf.w.size, shape=3, random_state=seed)] = clf.w.max() + w_new[CArray.randint(clf.w.size, shape=3, random_state=seed)] = clf.w.max() clf._w = w_new # Expected final optimal point - expected_x = CArray([-2., -1.5, 2, 0, -0.5, 0, 0.5, -0.5, 1, 0.5]) + expected_x = CArray([-2.0, -1.5, 2, 0, -0.5, 0, 0.5, -0.5, 1, 0.5]) expected_y = 0 self._run_evasion(evas, x0, y0, expected_x, expected_y) @@ -173,17 +162,13 @@ def test_linear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.05, "lb": -0.67, "ub": 0.67, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -194,7 +179,7 @@ def test_linear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_ls_linear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_ls_linear_L2.pdf") def test_nonlinear_l1(self): """Test evasion of a nonlinear classifier using L1 distance.""" @@ -208,17 +193,13 @@ def test_nonlinear_l1(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 1.0, "lb": -1.0, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": 0.1, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": 0.1, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -229,7 +210,7 @@ def test_nonlinear_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_ls_nonlinear_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_ls_nonlinear_L1.pdf") def test_nonlinear_l2(self): """Test evasion of a nonlinear classifier using L2 distance.""" @@ -243,17 +224,13 @@ def test_nonlinear_l2(self): evasion_params = { "classifier": clf, "double_init_ds": ds, - "distance": 'l2', + "distance": "l2", "dmax": 1.25, "lb": -0.65, "ub": 1.0, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": 0.01, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": 0.01, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -264,7 +241,7 @@ def test_nonlinear_l2(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion(evas, ds, x0, 'pgd_ls_nonlinear_L2.pdf') + self._plot_2d_evasion(evas, ds, x0, "pgd_ls_nonlinear_L2.pdf") def test_tree_l1(self): """Test evasion of a tree classifier using L1 distance.""" @@ -278,17 +255,13 @@ def test_tree_l1(self): evasion_params = { "classifier": clf_surr, "double_init_ds": ds, - "distance": 'l1', + "distance": "l1", "dmax": 2.0, "lb": -1.5, "ub": 1.5, "attack_classes": CArray([1]), "y_target": 0, - "solver_params": { - "eta": eta, - "eta_min": None, - "eta_max": None - } + "solver_params": {"eta": eta, "eta_min": None, "eta_max": None}, } evas, x0, y0 = self._set_evasion(ds, evasion_params) @@ -299,5 +272,4 @@ def test_tree_l1(self): self._run_evasion(evas, x0, y0, expected_x, expected_y) - self._plot_2d_evasion( - evas, ds, x0, th=0.5, filename='pgd_ls_tree_L1.pdf') + self._plot_2d_evasion(evas, ds, x0, th=0.5, filename="pgd_ls_tree_L1.pdf") diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_mnist.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_mnist.py index 15e5eb92..8711bb94 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_mnist.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_mnist.py @@ -35,9 +35,9 @@ def _load_mnist49(self, sparse=False, seed=None): self._digits = [4, 9] self._tr = loader.load( - 'training', digits=self._digits, num_samples=n_tr+n_val) - self._ts = loader.load( - 'testing', digits=self._digits, num_samples=n_ts) + "training", digits=self._digits, num_samples=n_tr + n_val + ) + self._ts = loader.load("testing", digits=self._digits, num_samples=n_ts) if sparse is True: self._tr = self._tr.tosparse() @@ -96,8 +96,7 @@ def _set_evasion(self, params, x0_img_class): def _choose_x0_2c(self, x0_img_class): """Find a sample of that belong to the required class.""" - adv_img_idx = \ - CArray(self._ts.Y.find(self._ts.Y == x0_img_class))[0] + adv_img_idx = CArray(self._ts.Y.find(self._ts.Y == x0_img_class))[0] x0 = self._ts.X[adv_img_idx, :] y0 = self._ts.Y[adv_img_idx] @@ -124,8 +123,9 @@ def _prepare_multiclass_svm(self, sparse, seed): self._load_mnist49(sparse, seed) clf = CClassifierMulticlassOVA( - classifier=CClassifierSVM, C=1.0, - kernel=CKernel.create('rbf', gamma=0.01), + classifier=CClassifierSVM, + C=1.0, + kernel=CKernel.create("rbf", gamma=0.01), ) return clf @@ -141,18 +141,18 @@ def test_mnist(self): evasion_params = { "classifier": clf, "double_init_ds": self._val_dts, - "distance": 'l1', + "distance": "l1", "dmax": 10, "lb": 0, "ub": 1, - "attack_classes": 'all', + "attack_classes": "all", "y_target": None, "solver_params": { "eta": 1.0 / 255.0, "eta_min": 0.1, "eta_max": None, - "eps": 1e-6 - } + "eps": 1e-6, + }, } evas, x0, y0 = self._set_evasion(evasion_params, x0_img_class=1) @@ -161,7 +161,7 @@ def test_mnist(self): y_pred = evas.classifier.predict(evas.x_opt) - self.filename = 'pgd_ls_mnist.pdf' + self.filename = "pgd_ls_mnist.pdf" self._show_adv(x0, y0, evas.x_opt, y_pred[0]) def _show_adv(self, x0, y0, x_opt, y_pred): @@ -188,19 +188,21 @@ def _show_adv(self, x0, y0, x_opt, y_pred): fig = CFigure(height=5.0, width=15.0) fig.subplot(1, 3, 1) fig.sp.title(self._digits[y0.item()]) - fig.sp.imshow(x0.reshape( - (self._tr.header.img_h, self._tr.header.img_w)), cmap='gray') + fig.sp.imshow( + x0.reshape((self._tr.header.img_h, self._tr.header.img_w)), cmap="gray" + ) fig.subplot(1, 3, 2) fig.sp.imshow( - added_noise.reshape( - (self._tr.header.img_h, self._tr.header.img_w)), cmap='gray') + added_noise.reshape((self._tr.header.img_h, self._tr.header.img_w)), + cmap="gray", + ) fig.subplot(1, 3, 3) fig.sp.title(self._digits[y_pred.item()]) - fig.sp.imshow(x_opt.reshape( - (self._tr.header.img_h, self._tr.header.img_w)), cmap='gray') - fig.savefig( - fm.join(self.images_folder, self.filename), file_format='pdf') + fig.sp.imshow( + x_opt.reshape((self._tr.header.img_h, self._tr.header.img_w)), cmap="gray" + ) + fig.savefig(fm.join(self.images_folder, self.filename), file_format="pdf") -if __name__ == '__main__': +if __name__ == "__main__": CAttackEvasionTestCases.main() diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_multiclass.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_multiclass.py index 92b1dd7b..885ee38d 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_multiclass.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_multiclass.py @@ -21,16 +21,23 @@ class TestCAttackEvasionPGDLSMNIST(CAttackEvasionTestCases): def setUp(self): import numpy as np + np.random.seed(12345678) # generate synthetic data - self.ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0, - n_clusters_per_class=1, class_sep=1, - random_state=0).load() + self.ds = CDLRandom( + n_classes=3, + n_features=2, + n_redundant=0, + n_clusters_per_class=1, + class_sep=1, + random_state=0, + ).load() # Add a new class modifying one of the existing clusters - self.ds.Y[(self.ds.X[:, 0] > 0).logical_and( - self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes + self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(self.ds.X[:, 1] > 1).ravel()] = ( + self.ds.num_classes + ) # self.kernel = None self.kernel = CKernelRBF(gamma=10) @@ -40,15 +47,19 @@ def setUp(self): self.ds.X = self.normalizer.fit_transform(self.ds.X) self.multiclass = CClassifierMulticlassOVA( - classifier=CClassifierSVM, class_weight='balanced', - preprocess=None, kernel=self.kernel) + classifier=CClassifierSVM, + class_weight="balanced", + preprocess=None, + kernel=self.kernel, + ) self.multiclass.verbose = 0 # Training and classification self.multiclass.fit(self.ds.X, self.ds.Y) self.y_pred, self.score_pred = self.multiclass.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def test_indiscriminate(self): """Test indiscriminate evasion.""" @@ -63,8 +74,9 @@ def test_targeted(self): """Test targeted evasion.""" self.y_target = 2 - self.logger.info("Test target evasion " - "(with target class {:}) ".format(self.y_target)) + self.logger.info( + "Test target evasion " "(with target class {:}) ".format(self.y_target) + ) expected_x = CArray([0.9347, 0.3976]) self._test_evasion_multiclass(expected_x) @@ -83,13 +95,18 @@ def _test_evasion_multiclass(self, expected_x): dmax = 2 - self.solver_params = {'eta': 1e-1, 'eta_min': 1.0} + self.solver_params = {"eta": 1e-1, "eta_min": 1.0} - eva = CAttackEvasionPGDLS(classifier=self.multiclass, - double_init_ds=self.ds, - distance='l2', dmax=dmax, lb=lb, ub=ub, - solver_params=self.solver_params, - y_target=self.y_target) + eva = CAttackEvasionPGDLS( + classifier=self.multiclass, + double_init_ds=self.ds, + distance="l2", + dmax=dmax, + lb=lb, + ub=ub, + solver_params=self.solver_params, + y_target=self.y_target, + ) eva.verbose = 0 # 2 @@ -120,8 +137,7 @@ def _test_evasion_multiclass(self, expected_x): eva.dmax = d x, f_opt = eva._run(x0=x0, y0=y0, x_init=x) - y_pred, score = self.multiclass.predict( - x, return_decision_function=True) + y_pred, score = self.multiclass.predict(x, return_decision_function=True) f_seq = f_seq.append(f_opt) # not considering all iterations, just values at dmax # for all iterations, you should bring eva.x_seq and eva.f_seq @@ -137,7 +153,8 @@ def _test_evasion_multiclass(self, expected_x): # Compare optimal point with expected self.assert_array_almost_equal( - eva.x_opt.todense().ravel(), expected_x, decimal=4) + eva.x_opt.todense().ravel(), expected_x, decimal=4 + ) self._make_plots(x_seq, dmax, eva, x0, scores, f_seq) @@ -163,54 +180,87 @@ def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq): fig.subplot(2, 2, 1) fig = self._plot_decision_function(fig, plot_background=True) - fig.sp.plot_path(x_seq, path_style='-', - start_style='o', start_facecolor='w', - start_edgewidth=2, final_style='o', - final_facecolor='k', final_edgewidth=2) + fig.sp.plot_path( + x_seq, + path_style="-", + start_style="o", + start_facecolor="w", + start_edgewidth=2, + final_style="o", + final_facecolor="k", + final_edgewidth=2, + ) # plot distance constraint - fig.sp.plot_fun(func=self._rescaled_distance, - multipoint=True, - plot_background=False, - n_grid_points=20, levels_color='k', - grid_limits=ds_bounds, - levels=[0], colorbar=False, - levels_linewidth=2.0, levels_style=':', - alpha_levels=.4, c=x0, r=dmax) - - fig.sp.grid(linestyle='--', alpha=.5, zorder=0) + fig.sp.plot_fun( + func=self._rescaled_distance, + multipoint=True, + plot_background=False, + n_grid_points=20, + levels_color="k", + grid_limits=ds_bounds, + levels=[0], + colorbar=False, + levels_linewidth=2.0, + levels_style=":", + alpha_levels=0.4, + c=x0, + r=dmax, + ) + + fig.sp.grid(linestyle="--", alpha=0.5, zorder=0) # Plotting multiclass evasion objective function fig.subplot(2, 2, 2) fig = self._plot_decision_function(fig) - fig.sp.plot_fgrads(eva.objective_function_gradient, - grid_limits=ds_bounds, n_grid_points=20, - color='k', alpha=.5) - - fig.sp.plot_path(x_seq, path_style='-', - start_style='o', start_facecolor='w', - start_edgewidth=2, final_style='o', - final_facecolor='k', final_edgewidth=2) + fig.sp.plot_fgrads( + eva.objective_function_gradient, + grid_limits=ds_bounds, + n_grid_points=20, + color="k", + alpha=0.5, + ) + + fig.sp.plot_path( + x_seq, + path_style="-", + start_style="o", + start_facecolor="w", + start_edgewidth=2, + final_style="o", + final_facecolor="k", + final_edgewidth=2, + ) # plot distance constraint - fig.sp.plot_fun(func=self._rescaled_distance, - multipoint=True, - plot_background=False, - n_grid_points=20, levels_color='w', - grid_limits=ds_bounds, - levels=[0], colorbar=False, - levels_style=':', levels_linewidth=2.0, - alpha_levels=.5, c=x0, r=dmax) - - fig.sp.plot_fun(lambda z: eva.objective_function(z), - multipoint=True, - grid_limits=ds_bounds, - colorbar=False, n_grid_points=20, - plot_levels=False) - - fig.sp.grid(linestyle='--', alpha=.5, zorder=0) + fig.sp.plot_fun( + func=self._rescaled_distance, + multipoint=True, + plot_background=False, + n_grid_points=20, + levels_color="w", + grid_limits=ds_bounds, + levels=[0], + colorbar=False, + levels_style=":", + levels_linewidth=2.0, + alpha_levels=0.5, + c=x0, + r=dmax, + ) + + fig.sp.plot_fun( + lambda z: eva.objective_function(z), + multipoint=True, + grid_limits=ds_bounds, + colorbar=False, + n_grid_points=20, + plot_levels=False, + ) + + fig.sp.grid(linestyle="--", alpha=0.5, zorder=0) fig.subplot(2, 2, 3) if self.y_target is not None: @@ -233,12 +283,15 @@ def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq): fig.tight_layout() - k_name = self.kernel.class_type if self.kernel is not None else 'lin' - fig.savefig(fm.join( - self.images_folder, - "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format( - self.ds.num_classes, k_name, self.y_target) - )) + k_name = self.kernel.class_type if self.kernel is not None else "lin" + fig.savefig( + fm.join( + self.images_folder, + "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format( + self.ds.num_classes, k_name, self.y_target + ), + ) + ) def _rescaled_distance(self, x, c, r): """Rescale distance for plot.""" @@ -251,49 +304,73 @@ def _rescaled_distance(self, x, c, r): def _get_style(self): """Define the style vector for the different classes.""" if self.ds.num_classes == 3: - styles = [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')] + styles = [("b", "o", "-"), ("g", "p", "--"), ("r", "s", "-.")] elif self.ds.num_classes == 4: - styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'), - ('y', 's', '-.'), ('gray', 'D', '--')] + styles = [ + ("saddlebrown", "o", "-"), + ("g", "p", "--"), + ("y", "s", "-."), + ("gray", "D", "--"), + ] else: - styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'), - ('y', 's', '-.'), ('gray', 'D', '--'), - ('c', '-.'), ('m', '-'), ('y', '-.')] + styles = [ + ("saddlebrown", "o", "-"), + ("g", "p", "--"), + ("y", "s", "-."), + ("gray", "D", "--"), + ("c", "-."), + ("m", "-"), + ("y", "-."), + ] return styles def _plot_decision_function(self, fig, plot_background=False): """Plot the decision function of a multiclass classifier.""" - fig.sp.title('{:} ({:})'.format(self.multiclass.__class__.__name__, - self.multiclass.classifier.__name__)) + fig.sp.title( + "{:} ({:})".format( + self.multiclass.__class__.__name__, self.multiclass.classifier.__name__ + ) + ) x_bounds, y_bounds = self.ds.get_bounds() styles = self._get_style() for c_idx, c in enumerate(self.ds.classes): - fig.sp.scatter(self.ds.X[self.ds.Y == c, 0], - self.ds.X[self.ds.Y == c, 1], - s=20, c=styles[c_idx][0], edgecolors='k', - facecolors='none', linewidths=1, - label='c {:}'.format(c)) + fig.sp.scatter( + self.ds.X[self.ds.Y == c, 0], + self.ds.X[self.ds.Y == c, 1], + s=20, + c=styles[c_idx][0], + edgecolors="k", + facecolors="none", + linewidths=1, + label="c {:}".format(c), + ) # Plotting multiclass decision function fig.sp.plot_fun( lambda x: self.multiclass.predict(x), - multipoint=True, cmap='Set2', + multipoint=True, + cmap="Set2", grid_limits=self.ds.get_bounds(offset=5), - colorbar=False, n_grid_points=300, - plot_levels=True, plot_background=plot_background, - levels=[-1, 0, 1, 2], levels_color='gray', levels_style='--') + colorbar=False, + n_grid_points=300, + plot_levels=True, + plot_background=plot_background, + levels=[-1, 0, 1, 2], + levels_color="gray", + levels_style="--", + ) - fig.sp.xlim(x_bounds[0] - .05, x_bounds[1] + .05) - fig.sp.ylim(y_bounds[0] - .05, y_bounds[1] + .05) + fig.sp.xlim(x_bounds[0] - 0.05, x_bounds[1] + 0.05) + fig.sp.ylim(y_bounds[0] - 0.05, y_bounds[1] + 0.05) - fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1) + fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=0.1) return fig -if __name__ == '__main__': +if __name__ == "__main__": CAttackEvasionTestCases.main() diff --git a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_reject_threshold.py b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_reject_threshold.py index 0db96278..79784efe 100644 --- a/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_reject_threshold.py +++ b/src/secml/adv/attacks/evasion/tests/test_c_attack_evasion_pgd_ls_reject_threshold.py @@ -22,6 +22,7 @@ class TestCAttackEvasionPGDLSRejectThreshold(CAttackEvasionTestCases): def setUp(self): import numpy as np + np.random.seed(12345678) self._dataset_creation() @@ -29,8 +30,11 @@ def setUp(self): self.kernel = CKernelRBF(gamma=1) self.multiclass = CClassifierMulticlassOVA( - classifier=CClassifierSVM, class_weight='balanced', - preprocess=None, kernel=self.kernel) + classifier=CClassifierSVM, + class_weight="balanced", + preprocess=None, + kernel=self.kernel, + ) self.multiclass.verbose = 0 self.multiclass = CClassifierRejectThreshold(self.multiclass, 0.6) @@ -39,24 +43,31 @@ def setUp(self): self.multiclass.fit(self.ds.X, self.ds.Y) self.y_pred, self.score_pred = self.multiclass.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def _dataset_creation(self): # generate synthetic data - self.ds = CDLRandom(n_samples=100, n_classes=3, n_features=2, - n_redundant=0, n_clusters_per_class=1, - class_sep=1, random_state=0).load() + self.ds = CDLRandom( + n_samples=100, + n_classes=3, + n_features=2, + n_redundant=0, + n_clusters_per_class=1, + class_sep=1, + random_state=0, + ).load() # Add a new class modifying one of the existing clusters - self.ds.Y[(self.ds.X[:, 0] > 0).logical_and( - self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes + self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(self.ds.X[:, 1] > 1).ravel()] = ( + self.ds.num_classes + ) self.lb = 0 self.ub = 1 # Data normalization - self.normalizer = CNormalizerMinMax( - feature_range=(self.lb, self.ub)) + self.normalizer = CNormalizerMinMax(feature_range=(self.lb, self.ub)) self.normalizer = None if self.normalizer is not None: self.ds.X = self.normalizer.fit_transform(self.ds.X) @@ -74,8 +85,9 @@ def test_targeted(self): """Test targeted evasion.""" self.y_target = 2 - self.logger.info("Test target evasion " - "(with target class {:}) ".format(self.y_target)) + self.logger.info( + "Test target evasion " "(with target class {:}) ".format(self.y_target) + ) expected_x = CArray([2.3414, -0.5295]) self._test_evasion_multiclass(expected_x) @@ -94,13 +106,18 @@ def _test_evasion_multiclass(self, expected_x): dmax = 3 - self.solver_params = {'eta': 0.5, 'max_iter': 3} + self.solver_params = {"eta": 0.5, "max_iter": 3} - eva = CAttackEvasionPGDLS(classifier=self.multiclass, - double_init_ds=self.ds, - distance='l2', dmax=dmax, lb=lb, ub=ub, - solver_params=self.solver_params, - y_target=self.y_target) + eva = CAttackEvasionPGDLS( + classifier=self.multiclass, + double_init_ds=self.ds, + distance="l2", + dmax=dmax, + lb=lb, + ub=ub, + solver_params=self.solver_params, + y_target=self.y_target, + ) eva.verbose = 0 # 2 @@ -125,54 +142,48 @@ def _test_evasion_multiclass(self, expected_x): eva.dmax = dmax x_opt, f_opt = eva._run(x0=x0, y0=y0, x_init=x0) - y_pred, score = self.multiclass.predict( - x_opt, return_decision_function=True) + y_pred, score = self.multiclass.predict(x_opt, return_decision_function=True) s = score[:, y0 if self.y_target is None else self.y_target] self.logger.info( - "Number of objective function evaluations: {:}".format( - eva.f_eval)) + "Number of objective function evaluations: {:}".format(eva.f_eval) + ) self.logger.info( - "Number of gradient function evaluations: {:}".format( - eva.grad_eval)) + "Number of gradient function evaluations: {:}".format(eva.grad_eval) + ) - self.logger.info( - "Predicted label after evasion: {:}".format(y_pred)) + self.logger.info("Predicted label after evasion: {:}".format(y_pred)) self.logger.info("Score after evasion: {:}".format(s)) - self.logger.info( - "Objective function after evasion: {:}".format(f_opt)) + self.logger.info("Objective function after evasion: {:}".format(f_opt)) # Compare optimal point with expected self.assert_array_almost_equal( - eva.x_opt.todense().ravel(), expected_x, decimal=4) + eva.x_opt.todense().ravel(), expected_x, decimal=4 + ) if self.y_target: - s_ytarget_x0 = self.multiclass.decision_function( - x0, self.y_target) - s_ytarget_xopt = self.multiclass.decision_function( - x_opt, self.y_target) + s_ytarget_x0 = self.multiclass.decision_function(x0, self.y_target) + s_ytarget_xopt = self.multiclass.decision_function(x_opt, self.y_target) self.logger.info( "Discriminat function w.r.t the target class first: {:} " - "and after evasion: {:}".format(s_ytarget_x0, - s_ytarget_xopt)) + "and after evasion: {:}".format(s_ytarget_x0, s_ytarget_xopt) + ) self.assertLess(s_ytarget_x0, s_ytarget_xopt) else: # indiscriminate attack - s_ytrue_x0 = self.multiclass.decision_function( - x0, y0) - s_ytrue_xopt = self.multiclass.decision_function( - x_opt, y0) + s_ytrue_x0 = self.multiclass.decision_function(x0, y0) + s_ytrue_xopt = self.multiclass.decision_function(x_opt, y0) self.logger.info( "Discriminat function w.r.t the true class first: {:} " - "and after evasion: {:}".format(s_ytrue_x0, - s_ytrue_xopt)) + "and after evasion: {:}".format(s_ytrue_x0, s_ytrue_xopt) + ) self.assertGreater(s_ytrue_x0, s_ytrue_xopt) @@ -198,8 +209,7 @@ def _make_plot(self, p_idx, eva, dmax): eva.dmax = d x, f_opt = eva._run(x0=x0, y0=y0, x_init=x) - y_pred, score = self.multiclass.predict( - x, return_decision_function=True) + y_pred, score = self.multiclass.predict(x, return_decision_function=True) f_seq = f_seq.append(f_opt) # not considering all iterations, just values at dmax # for all iterations, you should bring eva.x_seq and eva.f_seq @@ -209,11 +219,9 @@ def _make_plot(self, p_idx, eva, dmax): scores = scores.append(s) - self.logger.info( - "Predicted label after evasion: {:}".format(y_pred)) + self.logger.info("Predicted label after evasion: {:}".format(y_pred)) self.logger.info("Score after evasion: {:}".format(s)) - self.logger.info( - "Objective function after evasion: {:}".format(f_opt)) + self.logger.info("Objective function after evasion: {:}".format(f_opt)) fig = CFigure(height=9, width=10, markersize=6, fontsize=12) @@ -231,54 +239,87 @@ def _make_plot(self, p_idx, eva, dmax): fig.subplot(2, 2, 1) fig = self._plot_decision_function(fig, plot_background=True) - fig.sp.plot_path(x_seq, path_style='-', - start_style='o', start_facecolor='w', - start_edgewidth=2, final_style='o', - final_facecolor='k', final_edgewidth=2) + fig.sp.plot_path( + x_seq, + path_style="-", + start_style="o", + start_facecolor="w", + start_edgewidth=2, + final_style="o", + final_facecolor="k", + final_edgewidth=2, + ) # plot distance constraint - fig.sp.plot_fun(func=self._rescaled_distance, - multipoint=True, - plot_background=False, - n_grid_points=20, levels_color='k', - grid_limits=ds_bounds, - levels=[0], colorbar=False, - levels_linewidth=2.0, levels_style=':', - alpha_levels=.4, c=x0, r=dmax) - - fig.sp.grid(linestyle='--', alpha=.5, zorder=0) + fig.sp.plot_fun( + func=self._rescaled_distance, + multipoint=True, + plot_background=False, + n_grid_points=20, + levels_color="k", + grid_limits=ds_bounds, + levels=[0], + colorbar=False, + levels_linewidth=2.0, + levels_style=":", + alpha_levels=0.4, + c=x0, + r=dmax, + ) + + fig.sp.grid(linestyle="--", alpha=0.5, zorder=0) # Plotting multiclass evasion objective function fig.subplot(2, 2, 2) fig = self._plot_decision_function(fig) - fig.sp.plot_fgrads(eva.objective_function_gradient, - grid_limits=ds_bounds, n_grid_points=20, - color='k', alpha=.5) - - fig.sp.plot_path(x_seq, path_style='-', - start_style='o', start_facecolor='w', - start_edgewidth=2, final_style='o', - final_facecolor='k', final_edgewidth=2) + fig.sp.plot_fgrads( + eva.objective_function_gradient, + grid_limits=ds_bounds, + n_grid_points=20, + color="k", + alpha=0.5, + ) + + fig.sp.plot_path( + x_seq, + path_style="-", + start_style="o", + start_facecolor="w", + start_edgewidth=2, + final_style="o", + final_facecolor="k", + final_edgewidth=2, + ) # plot distance constraint - fig.sp.plot_fun(func=self._rescaled_distance, - multipoint=True, - plot_background=False, - n_grid_points=20, levels_color='w', - grid_limits=ds_bounds, - levels=[0], colorbar=False, - levels_style=':', levels_linewidth=2.0, - alpha_levels=.5, c=x0, r=dmax) - - fig.sp.plot_fun(lambda z: eva.objective_function(z), - multipoint=True, - grid_limits=ds_bounds, - colorbar=False, n_grid_points=20, - plot_levels=False) - - fig.sp.grid(linestyle='--', alpha=.5, zorder=0) + fig.sp.plot_fun( + func=self._rescaled_distance, + multipoint=True, + plot_background=False, + n_grid_points=20, + levels_color="w", + grid_limits=ds_bounds, + levels=[0], + colorbar=False, + levels_style=":", + levels_linewidth=2.0, + alpha_levels=0.5, + c=x0, + r=dmax, + ) + + fig.sp.plot_fun( + lambda z: eva.objective_function(z), + multipoint=True, + grid_limits=ds_bounds, + colorbar=False, + n_grid_points=20, + plot_levels=False, + ) + + fig.sp.grid(linestyle="--", alpha=0.5, zorder=0) fig.subplot(2, 2, 3) if self.y_target is not None: @@ -301,12 +342,15 @@ def _make_plot(self, p_idx, eva, dmax): fig.tight_layout() - k_name = self.kernel.class_type if self.kernel is not None else 'lin' - fig.savefig(fm.join( - self.images_folder, - "pgd_ls_reject_threshold_{:}c_kernel-{:}_target-{:}.pdf".format( - self.ds.num_classes, k_name, self.y_target) - )) + k_name = self.kernel.class_type if self.kernel is not None else "lin" + fig.savefig( + fm.join( + self.images_folder, + "pgd_ls_reject_threshold_{:}c_kernel-{:}_target-{:}.pdf".format( + self.ds.num_classes, k_name, self.y_target + ), + ) + ) def _rescaled_distance(self, x, c, r): """Rescale distance for plot.""" @@ -319,48 +363,69 @@ def _rescaled_distance(self, x, c, r): def _get_style(self): """Define the style vector for the different classes.""" if self.ds.num_classes == 3: - styles = [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')] + styles = [("b", "o", "-"), ("g", "p", "--"), ("r", "s", "-.")] elif self.ds.num_classes == 4: - styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'), - ('y', 's', '-.'), ('gray', 'D', '--')] + styles = [ + ("saddlebrown", "o", "-"), + ("g", "p", "--"), + ("y", "s", "-."), + ("gray", "D", "--"), + ] else: - styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'), - ('y', 's', '-.'), ('gray', 'D', '--'), - ('c', '-.'), ('m', '-'), ('y', '-.')] + styles = [ + ("saddlebrown", "o", "-"), + ("g", "p", "--"), + ("y", "s", "-."), + ("gray", "D", "--"), + ("c", "-."), + ("m", "-"), + ("y", "-."), + ] return styles def _plot_decision_function(self, fig, plot_background=False): """Plot the decision function of a multiclass classifier.""" - fig.sp.title('{:}'.format(self.multiclass.__class__.__name__)) + fig.sp.title("{:}".format(self.multiclass.__class__.__name__)) x_bounds, y_bounds = self.ds.get_bounds() styles = self._get_style() for c_idx, c in enumerate(self.ds.classes): - fig.sp.scatter(self.ds.X[self.ds.Y == c, 0], - self.ds.X[self.ds.Y == c, 1], - s=20, c=styles[c_idx][0], edgecolors='k', - facecolors='none', linewidths=1, - label='c {:}'.format(c)) + fig.sp.scatter( + self.ds.X[self.ds.Y == c, 0], + self.ds.X[self.ds.Y == c, 1], + s=20, + c=styles[c_idx][0], + edgecolors="k", + facecolors="none", + linewidths=1, + label="c {:}".format(c), + ) # Plotting multiclass decision function fig.sp.plot_fun( lambda x: self.multiclass.predict(x), - multipoint=True, cmap='Set2', + multipoint=True, + cmap="Set2", grid_limits=self.ds.get_bounds(offset=5), - colorbar=False, n_grid_points=300, - plot_levels=True, plot_background=plot_background, - levels=[-1, 0, 1, 2], levels_color='gray', levels_style='--') + colorbar=False, + n_grid_points=300, + plot_levels=True, + plot_background=plot_background, + levels=[-1, 0, 1, 2], + levels_color="gray", + levels_style="--", + ) - fig.sp.xlim(x_bounds[0] - .05, x_bounds[1] + .05) - fig.sp.ylim(y_bounds[0] - .05, y_bounds[1] + .05) + fig.sp.xlim(x_bounds[0] - 0.05, x_bounds[1] + 0.05) + fig.sp.ylim(y_bounds[0] - 0.05, y_bounds[1] + 0.05) - fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1) + fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=0.1) return fig -if __name__ == '__main__': +if __name__ == "__main__": CAttackEvasionTestCases.main() diff --git a/src/secml/adv/attacks/poisoning/__init__.py b/src/secml/adv/attacks/poisoning/__init__.py index 4b2f3389..3f0ff55a 100644 --- a/src/secml/adv/attacks/poisoning/__init__.py +++ b/src/secml/adv/attacks/poisoning/__init__.py @@ -1,5 +1,4 @@ from .c_attack_poisoning import CAttackPoisoning from .c_attack_poisoning_svm import CAttackPoisoningSVM -from .c_attack_poisoning_logistic_regression import \ - CAttackPoisoningLogisticRegression +from .c_attack_poisoning_logistic_regression import CAttackPoisoningLogisticRegression from .c_attack_poisoning_ridge import CAttackPoisoningRidge diff --git a/src/secml/adv/attacks/poisoning/c_attack_poisoning.py b/src/secml/adv/attacks/poisoning/c_attack_poisoning.py index 9833d059..6705ba77 100644 --- a/src/secml/adv/attacks/poisoning/c_attack_poisoning.py +++ b/src/secml/adv/attacks/poisoning/c_attack_poisoning.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + import warnings from abc import ABCMeta, abstractmethod @@ -55,20 +56,24 @@ class CAttackPoisoning(CAttackMixin, metaclass=ABCMeta): If None, no fixed seed will be set. """ - __super__ = 'CAttackPoisoning' - - def __init__(self, classifier, - training_data, - val, - distance='l2', - dmax=0, - lb=0, - ub=1, - y_target=None, - solver_type='pgd-ls', - solver_params=None, - init_type='random', - random_seed=None): + + __super__ = "CAttackPoisoning" + + def __init__( + self, + classifier, + training_data, + val, + distance="l2", + dmax=0, + lb=0, + ub=1, + y_target=None, + solver_type="pgd-ls", + solver_params=None, + init_type="random", + random_seed=None, + ): super(CAttackPoisoning, self).__init__( classifier=classifier, @@ -77,20 +82,20 @@ def __init__(self, classifier, lb=lb, ub=ub, solver_type=solver_type, - solver_params=solver_params) + solver_params=solver_params, + ) # fixme: validation loss should be optional and passed from outside - if classifier.class_type == 'svm': - loss_name = 'hinge' - elif classifier.class_type == 'logistic': - loss_name = 'log' - elif classifier.class_type == 'ridge': - loss_name = 'square' + if classifier.class_type == "svm": + loss_name = "hinge" + elif classifier.class_type == "logistic": + loss_name = "log" + elif classifier.class_type == "ridge": + loss_name = "square" else: raise NotImplementedError("We cannot poisoning that classifier") - self._attacker_loss = CLoss.create( - loss_name) + self._attacker_loss = CLoss.create(loss_name) self._init_loss = self._attacker_loss @@ -113,7 +118,7 @@ def __init__(self, classifier, self.init_type = init_type - self.eta = solver_params['eta'] + self.eta = solver_params["eta"] # this is used to speed up some poisoning algorithms by re-using # the solution obtained at a previous step of the optimization @@ -143,7 +148,7 @@ def val(self, value): self._val = None return if not isinstance(value, CDataset): - raise TypeError('val should be a CDataset!') + raise TypeError("val should be a CDataset!") self._val = value @property @@ -156,7 +161,7 @@ def training_data(self, value): """Sets the training set used to learn the targeted classifier""" # mandatory parameter, we raise error also if value is None if not isinstance(value, CDataset): - raise TypeError('training_data should be a CDataset!') + raise TypeError("training_data should be a CDataset!") self._training_data = value @property @@ -219,9 +224,9 @@ def yc(self, value): def _constraint_creation(self): # only feature increments or decrements are allowed - lb = self._x0 if self.lb == 'x0' else self.lb - ub = self._x0 if self.ub == 'x0' else self.ub - bounds = CConstraint.create('box', lb=lb, ub=ub) + lb = self._x0 if self.lb == "x0" else self.lb + ub = self._x0 if self.ub == "x0" else self.ub + bounds = CConstraint.create("box", lb=lb, ub=ub) constr = CConstraint.create(self.distance, center=0, radius=1e12) @@ -231,26 +236,29 @@ def _init_solver(self): """Create solver instance.""" if self.classifier is None: - raise ValueError('Solver not set properly!') + raise ValueError("Solver not set properly!") # map attributes to fun, constr, box - fun = CFunction(fun=self.objective_function, - gradient=self.objective_function_gradient, - n_dim=self._classifier.n_features) + fun = CFunction( + fun=self.objective_function, + gradient=self.objective_function_gradient, + n_dim=self._classifier.n_features, + ) bounds, constr = self._constraint_creation() self._solver = COptimizer.create( self._solver_type, - fun=fun, constr=constr, + fun=fun, + constr=constr, bounds=bounds, - **self.solver_params) + **self.solver_params + ) self._solver.verbose = 0 self._warm_start = None - def _rnd_init_poisoning_points( - self, n_points=None, init_from_val=False, val=None): + def _rnd_init_poisoning_points(self, n_points=None, init_from_val=False, val=None): """Returns a random set of poisoning points randomly with flipped labels.""" if init_from_val: @@ -262,28 +270,30 @@ def _rnd_init_poisoning_points( init_dataset = self.training_data if (self._n_points is None or self._n_points == 0) and ( - n_points is None or n_points == 0): + n_points is None or n_points == 0 + ): raise ValueError("Number of poisoning points (n_points) not set!") if n_points is None: n_points = self.n_points - idx = CArray.randsample(init_dataset.num_samples, n_points, - random_state=self.random_seed) + idx = CArray.randsample( + init_dataset.num_samples, n_points, random_state=self.random_seed + ) xc = init_dataset.X[idx, :].deepcopy() # if the attack is in a continuous space we add a # little perturbation to the initial poisoning point - random_noise = CArray.rand(shape=xc.shape, - random_state=self.random_seed) + random_noise = CArray.rand(shape=xc.shape, random_state=self.random_seed) xc += 1e-3 * (2 * random_noise - 1) yc = CArray(init_dataset.Y[idx]).deepcopy() # true labels # randomly pick yc from a different class for i in range(yc.size): - labels = CArray.randsample(init_dataset.num_classes, 2, - random_state=self.random_seed) + labels = CArray.randsample( + init_dataset.num_classes, 2, random_state=self.random_seed + ) if yc[i] == labels[0]: yc[i] = labels[1] else: @@ -291,8 +301,7 @@ def _rnd_init_poisoning_points( return xc, yc - def _update_poisoned_clf(self, clf=None, tr=None, - train_normalizer=False): + def _update_poisoned_clf(self, clf=None, tr=None, train_normalizer=False): """ Trains classifier on D (original training data) plus {x,y} (new point). @@ -373,11 +382,14 @@ def objective_function(self, xc, acc=False): y_pred, score = clf.predict(self.val.X, return_decision_function=True) # targeted attacks - y_ts = CArray(self._y_target).repeat(score.shape[0]) \ - if self._y_target is not None else self.val.Y + y_ts = ( + CArray(self._y_target).repeat(score.shape[0]) + if self._y_target is not None + else self.val.Y + ) # TODO: binary loss check - if self._attacker_loss.class_type != 'softmax': + if self._attacker_loss.class_type != "softmax": score = CArray(score[:, 1].ravel()) if acc is True: @@ -416,23 +428,27 @@ def objective_function_gradient(self, xc, normalization=True): # computing gradient of loss(y, f(x)) w.r.t. f _, score = clf.predict(self.val.X, return_decision_function=True) - y_ts = CArray(self._y_target).repeat(score.shape[0]) \ - if self._y_target is not None else self.val.Y + y_ts = ( + CArray(self._y_target).repeat(score.shape[0]) + if self._y_target is not None + else self.val.Y + ) grad = CArray.zeros((xc.size,)) if clf.n_classes <= 2: - loss_grad = self._attacker_loss.dloss( - y_ts, CArray(score[:, 1]).ravel()) + loss_grad = self._attacker_loss.dloss(y_ts, CArray(score[:, 1]).ravel()) grad = self._gradient_fk_xc( - self._xc[idx, :], self._yc[idx], clf, loss_grad, tr) + self._xc[idx, :], self._yc[idx], clf, loss_grad, tr + ) else: # compute the gradient as a sum of the gradient for each class for c in range(clf.n_classes): loss_grad = self._attacker_loss.dloss(y_ts, score, c=c) - grad += self._gradient_fk_xc(self._xc[idx, :], self._yc[idx], - clf, loss_grad, tr, c) + grad += self._gradient_fk_xc( + self._xc[idx, :], self._yc[idx], clf, loss_grad, tr, c + ) if normalization: norm = grad.norm() @@ -514,24 +530,24 @@ def run(self, x, y, ds_init=None, max_iter=1): """ if self._n_points is None or self._n_points == 0: # evaluate performance on x,y - y_pred, scores = self._classifier.predict( - x, return_decision_function=True) + y_pred, scores = self._classifier.predict(x, return_decision_function=True) return y_pred, scores, ds_init, 0 # n_points > 0 - if self.init_type == 'random': + if self.init_type == "random": # randomly sample xc and yc xc, yc = self._rnd_init_poisoning_points() - elif self.init_type == 'loss_based': + elif self.init_type == "loss_based": xc, yc = self._loss_based_init_poisoning_points() else: raise NotImplementedError( - "Unknown poisoning point initialization strategy.") + "Unknown poisoning point initialization strategy." + ) # re-set previously-optimized points if passed as input if ds_init is not None: - xc[0:ds_init.num_samples, :] = ds_init.X - yc[0:ds_init.num_samples] = ds_init.Y + xc[0 : ds_init.num_samples, :] = ds_init.X + yc[0 : ds_init.num_samples] = ds_init.Y delta = 1.0 k = 0 @@ -540,12 +556,11 @@ def run(self, x, y, ds_init=None, max_iter=1): if self.n_points == 1: max_iter = 1 - metric = CMetric.create('accuracy') + metric = CMetric.create("accuracy") while delta > 0 and k < max_iter: - self.logger.info( - "Iter on all the poisoning samples: {:}".format(k)) + self.logger.info("Iter on all the poisoning samples: {:}".format(k)) xc_prv = xc.deepcopy() for i in range(self._n_points): @@ -555,50 +570,57 @@ def run(self, x, y, ds_init=None, max_iter=1): xc[idx, :] = self._run(xc, yc, idx=idx) # optimizing poisoning point 0 self.logger.info( - "poisoning point {:} optim fopt: {:}".format( - i, self._f_opt)) + "poisoning point {:} optim fopt: {:}".format(i, self._f_opt) + ) y_pred, scores = self._poisoned_clf.predict( - x, return_decision_function=True) + x, return_decision_function=True + ) acc = metric.performance_score(y_true=y, y_pred=y_pred) - self.logger.info("Poisoned classifier accuracy " - "on test data {:}".format(acc)) + self.logger.info( + "Poisoned classifier accuracy " "on test data {:}".format(acc) + ) delta = (xc_prv - xc).norm_2d() self.logger.info( - "Optimization with n points: " + str(self._n_points) + - " iter: " + str(k) + ", delta: " + - str(delta) + ", fopt: " + str(self._f_opt)) + "Optimization with n points: " + + str(self._n_points) + + " iter: " + + str(k) + + ", delta: " + + str(delta) + + ", fopt: " + + str(self._f_opt) + ) k += 1 # re-train the targeted classifier (copied) on poisoned data # to evaluate attack effectiveness on targeted classifier - clf, tr = self._update_poisoned_clf(clf=self._classifier, - tr=self._training_data, - train_normalizer=False) + clf, tr = self._update_poisoned_clf( + clf=self._classifier, tr=self._training_data, train_normalizer=False + ) # fixme: rechange train_normalizer=True y_pred, scores = clf.predict(x, return_decision_function=True) acc = metric.performance_score(y_true=y, y_pred=y_pred) - self.logger.info( - "Original classifier accuracy on test data {:}".format(acc)) + self.logger.info("Original classifier accuracy on test data {:}".format(acc)) return y_pred, scores, CDataset(xc, yc), self._f_opt def _loss_based_init_poisoning_points(self, n_points=None): - """ - """ + """ """ raise NotImplementedError def _compute_grad_inv(self, G, H, grad_loss_params): from scipy import linalg + det = linalg.det(H.tondarray()) if abs(det) < 1e-6: H_inv = CArray(linalg.pinv(H.tondarray())) else: H_inv = CArray(linalg.inv(H.tondarray())) - grad_mat = - CArray(G.dot(H_inv)) # d * (d + 1) + grad_mat = -CArray(G.dot(H_inv)) # d * (d + 1) self._d_params_xc = grad_mat @@ -608,8 +630,8 @@ def _compute_grad_inv(self, G, H, grad_loss_params): def _compute_grad_solve(self, G, H, grad_loss_params, sym_pos=True): from scipy import linalg - v = linalg.solve( - H.tondarray(), grad_loss_params.tondarray(), sym_pos=sym_pos) + + v = linalg.solve(H.tondarray(), grad_loss_params.tondarray(), sym_pos=sym_pos) v = CArray(v) gt = -G.dot(v) return gt.ravel() @@ -619,14 +641,18 @@ def _compute_grad_solve_iterative(self, G, H, grad_loss_params, tol=1e-6): if self._warm_start is None: v, convergence = linalg.cg( - H.tondarray(), grad_loss_params.tondarray(), tol=tol) + H.tondarray(), grad_loss_params.tondarray(), tol=tol + ) else: v, convergence = linalg.cg( - H.tondarray(), grad_loss_params.tondarray(), tol=tol, - x0=self._warm_start.tondarray()) + H.tondarray(), + grad_loss_params.tondarray(), + tol=tol, + x0=self._warm_start.tondarray(), + ) if convergence != 0: - warnings.warn('Convergence of poisoning algorithm not reached!') + warnings.warn("Convergence of poisoning algorithm not reached!") v = CArray(v.ravel()) diff --git a/src/secml/adv/attacks/poisoning/c_attack_poisoning_logistic_regression.py b/src/secml/adv/attacks/poisoning/c_attack_poisoning_logistic_regression.py index b7f89a95..b75285da 100644 --- a/src/secml/adv/attacks/poisoning/c_attack_poisoning_logistic_regression.py +++ b/src/secml/adv/attacks/poisoning/c_attack_poisoning_logistic_regression.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.adv.attacks.poisoning import CAttackPoisoning from secml.array import CArray from secml.ml.classifiers.clf_utils import convert_binary_labels @@ -63,33 +64,40 @@ class CAttackPoisoningLogisticRegression(CAttackPoisoning): If None, no fixed seed will be set. """ - __class_type = 'p-logistic' - - def __init__(self, classifier, - training_data, - val, - distance='l1', - dmax=0, - lb=0, - ub=1, - y_target=None, - solver_type='pgd-ls', - solver_params=None, - init_type='random', - random_seed=None): - - CAttackPoisoning.__init__(self, classifier=classifier, - training_data=training_data, - val=val, - distance=distance, - dmax=dmax, - lb=lb, - ub=ub, - y_target=y_target, - solver_type=solver_type, - solver_params=solver_params, - init_type=init_type, - random_seed=random_seed) + + __class_type = "p-logistic" + + def __init__( + self, + classifier, + training_data, + val, + distance="l1", + dmax=0, + lb=0, + ub=1, + y_target=None, + solver_type="pgd-ls", + solver_params=None, + init_type="random", + random_seed=None, + ): + + CAttackPoisoning.__init__( + self, + classifier=classifier, + training_data=training_data, + val=val, + distance=distance, + dmax=dmax, + lb=lb, + ub=ub, + y_target=y_target, + solver_type=solver_type, + solver_params=solver_params, + init_type=init_type, + random_seed=random_seed, + ) ########################################################################### # GRAD COMPUTATION @@ -118,13 +126,14 @@ def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None): d = xc.size - if hasattr(clf, 'C'): + if hasattr(clf, "C"): C = clf.C - elif hasattr(clf, 'alpha'): + elif hasattr(clf, "alpha"): C = 1.0 / clf.alpha else: - raise ValueError("Error: The classifier does not have neither C " - "nor alpha") + raise ValueError( + "Error: The classifier does not have neither C " "nor alpha" + ) H = clf.hessian_tr_params(tr.X, tr.Y) @@ -148,7 +157,8 @@ def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None): dbx_c = z_c * w # column vector dwx_c = ((yc * (-1 + sigm_c)) * CArray.eye(d, d)) + z_c * ( - w.dot(xc)) # matrix d*d + w.dot(xc) + ) # matrix d*d G = C * (dwx_c.append(dbx_c, axis=1)) diff --git a/src/secml/adv/attacks/poisoning/c_attack_poisoning_ridge.py b/src/secml/adv/attacks/poisoning/c_attack_poisoning_ridge.py index e476a33c..7e6e46a8 100644 --- a/src/secml/adv/attacks/poisoning/c_attack_poisoning_ridge.py +++ b/src/secml/adv/attacks/poisoning/c_attack_poisoning_ridge.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.adv.attacks.poisoning import CAttackPoisoning from secml.array import CArray from secml.ml.classifiers.clf_utils import convert_binary_labels @@ -63,33 +64,40 @@ class CAttackPoisoningRidge(CAttackPoisoning): If None, no fixed seed will be set. """ - __class_type = 'p-ridge' - - def __init__(self, classifier, - training_data, - val, - distance='l2', - dmax=0, - lb=0, - ub=1, - y_target=None, - solver_type='pgd-ls', - solver_params=None, - init_type=None, - random_seed=None): - - CAttackPoisoning.__init__(self, classifier=classifier, - training_data=training_data, - val=val, - distance=distance, - dmax=dmax, - lb=lb, - ub=ub, - y_target=y_target, - solver_type=solver_type, - solver_params=solver_params, - init_type=init_type, - random_seed=random_seed) + + __class_type = "p-ridge" + + def __init__( + self, + classifier, + training_data, + val, + distance="l2", + dmax=0, + lb=0, + ub=1, + y_target=None, + solver_type="pgd-ls", + solver_params=None, + init_type=None, + random_seed=None, + ): + + CAttackPoisoning.__init__( + self, + classifier=classifier, + training_data=training_data, + val=val, + distance=distance, + dmax=dmax, + lb=lb, + ub=ub, + y_target=y_target, + solver_type=solver_type, + solver_params=solver_params, + init_type=init_type, + random_seed=random_seed, + ) ########################################################################### # GRAD COMPUTATION @@ -128,14 +136,13 @@ def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None): # handle normalizer, if present xc = xc if clf.preprocess is None else clf.preprocess.transform(xc) xc = xc.ravel().atleast_2d() - #xk = xk if clf.preprocess is None else clf.preprocess.transform(xk) + # xk = xk if clf.preprocess is None else clf.preprocess.transform(xk) # gt is the gradient in feature space k = xk.shape[0] # num validation samples d = xk.shape[1] # num features - M = clf.w.T.dot( - xc) # xc is column, w is row (this is an outer product) + M = clf.w.T.dot(xc) # xc is column, w is row (this is an outer product) M += (clf.w.dot(xc.T) + clf.b - yc) * CArray.eye(d) db_xc = clf.w.T G = M.append(db_xc, axis=1) @@ -157,4 +164,3 @@ def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None): return clf.preprocess.gradient(xc0, w=gt) return gt - diff --git a/src/secml/adv/attacks/poisoning/c_attack_poisoning_svm.py b/src/secml/adv/attacks/poisoning/c_attack_poisoning_svm.py index e67f8faf..04b09e16 100644 --- a/src/secml/adv/attacks/poisoning/c_attack_poisoning_svm.py +++ b/src/secml/adv/attacks/poisoning/c_attack_poisoning_svm.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.adv.attacks.poisoning import CAttackPoisoning from secml.array import CArray @@ -67,38 +68,44 @@ class CAttackPoisoningSVM(CAttackPoisoning): If None, no fixed seed will be set. """ - __class_type = 'p-svm' - - def __init__(self, classifier, - training_data, - val, - distance='l1', - dmax=0, - lb=0, - ub=1, - y_target=None, - solver_type='pgd-ls', - solver_params=None, - init_type='random', - random_seed=None): - - CAttackPoisoning.__init__(self, classifier=classifier, - training_data=training_data, - val=val, - distance=distance, - dmax=dmax, - lb=lb, - ub=ub, - y_target=y_target, - solver_type=solver_type, - solver_params=solver_params, - init_type=init_type, - random_seed=random_seed) + + __class_type = "p-svm" + + def __init__( + self, + classifier, + training_data, + val, + distance="l1", + dmax=0, + lb=0, + ub=1, + y_target=None, + solver_type="pgd-ls", + solver_params=None, + init_type="random", + random_seed=None, + ): + + CAttackPoisoning.__init__( + self, + classifier=classifier, + training_data=training_data, + val=val, + distance=distance, + dmax=dmax, + lb=lb, + ub=ub, + y_target=y_target, + solver_type=solver_type, + solver_params=solver_params, + init_type=init_type, + random_seed=random_seed, + ) # check if SVM has been trained in the dual if self.classifier.kernel is None: - raise ValueError( - "Please retrain the SVM in the dual (kernel != None).") + raise ValueError("Please retrain the SVM in the dual (kernel != None).") # indices of support vectors (at previous iteration) # used to check if warm_start can be used in the iterative solver @@ -228,17 +235,19 @@ def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None): # gt is the derivative of the loss computed on a validation # set w.r.t. xc Kd_xc = self._Kd_xc(clf, alpha_c, xc, xk) - assert (clf.kernel.rv.shape[0] == clf.alpha.shape[1]) + assert clf.kernel.rv.shape[0] == clf.alpha.shape[1] gt = Kd_xc.dot(grad_loss_fk).ravel() # gradient of the loss w.r.t. xc xs, sv_idx = clf._sv_margin() # these points are already normalized if xs is None: - self.logger.debug("Warning: xs is empty " - "(all points are error vectors).") - return gt if clf.kernel.preprocess is None else \ - clf.kernel.preprocess.gradient(xc0, w=gt) + self.logger.debug("Warning: xs is empty " "(all points are error vectors).") + return ( + gt + if clf.kernel.preprocess is None + else clf.kernel.preprocess.gradient(xc0, w=gt) + ) s = xs.shape[0] diff --git a/src/secml/adv/attacks/poisoning/tests/c_attack_poisoning_testcases.py b/src/secml/adv/attacks/poisoning/tests/c_attack_poisoning_testcases.py index e68c9486..55a7ef62 100644 --- a/src/secml/adv/attacks/poisoning/tests/c_attack_poisoning_testcases.py +++ b/src/secml/adv/attacks/poisoning/tests/c_attack_poisoning_testcases.py @@ -89,8 +89,11 @@ def _preparation_for_grad_computation(self, xc): idx, clf, tr = self._clf_poisoning(xc) - y_ts = self.pois_obj._y_target if self.pois_obj._y_target is not \ - None else self.pois_obj.val.Y + y_ts = ( + self.pois_obj._y_target + if self.pois_obj._y_target is not None + else self.pois_obj.val.Y + ) # computing gradient of loss(y, f(x)) w.r.t. f score = clf.decision_function(self.pois_obj.val.X) @@ -108,9 +111,9 @@ def _grads_computation(self, xc): """ idx, clf, loss_grad, tr = self._preparation_for_grad_computation(xc) - self.pois_obj._gradient_fk_xc(self.pois_obj._xc[idx, :], - self.pois_obj._yc[idx], - clf, loss_grad, tr) + self.pois_obj._gradient_fk_xc( + self.pois_obj._xc[idx, :], self.pois_obj._yc[idx], clf, loss_grad, tr + ) grads = self.pois_obj._d_params_xc return grads @@ -144,7 +147,7 @@ def gradient_b_xc(self, xc): class CAttackPoisoningTestCases(CUnitTest): def _dataset_creation(self): - """Creates a blob dataset. """ + """Creates a blob dataset.""" self.n_features = 2 # Number of dataset features self.seed = 42 @@ -159,14 +162,15 @@ def _dataset_creation(self): centers=[(-1, -1), (+1, +1)], center_box=(-2, 2), cluster_std=0.8, - random_state=self.seed) + random_state=self.seed, + ) - self.logger.info( - "Loading `random_blobs` with seed: {:}".format(self.seed)) + self.logger.info("Loading `random_blobs` with seed: {:}".format(self.seed)) dataset = loader.load() - splitter = CDataSplitterShuffle(num_folds=1, train_size=self.n_tr, - random_state=3) + splitter = CDataSplitterShuffle( + num_folds=1, train_size=self.n_tr, random_state=3 + ) splitter.compute_indices(dataset) self.tr = dataset[splitter.tr_idx[0], :] self.ts = dataset[splitter.ts_idx[0], :] @@ -178,12 +182,14 @@ def _dataset_creation(self): self.lb = -1 self.ub = 1 - self.grid_limits = [(self.lb - 0.1, self.ub + 0.1), - (self.lb - 0.1, self.ub + 0.1)] + self.grid_limits = [ + (self.lb - 0.1, self.ub + 0.1), + (self.lb - 0.1, self.ub + 0.1), + ] def _create_poisoning_object(self): - self.solver_type = 'pgd-ls' - self.solver_params = {'eta': 0.05, 'eta_min': 0.05, 'eps': 1e-9} + self.solver_type = "pgd-ls" + self.solver_params = {"eta": 0.05, "eta_min": 0.05, "eps": 1e-9} self._poisoning_params = { "classifier": self.classifier, @@ -193,7 +199,7 @@ def _create_poisoning_object(self): "ub": self.ub, "solver_type": self.solver_type, "solver_params": self.solver_params, - 'random_seed': self.seed + "random_seed": self.seed, } self.poisoning = self.pois_class(**self._poisoning_params) @@ -201,7 +207,7 @@ def _create_poisoning_object(self): self.poisoning.n_points = 1 # 1 self.xc, self.yc = self.poisoning._rnd_init_poisoning_points() - self.logger.info('yc: ' + str(self.yc)) + self.logger.info("yc: " + str(self.yc)) def _set_up(self, poisoning_class, clf_idx, clf_class, clf_params): @@ -213,13 +219,12 @@ def _set_up(self, poisoning_class, clf_idx, clf_class, clf_params): self.clf_params = clf_params def _test_init(self, normalizer=None): - """Creates the classifier and fit it. """ + """Creates the classifier and fit it.""" self._dataset_creation() # create the classifier - self.classifier = self.clf_class(preprocess=normalizer, - **self.clf_params) + self.classifier = self.clf_class(preprocess=normalizer, **self.clf_params) # fit the classifier self.classifier.fit(self.tr.X, self.tr.Y) @@ -237,11 +242,11 @@ def _clf_poisoning(self): self.logger.info("Fun Eval: " + str(self.poisoning.f_eval)) self.logger.info("Grad Eval: " + str(self.poisoning.grad_eval)) - metric = CMetric.create('accuracy') - y_pred, scores = self.classifier.predict(self.ts.X, - return_decision_function=True) - orig_acc = metric.performance_score(y_true=self.ts.Y, - y_pred=y_pred) + metric = CMetric.create("accuracy") + y_pred, scores = self.classifier.predict( + self.ts.X, return_decision_function=True + ) + orig_acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred) self.logger.info("Error on testing data: " + str(1 - orig_acc)) tr = self.tr.append(CDataset(xc, self.yc)) @@ -249,12 +254,9 @@ def _clf_poisoning(self): pois_clf = self.classifier.deepcopy() pois_clf.fit(tr.X, tr.Y) - y_pred, scores = pois_clf.predict(self.ts.X, - return_decision_function=True) - pois_acc = metric.performance_score(y_true=self.ts.Y, - y_pred=y_pred) - self.logger.info( - "Error on testing data (poisoned): " + str(1 - pois_acc)) + y_pred, scores = pois_clf.predict(self.ts.X, return_decision_function=True) + pois_acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred) + self.logger.info("Error on testing data (poisoned): " + str(1 - pois_acc)) return pois_clf, xc @@ -265,8 +267,10 @@ def _test_attack_effectiveness(self, normalizer): after the attack. Finally, raises an error if the one computed on the poisoning point is not the highest. """ - self.logger.info("Test if the value of the attacker objective " - "function increases after the attack") + self.logger.info( + "Test if the value of the attacker objective " + "function increases after the attack" + ) self._test_init(normalizer) self._create_poisoning_object() @@ -277,36 +281,40 @@ def _test_attack_effectiveness(self, normalizer): fobj_x0 = self.poisoning.objective_function(xc=x0) fobj_xc = self.poisoning.objective_function(xc=xc) - self.logger.info( - "Objective function before the attack {:}".format(fobj_x0)) - self.logger.info( - "Objective function after the attack {:}".format(fobj_xc)) + self.logger.info("Objective function before the attack {:}".format(fobj_x0)) + self.logger.info("Objective function after the attack {:}".format(fobj_xc)) - self.assertLess(fobj_x0, fobj_xc, - "The attack does not increase the objective " - "function of the attacker. The fobj on the " - "original poisoning point is {:} while " - "on the optimized poisoning point is {:}.".format( - fobj_x0, fobj_xc)) + self.assertLess( + fobj_x0, + fobj_xc, + "The attack does not increase the objective " + "function of the attacker. The fobj on the " + "original poisoning point is {:} while " + "on the optimized poisoning point is {:}.".format(fobj_x0, fobj_xc), + ) if self.plot_creation: self._create_2D_plots(normalizer) def _test_clf_accuracy(self, normalizer): """Checks the accuracy of the classifier considered into the - test. """ + test.""" self._test_init(normalizer) - metric = CMetric.create('accuracy') - y_pred, scores = self.classifier.predict(self.ts.X, - return_decision_function=True) + metric = CMetric.create("accuracy") + y_pred, scores = self.classifier.predict( + self.ts.X, return_decision_function=True + ) acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred) self.logger.info("Error on testing data: " + str(1 - acc)) self.assertGreater( - acc, 0.70, "The trained classifier have an accuracy that " - "is too low to evaluate if the poisoning against " - "this classifier works") + acc, + 0.70, + "The trained classifier have an accuracy that " + "is too low to evaluate if the poisoning against " + "this classifier works", + ) ##################################################################### # PLOT FUNCTIONALITIES @@ -325,15 +333,18 @@ def _plot_func(self, fig, func, **func_kwargs): """Plot poisoning objective function""" fig.sp.plot_fun( func=func, - grid_limits=self.grid_limits, plot_levels=False, - n_grid_points=10, colorbar=True, **func_kwargs) + grid_limits=self.grid_limits, + plot_levels=False, + n_grid_points=10, + colorbar=True, + **func_kwargs + ) def _plot_obj_grads(self, fig, func, **func_kwargs): """Plot poisoning attacker objective function gradient""" fig.sp.plot_fgrads( - func, - grid_limits=self.grid_limits, - n_grid_points=20, **func_kwargs) + func, grid_limits=self.grid_limits, n_grid_points=20, **func_kwargs + ) def _create_2D_plots(self, normalizer): @@ -351,44 +362,48 @@ def _create_2D_plots(self, normalizer): box = self._create_box() fig.subplot(n_rows, n_cols, grid_slot=1) - fig.sp.title('Attacker objective and gradients') + fig.sp.title("Attacker objective and gradients") self._plot_func(fig, self.poisoning.objective_function) - self._plot_obj_grads( - fig, self.poisoning.objective_function_gradient) + self._plot_obj_grads(fig, self.poisoning.objective_function_gradient) fig.sp.plot_ds(self.tr) - fig.sp.plot_decision_regions(self.clf_orig, plot_background=False, - grid_limits=self.grid_limits, - n_grid_points=10, ) + fig.sp.plot_decision_regions( + self.clf_orig, + plot_background=False, + grid_limits=self.grid_limits, + n_grid_points=10, + ) - fig.sp.plot_constraint(box, grid_limits=self.grid_limits, - n_grid_points=10) - fig.sp.plot_path(self.poisoning.x_seq, - start_facecolor='r' if self.yc == 1 else 'b') + fig.sp.plot_constraint(box, grid_limits=self.grid_limits, n_grid_points=10) + fig.sp.plot_path( + self.poisoning.x_seq, start_facecolor="r" if self.yc == 1 else "b" + ) fig.subplot(n_rows, n_cols, grid_slot=2) - fig.sp.title('Classification error on val') - self._plot_func(fig, self.poisoning.objective_function, - acc=True) + fig.sp.title("Classification error on val") + self._plot_func(fig, self.poisoning.objective_function, acc=True) fig.sp.plot_ds(self.tr) - fig.sp.plot_decision_regions(pois_clf, plot_background=False, - grid_limits=self.grid_limits, - n_grid_points=10, ) + fig.sp.plot_decision_regions( + pois_clf, + plot_background=False, + grid_limits=self.grid_limits, + n_grid_points=10, + ) - fig.sp.plot_constraint(box, grid_limits=self.grid_limits, - n_grid_points=10) - fig.sp.plot_path(self.poisoning.x_seq, - start_facecolor='r' if self.yc == 1 else 'b') + fig.sp.plot_constraint(box, grid_limits=self.grid_limits, n_grid_points=10) + fig.sp.plot_path( + self.poisoning.x_seq, start_facecolor="r" if self.yc == 1 else "b" + ) fig.tight_layout() exp_idx = "2d_pois_" exp_idx += self.clf_idx - if self.classifier.class_type == 'svm': + if self.classifier.class_type == "svm": if self.classifier.kernel.preprocess is not None: exp_idx += "_norm" else: if self.classifier.preprocess is not None: exp_idx += "_norm" - fig.savefig(exp_idx + '.pdf', file_format='pdf') + fig.savefig(exp_idx + ".pdf", file_format="pdf") ##################################################################### # FUNCTIONS TO CHECK THE POISONING GRADIENT OF CLASSIFIERS @@ -400,23 +415,23 @@ def _plot_param_sub(self, fig, param_fun, grad_fun, clf): box = self._create_box() self._plot_func(fig, param_fun) - self._plot_obj_grads( - fig, grad_fun) + self._plot_obj_grads(fig, grad_fun) fig.sp.plot_ds(self.tr) - fig.sp.plot_decision_regions(clf, plot_background=False, - grid_limits=self.grid_limits, - n_grid_points=10, ) - fig.sp.plot_constraint(box, grid_limits=self.grid_limits, - n_grid_points=10) + fig.sp.plot_decision_regions( + clf, + plot_background=False, + grid_limits=self.grid_limits, + n_grid_points=10, + ) + fig.sp.plot_constraint(box, grid_limits=self.grid_limits, n_grid_points=10) def _create_params_grad_plot(self, normalizer): """ Show the gradient of the classifier parameters w.r.t the poisoning point """ - self.logger.info("Create 2-dimensional plot of the poisoning " - "gradient") + self.logger.info("Create 2-dimensional plot of the poisoning " "gradient") self._test_init(normalizer) @@ -432,29 +447,29 @@ def _create_params_grad_plot(self, normalizer): fig.title(self.clf_idx) fig.subplot(n_rows, n_cols, grid_slot=1) - fig.sp.title('w1 wrt xc') - self._plot_param_sub(fig, debug_pois_obj.w1, - debug_pois_obj.gradient_w1_xc, - pois_clf) + fig.sp.title("w1 wrt xc") + self._plot_param_sub( + fig, debug_pois_obj.w1, debug_pois_obj.gradient_w1_xc, pois_clf + ) fig.subplot(n_rows, n_cols, grid_slot=2) - fig.sp.title('w2 wrt xc') - self._plot_param_sub(fig, debug_pois_obj.w2, - debug_pois_obj.gradient_w2_xc, - pois_clf) + fig.sp.title("w2 wrt xc") + self._plot_param_sub( + fig, debug_pois_obj.w2, debug_pois_obj.gradient_w2_xc, pois_clf + ) fig.subplot(n_rows, n_cols, grid_slot=3) - fig.sp.title('b wrt xc') - self._plot_param_sub(fig, debug_pois_obj.b, - debug_pois_obj.gradient_b_xc, - pois_clf) + fig.sp.title("b wrt xc") + self._plot_param_sub( + fig, debug_pois_obj.b, debug_pois_obj.gradient_b_xc, pois_clf + ) fig.tight_layout() exp_idx = "2d_grad_pois_" exp_idx += self.clf_idx if self.classifier.preprocess is not None: exp_idx += "_norm" - fig.savefig(exp_idx + '.pdf', file_format='pdf') + fig.savefig(exp_idx + ".pdf", file_format="pdf") def _single_param_grad_check(self, xc, f_param, df_param, param_name): """ @@ -471,15 +486,15 @@ def _single_param_grad_check(self, xc, f_param, df_param, param_name): """ # Compare analytical gradient with its numerical approximation - check_grad_val = CFunction( - f_param, df_param).check_grad(xc, epsilon=100) - self.logger.info("Gradient difference between analytical {:} " - "gradient and numerical gradient: %s".format( - param_name), - str(check_grad_val)) - self.assertLess(check_grad_val, 1, - "poisoning gradient is wrong {:}".format( - check_grad_val)) + check_grad_val = CFunction(f_param, df_param).check_grad(xc, epsilon=100) + self.logger.info( + "Gradient difference between analytical {:} " + "gradient and numerical gradient: %s".format(param_name), + str(check_grad_val), + ) + self.assertLess( + check_grad_val, 1, "poisoning gradient is wrong {:}".format(check_grad_val) + ) def _test_single_poisoning_grad_check(self, normalizer): @@ -491,19 +506,19 @@ def _test_single_poisoning_grad_check(self, normalizer): debug_pois_obj = _CAttackPoisoningLinTest(self.poisoning) - self._single_param_grad_check(xc, debug_pois_obj.w1, - debug_pois_obj.gradient_w1_xc, - param_name='w1') - self._single_param_grad_check(xc, debug_pois_obj.w2, - debug_pois_obj.gradient_w2_xc, - param_name='w2') - self._single_param_grad_check(xc, debug_pois_obj.b, - debug_pois_obj.gradient_b_xc, - param_name='b') + self._single_param_grad_check( + xc, debug_pois_obj.w1, debug_pois_obj.gradient_w1_xc, param_name="w1" + ) + self._single_param_grad_check( + xc, debug_pois_obj.w2, debug_pois_obj.gradient_w2_xc, param_name="w2" + ) + self._single_param_grad_check( + xc, debug_pois_obj.b, debug_pois_obj.gradient_b_xc, param_name="b" + ) if self.plot_creation is True: self._create_params_grad_plot(normalizer) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_logistic_regression.py b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_logistic_regression.py index 74554821..95fe3bf8 100644 --- a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_logistic_regression.py +++ b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_logistic_regression.py @@ -8,11 +8,13 @@ class TestCAttackPoisoningLogisticRegression(CAttackPoisoningTestCases): """Unit test for CAttackPoisoningLogisticRegression.""" def setUp(self): - clf_params = {'C': 100, 'random_state': 42} - self._set_up(clf_idx='logistic_regression', - poisoning_class=CAttackPoisoningLogisticRegression, - clf_class=CClassifierLogistic, - clf_params=clf_params) + clf_params = {"C": 100, "random_state": 42} + self._set_up( + clf_idx="logistic_regression", + poisoning_class=CAttackPoisoningLogisticRegression, + clf_class=CClassifierLogistic, + clf_params=clf_params, + ) def test_poisoning_with_normalization_inside(self): """Test the CAttackPoisoning object when the classifier contains a @@ -40,5 +42,5 @@ def test_poisoning_without_normalization_inside(self): self._test_single_poisoning_grad_check(normalizer=None) -if __name__ == '__main__': +if __name__ == "__main__": CAttackPoisoningTestCases.main() diff --git a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_ridge.py b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_ridge.py index d376238f..f1469e0e 100644 --- a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_ridge.py +++ b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_ridge.py @@ -8,11 +8,13 @@ class TestCAttackPoisoningRidge(CAttackPoisoningTestCases): """Unit test for CAttackPoisoningRidge.""" def setUp(self): - clf_params = {'fit_intercept': True, 'alpha': 1} - self._set_up(clf_idx='ridge', - poisoning_class=CAttackPoisoningRidge, - clf_class=CClassifierRidge, - clf_params=clf_params) + clf_params = {"fit_intercept": True, "alpha": 1} + self._set_up( + clf_idx="ridge", + poisoning_class=CAttackPoisoningRidge, + clf_class=CClassifierRidge, + clf_params=clf_params, + ) def test_poisoning_with_normalization_inside(self): """Test the CAttackPoisoning object when the classifier contains a @@ -40,5 +42,5 @@ def test_poisoning_without_normalization_inside(self): self._test_single_poisoning_grad_check(normalizer=None) -if __name__ == '__main__': +if __name__ == "__main__": CAttackPoisoningTestCases.main() diff --git a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_svm.py b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_svm.py index c758388d..af14891c 100644 --- a/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_svm.py +++ b/src/secml/adv/attacks/poisoning/tests/test_c_attack_poisoning_svm.py @@ -8,11 +8,13 @@ class TestCAttackPoisoningSVMLinear(CAttackPoisoningTestCases): """Unit test for CAttackPoisoningLinearSVM.""" def setUp(self): - clf_params = {'kernel': 'linear', 'C': 0.1} - self._set_up(clf_idx='lin-svm', - poisoning_class=CAttackPoisoningSVM, - clf_class=CClassifierSVM, - clf_params=clf_params) + clf_params = {"kernel": "linear", "C": 0.1} + self._set_up( + clf_idx="lin-svm", + poisoning_class=CAttackPoisoningSVM, + clf_class=CClassifierSVM, + clf_params=clf_params, + ) def test_poisoning_with_normalization_inside(self): """Test the CAttackPoisoning object when the classifier contains a @@ -40,11 +42,13 @@ class TestCAttackPoisoningSVMRBF(CAttackPoisoningTestCases): """Unit test for CAttackPoisoningRBFSVM.""" def setUp(self): - clf_params = {'kernel': 'rbf', 'C': 10} - self._set_up(clf_idx='RBF-svm', - poisoning_class=CAttackPoisoningSVM, - clf_class=CClassifierSVM, - clf_params=clf_params) + clf_params = {"kernel": "rbf", "C": 10} + self._set_up( + clf_idx="RBF-svm", + poisoning_class=CAttackPoisoningSVM, + clf_class=CClassifierSVM, + clf_params=clf_params, + ) def test_poisoning_with_normalization_inside(self): """Test the CAttackPoisoning object when the classifier contains a @@ -68,5 +72,5 @@ def test_poisoning_without_normalization_inside(self): self._test_attack_effectiveness(normalizer=None) -if __name__ == '__main__': +if __name__ == "__main__": CAttackPoisoningTestCases.main() diff --git a/src/secml/adv/seceval/c_sec_eval.py b/src/secml/adv/seceval/c_sec_eval.py index c9e2c123..d5c7b949 100644 --- a/src/secml/adv/seceval/c_sec_eval.py +++ b/src/secml/adv/seceval/c_sec_eval.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import time from secml.core import CCreator @@ -38,8 +39,7 @@ class CSecEval(CCreator): .CAttack : class that implements the attack. """ - def __init__(self, attack, param_name, param_values, - save_adv_ds=False): + def __init__(self, attack, param_name, param_values, save_adv_ds=False): # initialize read-write attribute self._attack = None @@ -55,14 +55,14 @@ def __init__(self, attack, param_name, param_values, self._sec_eval_data.param_values = param_values if param_name not in self.attack.get_params(): - raise ValueError("param_name ({:}) should be a parameter of the " - "attack but it was not found. Run `attack.get_params()` " - "for getting the list of available parameters.".format( - param_name)) + raise ValueError( + "param_name ({:}) should be a parameter of the " + "attack but it was not found. Run `attack.get_params()` " + "for getting the list of available parameters.".format(param_name) + ) if not self._attack.y_target is None: - self._sec_eval_data.Y_target = CArray( - self._attack.y_target).deepcopy() + self._sec_eval_data.Y_target = CArray(self._attack.y_target).deepcopy() ########################################################################### # READ-WRITE ATTRIBUTES (INPUTS) @@ -134,7 +134,7 @@ def run_sec_eval(self, dataset, **kwargs): Parameters ---------- dataset : CDataset - Dataset that will be used to evaluate the performances of + Dataset that will be used to evaluate the performances of the classifier under attack. kwargs Additional keyword arguments for the `CAttack.run` method. @@ -148,23 +148,30 @@ def run_sec_eval(self, dataset, **kwargs): scores = CArray.zeros(shape=(dataset.num_samples, dataset.num_classes)) # create data structures to store attack output - self._sec_eval_data.scores = [CArray(scores).deepcopy() for i in range( - self._sec_eval_data.param_values.size)] - self._sec_eval_data.Y_pred = [CArray(Y_pred).deepcopy() for i in range( - self._sec_eval_data.param_values.size)] + self._sec_eval_data.scores = [ + CArray(scores).deepcopy() + for i in range(self._sec_eval_data.param_values.size) + ] + self._sec_eval_data.Y_pred = [ + CArray(Y_pred).deepcopy() + for i in range(self._sec_eval_data.param_values.size) + ] self._sec_eval_data.time = CArray.zeros( - shape=(self._sec_eval_data.param_values.size,)) + shape=(self._sec_eval_data.param_values.size,) + ) self._sec_eval_data.fobj = CArray.zeros( - shape=(self._sec_eval_data.param_values.size,)) + shape=(self._sec_eval_data.param_values.size,) + ) # manipulate attack samples adv_ds = None for k, value in enumerate(self._sec_eval_data.param_values): - self.logger.info("Attack with " + self._sec_eval_data.param_name + - " = " + str(value)) + self.logger.info( + "Attack with " + self._sec_eval_data.param_name + " = " + str(value) + ) # Update the value of parameter in attack class # (e.g., value of dmax in CEvasion) @@ -173,8 +180,9 @@ def run_sec_eval(self, dataset, **kwargs): start_time = time.time() # todo change x_init parameter with p_ds_init - attack_result = tuple(self._attack.run( - dataset.X, dataset.Y, ds_init=adv_ds, **kwargs)) + attack_result = tuple( + self._attack.run(dataset.X, dataset.Y, ds_init=adv_ds, **kwargs) + ) # Expanding generic attack results y_pred, scores, adv_ds, fobj = attack_result[:4] diff --git a/src/secml/adv/seceval/c_sec_eval_data.py b/src/secml/adv/seceval/c_sec_eval_data.py index 012e8b10..d0f285df 100644 --- a/src/secml/adv/seceval/c_sec_eval_data.py +++ b/src/secml/adv/seceval/c_sec_eval_data.py @@ -5,6 +5,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.core import CCreator from secml.array import CArray from secml.utils import pickle_utils as pk @@ -19,8 +20,9 @@ class CSecEvalData(CCreator): class_type : 'standard' """ - __super__ = 'CSecEvalData' - __class_type = 'generic' + + __super__ = "CSecEvalData" + __class_type = "generic" def __init__(self): @@ -216,4 +218,3 @@ def load(cls, path): data = cls() data.set_params(pk.load(path)) return data - diff --git a/src/secml/adv/seceval/tests/test_c_sec_eval_evasion.py b/src/secml/adv/seceval/tests/test_c_sec_eval_evasion.py index a4d59e7b..14007056 100644 --- a/src/secml/adv/seceval/tests/test_c_sec_eval_evasion.py +++ b/src/secml/adv/seceval/tests/test_c_sec_eval_evasion.py @@ -11,20 +11,17 @@ class TestCSecEval(CAttackEvasionTestCases): """Unittests for CSecEval (evasion attack).""" def setUp(self): - self.clf = CClassifierSVM(C=1.0) self.n_tr = 40 self.n_features = 10 self.seed = 0 - self.logger.info( - "Loading `random_blobs` with seed: {:}".format(self.seed)) - self.ds = self._load_blobs( - self.n_features, 2, sparse=False, seed=self.seed) + self.logger.info("Loading `random_blobs` with seed: {:}".format(self.seed)) + self.ds = self._load_blobs(self.n_features, 2, sparse=False, seed=self.seed) - self.tr = self.ds[:self.n_tr, :] - self.ts = self.ds[self.n_tr:, :] + self.tr = self.ds[: self.n_tr, :] + self.ts = self.ds[self.n_tr :, :] self.clf.fit(self.tr.X, self.tr.Y) @@ -33,16 +30,16 @@ def test_attack_pgd_ls(self): params = { "classifier": self.clf, "double_init_ds": self.tr, - "distance": 'l2', + "distance": "l2", "lb": -2, "ub": 2, "y_target": None, - "solver_params": {'eta': 0.1, 'eps': 1e-2} + "solver_params": {"eta": 0.1, "eps": 1e-2}, } attack = CAttackEvasionPGDLS(**params) attack.verbose = 1 - param_name = 'dmax' + param_name = "dmax" self._set_and_run(attack, param_name) @@ -59,30 +56,31 @@ def test_attack_pgd_ls_discrete(self): self.ds.X[self.ds.X > 1] = 1 self.ds.X[self.ds.X < -1] = -1 - self.tr = self.ds[:self.n_tr, :] - self.ts = self.ds[self.n_tr:, :] + self.tr = self.ds[: self.n_tr, :] + self.ts = self.ds[self.n_tr :, :] self.clf.fit(self.tr.X, self.tr.Y) # Set few features to the same max value w_new = self.clf.w.deepcopy() - w_new[CArray.randint( - self.clf.w.size, shape=5, random_state=0)] = self.clf.w.max() + w_new[CArray.randint(self.clf.w.size, shape=5, random_state=0)] = ( + self.clf.w.max() + ) self.clf._w = w_new params = { "classifier": self.clf, "double_init": False, - "distance": 'l1', + "distance": "l1", "lb": -1, "ub": 1, "y_target": None, - "solver_params": {'eta': 1, 'eps': 1e-2} + "solver_params": {"eta": 1, "eps": 1e-2}, } attack = CAttackEvasionPGDLS(**params) attack.verbose = 1 - param_name = 'dmax' + param_name = "dmax" self._set_and_run(attack, param_name, dmax_step=1) @@ -92,31 +90,37 @@ def test_attack_cleverhans(self): import cleverhans except ImportError as e: import unittest + + raise unittest.SkipTest(e) + try: + import tensorflow + except ImportError as e: + import unittest + raise unittest.SkipTest(e) from cleverhans.attacks import FastGradientMethod from secml.adv.attacks import CAttackEvasionCleverhans + params = { "classifier": self.clf, "surrogate_data": self.tr, "y_target": None, "clvh_attack_class": FastGradientMethod, - 'eps': 0.1, - 'clip_max': 2, - 'clip_min': -2, - 'ord': 2 + "eps": 0.1, + "clip_max": 2, + "clip_min": -2, + "ord": 2, } attack = CAttackEvasionCleverhans(**params) - param_name = 'attack_params.eps' + param_name = "attack_params.eps" self._set_and_run(attack, param_name) def _set_and_run(self, attack, param_name, dmax=2, dmax_step=0.5): """Create the SecEval and run it on test set.""" - param_values = CArray.arange( - start=0, step=dmax_step, - stop=dmax + dmax_step) + param_values = CArray.arange(start=0, step=dmax_step, stop=dmax + dmax_step) sec_eval = CSecEval( attack=attack, @@ -129,21 +133,23 @@ def _set_and_run(self, attack, param_name, dmax=2, dmax_step=0.5): self._plot_sec_eval(sec_eval) # At the end of the seceval we expect 0% accuracy - self.assertFalse( - CArray(sec_eval.sec_eval_data.Y_pred[-1] == self.ts.Y).any()) + self.assertFalse(CArray(sec_eval.sec_eval_data.Y_pred[-1] == self.ts.Y).any()) @staticmethod def _plot_sec_eval(sec_eval): - figure = CFigure(height=5, width=5) - figure.sp.plot_sec_eval(sec_eval.sec_eval_data, - label='SVM', marker='o', - show_average=True, mean=True) + figure.sp.plot_sec_eval( + sec_eval.sec_eval_data, + label="SVM", + marker="o", + show_average=True, + mean=True, + ) figure.sp.title(sec_eval.attack.__class__.__name__) figure.subplots_adjust() figure.show() - if __name__ == '__main__': + if __name__ == "__main__": CAttackEvasionTestCases.main() diff --git a/src/secml/array/array_utils.py b/src/secml/array/array_utils.py index ca5a1b7a..1093c3fa 100644 --- a/src/secml/array/array_utils.py +++ b/src/secml/array/array_utils.py @@ -5,12 +5,13 @@ .. moduleauthor:: Marco Melis """ + import numpy as np from scipy.sparse import issparse from secml.core.type_utils import is_int, is_bool, is_tuple, is_slice -__all__ = ['is_vector_index', 'tuple_atomic_tolist', 'tuple_sequence_tondarray'] +__all__ = ["is_vector_index", "tuple_atomic_tolist", "tuple_sequence_tondarray"] def is_vector_index(idx): @@ -31,14 +32,23 @@ def is_vector_index(idx): any axis with size 1, else False. """ - return True if (np.asanyarray(idx) == 0 or np.asanyarray(idx) == -1 or # integers 0, -1 - (np.asanyarray(idx) == True and np.asanyarray(idx).dtype in (bool, np.bool_)) or # True but not '1' - idx == slice(None, None, None) or # : - idx == slice(0, None, None) or # 0: - idx == slice(0, 1, None) or # 0:1 - idx == slice(None, 1, None) or # :1 - idx == slice(-1, 0, None) # -1 - ) else False + return ( + True + if ( + np.asanyarray(idx) == 0 + or np.asanyarray(idx) == -1 # integers 0, -1 + or ( + np.asanyarray(idx) == True + and np.asanyarray(idx).dtype in (bool, np.bool_) + ) # True but not '1' + or idx == slice(None, None, None) # : + or idx == slice(0, None, None) # 0: + or idx == slice(0, 1, None) # 0:1 + or idx == slice(None, 1, None) # :1 + or idx == slice(-1, 0, None) # -1 + ) + else False + ) def tuple_atomic_tolist(idx): @@ -90,8 +100,13 @@ def tuple_sequence_tondarray(idx): if not is_tuple(idx): raise TypeError("input must be a tuple") # Converting CArray/CSparse/CDense to ndarray - idx = tuple([elem.tondarray() if - hasattr(elem, 'tondarray') else elem for elem in idx]) + idx = tuple( + [elem.tondarray() if hasattr(elem, "tondarray") else elem for elem in idx] + ) # Converting not-slice and not-None to ndarray - return tuple([np.asarray(elem) if not (is_slice(elem) or elem is None) - else elem for elem in idx]) + return tuple( + [ + np.asarray(elem) if not (is_slice(elem) or elem is None) else elem + for elem in idx + ] + ) diff --git a/src/secml/array/c_array.py b/src/secml/array/c_array.py index 87756886..0391ebf4 100644 --- a/src/secml/array/c_array.py +++ b/src/secml/array/c_array.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from copy import deepcopy import numpy as np @@ -15,8 +16,14 @@ from secml.array.c_dense import CDense from secml.array.c_sparse import CSparse -from secml.core.type_utils import \ - is_int, is_scalar, is_bool, is_ndarray, is_scsarray, to_builtin +from secml.core.type_utils import ( + is_int, + is_scalar, + is_bool, + is_ndarray, + is_scsarray, + to_builtin, +) def _instance_data(data): @@ -59,10 +66,9 @@ def _instance_data(data): return NotImplemented else: # Unknown object returned by the calling method, raise error - raise TypeError( - "objects of type {:} not supported.".format(type(data))) - - + raise TypeError("objects of type {:} not supported.".format(type(data))) + + class CArray(_CArrayInterface): """Creates an array. @@ -127,10 +133,10 @@ class CArray(_CArrayInterface): (2, 2, 2) """ - __slots__ = '_data' # CArray has only one slot for the buffer - def __init__( - self, data, dtype=None, copy=False, shape=None, tosparse=False): + __slots__ = "_data" # CArray has only one slot for the buffer + + def __init__(self, data, dtype=None, copy=False, shape=None, tosparse=False): # Not implemented operators return NotImplemented if data is NotImplemented: @@ -142,15 +148,17 @@ def __init__( if isinstance(data, CArray): # Light casting: store data after format conversion - self._data = data.tosparse()._data if tosparse is True or \ - data.issparse else data.todense()._data + self._data = ( + data.tosparse()._data + if tosparse is True or data.issparse + else data.todense()._data + ) if copy is True and self.isdense == data.isdense: # copy needed and no previous change of format self._data = deepcopy(self._data) if dtype is not None and self._data.dtype != dtype: self._data = self._data.astype(dtype) - elif tosparse is True or \ - isinstance(data, CSparse) or scs.issparse(data): + elif tosparse is True or isinstance(data, CSparse) or scs.issparse(data): self._data = CSparse(data, dtype, copy, shape) else: self._data = CDense(data, dtype, copy, shape) @@ -770,11 +778,12 @@ def todense(self, dtype=None, shape=None): """ if self.issparse is False and (shape is not None or dtype is not None): - raise ValueError("array is already dense. Use astype() or " - "reshape() function to alter array shape/dtype.") + raise ValueError( + "array is already dense. Use astype() or " + "reshape() function to alter array shape/dtype." + ) elif self.issparse is True: - return self.__class__( - self._data.todense(), shape=shape, dtype=dtype) + return self.__class__(self._data.todense(), shape=shape, dtype=dtype) else: return self @@ -818,11 +827,12 @@ def tosparse(self, dtype=None, shape=None): """ if self.isdense is False and (shape is not None or dtype is not None): - raise ValueError("array is already sparse. Use astype() or " - "reshape() unction to alter array shape/dtype.") + raise ValueError( + "array is already sparse. Use astype() or " + "reshape() unction to alter array shape/dtype." + ) elif self.isdense is True: - return self.__class__( - self._data, tosparse=True, dtype=dtype, shape=shape) + return self.__class__(self._data, tosparse=True, dtype=dtype, shape=shape) else: return self @@ -1050,9 +1060,10 @@ def __add__(self, other): other = other.todense() return self.__class__(self._data.__add__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for +: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for +: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: return NotImplemented @@ -1114,9 +1125,10 @@ def __sub__(self, other): other = other.todense() return self.__class__(self._data.__sub__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for -: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for -: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: return NotImplemented @@ -1180,9 +1192,10 @@ def __mul__(self, other): return self.__class__(other._data.__mul__(self._data)) return self.__class__(self._data.__mul__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for *: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for *: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: return NotImplemented @@ -1241,9 +1254,10 @@ def __truediv__(self, other): other = other.todense() return self.__class__(self._data.__truediv__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for /: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for /: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: return NotImplemented @@ -1297,9 +1311,10 @@ def __floordiv__(self, other): other = other.todense() return self.__class__(self._data.__floordiv__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for //: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for //: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: return NotImplemented @@ -1373,9 +1388,10 @@ def __pow__(self, power): power = power.todense() return self.__class__(self._data.__pow__(power._data)) elif is_ndarray(power) or is_scsarray(power): - raise TypeError("unsupported operand type(s) for **: " - "'{:}' and '{:}'".format(type(self).__name__, - type(power).__name__)) + raise TypeError( + "unsupported operand type(s) for **: " + "'{:}' and '{:}'".format(type(self).__name__, type(power).__name__) + ) else: return NotImplemented @@ -1431,9 +1447,10 @@ def __eq__(self, other): other = other.todense() return self.__class__(self._data.__eq__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for ==: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for ==: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: # Any unmanaged object is considered not-equal return False @@ -1468,9 +1485,10 @@ def __lt__(self, other): other = other.todense() return self.__class__(self._data.__lt__(other._data)) else: - raise TypeError("unsupported operand type(s) for <: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for <: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) def __le__(self, other): """Element-wise <= operator. @@ -1503,9 +1521,10 @@ def __le__(self, other): other = other.todense() return self.__class__(self._data.__le__(other._data)) else: - raise TypeError("unsupported operand type(s) for <=: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for <=: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) def __gt__(self, other): """Element-wise > operator. @@ -1538,9 +1557,10 @@ def __gt__(self, other): other = other.todense() return self.__class__(self._data.__gt__(other._data)) else: - raise TypeError("unsupported operand type(s) for >: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for >: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) def __ge__(self, other): """Element-wise >= operator. @@ -1573,9 +1593,10 @@ def __ge__(self, other): other = other.todense() return self.__class__(self._data.__ge__(other._data)) else: - raise TypeError("unsupported operand type(s) for >=: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for >=: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) def __ne__(self, other): """Element-wise != operator. @@ -1608,9 +1629,10 @@ def __ne__(self, other): other = other.todense() return self.__class__(self._data.__ne__(other._data)) elif is_ndarray(other) or is_scsarray(other): - raise TypeError("unsupported operand type(s) for !=: " - "'{:}' and '{:}'".format(type(self).__name__, - type(other).__name__)) + raise TypeError( + "unsupported operand type(s) for !=: " + "'{:}' and '{:}'".format(type(self).__name__, type(other).__name__) + ) else: # Any unmanaged object is considered not-equal return True @@ -1679,8 +1701,7 @@ def __str__(self): # Storing numpy format settings np_format = np.get_printoptions() # Preventing newlines and toggling summarization as often as possible - np.set_printoptions( - threshold=36, linewidth=79, edgeitems=3, precision=6) + np.set_printoptions(threshold=36, linewidth=79, edgeitems=3, precision=6) # Build the string a_str = self.__class__.__name__ + "(" + str(self._data) + ")" # Restoring numpy format settings @@ -1719,24 +1740,24 @@ def __repr__(self): """ import re + # Storing numpy format settings np_format = np.get_printoptions() # Preventing newlines and toggling summarization as often as possible - np.set_printoptions( - threshold=36, linewidth=79, edgeitems=3, precision=6) + np.set_printoptions(threshold=36, linewidth=79, edgeitems=3, precision=6) # Starting with CArray(shape)... repr_str = self.__class__.__name__ + str(self.shape) # Replace any line separator - array_repr = re.sub(r'\r|\n', '', str(self._data)) + array_repr = re.sub(r"\r|\n", "", str(self._data)) if self.isdense is True: - repr_str += '(dense: ' + array_repr + repr_str += "(dense: " + array_repr elif self.issparse is True: - repr_str += '(sparse: ' + repr_str += "(sparse: " # Replace any tabuler char - repr_str += re.sub(r'\t', ' ', array_repr[2:]) + repr_str += re.sub(r"\t", " ", array_repr[2:]) # Restoring numpy format settings np.set_printoptions(**np_format) - return repr_str + ')' + return repr_str + ")" # ------------------------------ # # # # # # # COPY UTILS # # # # # # @@ -1786,13 +1807,21 @@ def save(self, datafile, overwrite=False): if self.issparse is True and not isinstance(datafile, str): # TODO: WE CAN ALLOW FILE HANDLE SAVING?! raise NotImplementedError( - "Save using file handle is only supported for dense arrays.") + "Save using file handle is only supported for dense arrays." + ) else: self._data.save(datafile, overwrite=overwrite) @classmethod - def load(cls, datafile, dtype=float, arrayformat='dense', - startrow=0, skipend=0, cols=None): + def load( + cls, + datafile, + dtype=float, + arrayformat="dense", + startrow=0, + skipend=0, + cols=None, + ): """Load array data from plain text file. The default encoding is `utf-8`. @@ -1821,12 +1850,19 @@ def load(cls, datafile, dtype=float, arrayformat='dense', """ # TODO: CMatrix should return a 2-D, CVector a 1-D and so on... - if arrayformat == 'dense': + if arrayformat == "dense": if cols is None: cols = CArray([]) - return cls(CDense.load(datafile, dtype=dtype, startrow=startrow, - skipend=skipend, cols=cols._data)) - elif arrayformat == 'sparse': + return cls( + CDense.load( + datafile, + dtype=dtype, + startrow=startrow, + skipend=skipend, + cols=cols._data, + ) + ) + elif arrayformat == "sparse": return cls(CSparse.load(datafile, dtype=dtype)) else: raise ValueError("Supported arrayformat are 'dense' and 'sparse'.") @@ -1922,8 +1958,7 @@ def flatten(self): (0, 2) 3) """ - return self.__class__( - CArray(self.ravel(), tosparse=self.issparse).deepcopy()) + return self.__class__(CArray(self.ravel(), tosparse=self.issparse).deepcopy()) def atleast_2d(self): """View original array with at least two dimensions. @@ -2286,11 +2321,12 @@ def clip(self, c_min, c_max): """ if c_min > c_max: - raise ValueError("c_min ({:}) must be lower than " - "c_max ({:})".format(c_min, c_max)) + raise ValueError( + "c_min ({:}) must be lower than " "c_max ({:})".format(c_min, c_max) + ) return self.__class__(self._data.clip(c_min, c_max)) - def sort(self, axis=-1, kind='quicksort', inplace=False): + def sort(self, axis=-1, kind="quicksort", inplace=False): """Sort an array. Parameters @@ -2350,7 +2386,7 @@ def sort(self, axis=-1, kind='quicksort', inplace=False): # We return ourselves for inplace sort otherwise a new object return self if inplace is True else self.__class__(data_sorted) - def argsort(self, axis=-1, kind='quicksort'): + def argsort(self, axis=-1, kind="quicksort"): """Returns the indices that would sort an array. Perform an indirect sort along the given axis using @@ -2471,8 +2507,9 @@ def apply_along_axis(self, func, axis, *args, **kwargs): for i in range(self.shape[0]): out[i] = func(data_2d[i, :], *args, **kwargs) else: - raise ValueError("`apply_along_axis` currently available " - "for 1-D and 2-D arrays only.") + raise ValueError( + "`apply_along_axis` currently available " "for 1-D and 2-D arrays only." + ) return out @@ -2817,9 +2854,11 @@ def maximum(self, array): """ other_carray = self.__class__(array) if not self.has_compatible_shape(other_carray): - raise ValueError("arrays to compare must have the same shape. " - "{:} different from {:}." - "".format(self.shape, other_carray.shape)) + raise ValueError( + "arrays to compare must have the same shape. " + "{:} different from {:}." + "".format(self.shape, other_carray.shape) + ) if self.issparse: other_carray = other_carray.tosparse() @@ -2872,9 +2911,11 @@ def minimum(self, array): """ other_carray = self.__class__(array) if not self.has_compatible_shape(other_carray): - raise ValueError("arrays to compare must have the same shape. " - "{:} different from {:}." - "".format(self.shape, other_carray.shape)) + raise ValueError( + "arrays to compare must have the same shape. " + "{:} different from {:}." + "".format(self.shape, other_carray.shape) + ) if self.issparse: other_carray = other_carray.tosparse() @@ -3021,8 +3062,7 @@ def binary_search(self, value): CArray([0 0 1 3]) """ - return _instance_data( - self._data.binary_search(self.__class__(value)._data)) + return _instance_data(self._data.binary_search(self.__class__(value)._data)) # ------------- # # DATA ANALYSIS # @@ -3062,8 +3102,7 @@ def get_nnz(self, axis=None): # Return a scalar if axis is None, CArray otherwise return out if axis is None else self.__class__(out) - def unique(self, return_index=False, - return_inverse=False, return_counts=False): + def unique(self, return_index=False, return_inverse=False, return_counts=False): """Find the unique elements of an array. There are three optional outputs in addition to the unique elements: @@ -3118,8 +3157,7 @@ def unique(self, return_index=False, CArray([3 2]) """ - out = self._data.unique( - return_index, return_inverse, return_counts) + out = self._data.unique(return_index, return_inverse, return_counts) if isinstance(out, tuple): # unique returned multiple elements return tuple([self.__class__(elem) for elem in out]) else: @@ -3152,8 +3190,9 @@ def bincount(self, minlength=0): CArray([0 2 1 1 0 0 1]) """ - if (self.isdense and self.ndim > 1) or \ - (self.issparse and not self.is_vector_like): + if (self.isdense and self.ndim > 1) or ( + self.issparse and not self.is_vector_like + ): raise ValueError("Array must be one-dimensional.") return self.__class__(self._data.bincount(minlength)) @@ -3224,14 +3263,15 @@ def norm(self, order=None): if self.is_vector_like is False: raise ValueError( "Array has shape {:}. Call .norm_2d() to compute " - "matricial norm or vector norm along axis.".format(self.shape)) + "matricial norm or vector norm along axis.".format(self.shape) + ) # Flat array to simplify dense case array = self.ravel() # 'fro' is a matrix-norm. We can exit... - if order == 'fro': - raise ValueError('Invalid norm order for vectors.') + if order == "fro": + raise ValueError("Invalid norm order for vectors.") return _instance_data(array._data.norm(order)) @@ -3322,11 +3362,13 @@ def norm_2d(self, order=None, axis=None, keepdims=True): raise NotImplementedError if self.issparse is True: - out = _instance_data(self.atleast_2d()._data.norm_2d( - order, axis=axis, keepdims=keepdims)) + out = _instance_data( + self.atleast_2d()._data.norm_2d(order, axis=axis, keepdims=keepdims) + ) else: - out = _instance_data(self.atleast_2d()._data.norm( - order, axis=axis, keepdims=keepdims)) + out = _instance_data( + self.atleast_2d()._data.norm(order, axis=axis, keepdims=keepdims) + ) # Return float if axis is None, else CArray if axis is None: @@ -3522,7 +3564,7 @@ def all(self, axis=None, keepdims=True): Notes ----- - Not a Number (NaN), positive infinity and negative infinity + Not a Number (NaN), positive infinity and negative infinity evaluate to True because these are not equal to zero. Examples @@ -4835,9 +4877,13 @@ def interp(self, x_data, y_data, return_left=None, return_right=None): """ return self.__class__( - self._data.interp(CArray(x_data).astype(float)._data, - CArray(y_data).astype(float)._data, - return_left, return_right)) + self._data.interp( + CArray(x_data).astype(float)._data, + CArray(y_data).astype(float)._data, + return_left, + return_right, + ) + ) def inv(self): """Compute the (multiplicative) inverse of a square matrix. @@ -5192,8 +5238,7 @@ def rand(cls, shape, random_state=None, sparse=False, density=0.01): if sparse is True: # We fake the shape to create a sparse "vector" shape = (1, shape[0]) if len(shape) == 1 else shape - return cls(CSparse.rand( - shape, random_state=random_state, density=density)) + return cls(CSparse.rand(shape, random_state=random_state, density=density)) else: return cls(CDense.rand(shape, random_state=random_state)) @@ -5240,8 +5285,9 @@ def randn(cls, shape, random_state=None): return cls(CDense.randn(shape, random_state=random_state)) @classmethod - def randuniform(cls, low=0.0, high=1.0, - shape=None, random_state=None, sparse=False): + def randuniform( + cls, low=0.0, high=1.0, shape=None, random_state=None, sparse=False + ): """Return random samples from low (inclusive) to high (exclusive). Samples are uniformly distributed over the half-open @@ -5298,21 +5344,22 @@ def randuniform(cls, low=0.0, high=1.0, """ if CArray(low > high).any(): - raise ValueError( - "values in `low` should be lower than values in `high`") + raise ValueError("values in `low` should be lower than values in `high`") if isinstance(low, CArray): low = low.todense()._data # Convert to CDense if isinstance(high, CArray): high = high.todense()._data # Convert to CDense - return cls(CDense.randuniform( - low=low, high=high, shape=shape, random_state=random_state), - tosparse=sparse) + return cls( + CDense.randuniform( + low=low, high=high, shape=shape, random_state=random_state + ), + tosparse=sparse, + ) @classmethod - def randint(cls, low, high=None, - shape=None, random_state=None, sparse=False): + def randint(cls, low, high=None, shape=None, random_state=None, sparse=False): """Return random integers from low (inclusive) to high (exclusive). Return random integers from the "discrete uniform" distribution @@ -5367,12 +5414,13 @@ def randint(cls, low, high=None, [0 2]]) """ - return cls(CDense.randint(low=low, high=high, shape=shape, - random_state=random_state), tosparse=sparse) + return cls( + CDense.randint(low=low, high=high, shape=shape, random_state=random_state), + tosparse=sparse, + ) @classmethod - def randsample(cls, a, shape=None, - replace=False, random_state=None, sparse=False): + def randsample(cls, a, shape=None, replace=False, random_state=None, sparse=False): """Generates a random sample from a given array. Parameters @@ -5423,9 +5471,12 @@ def randsample(cls, a, shape=None, """ a = a if not isinstance(a, cls) else a.ravel()._data - return cls(CDense.randsample( - a=a, shape=shape, replace=replace, - random_state=random_state), tosparse=sparse) + return cls( + CDense.randsample( + a=a, shape=shape, replace=replace, random_state=random_state + ), + tosparse=sparse, + ) @classmethod def linspace(cls, start, stop, num=50, endpoint=True, sparse=False): @@ -5484,8 +5535,9 @@ def linspace(cls, start, stop, num=50, endpoint=True, sparse=False): CArray([3. 3.2 3.4 3.6 3.8]) """ - return cls(CDense.linspace( - start, stop, num=num, endpoint=endpoint), tosparse=sparse) + return cls( + CDense.linspace(start, stop, num=num, endpoint=endpoint), tosparse=sparse + ) @classmethod def arange(cls, start=None, stop=None, step=1, dtype=None, sparse=False): @@ -5556,8 +5608,10 @@ def arange(cls, start=None, stop=None, step=1, dtype=None, sparse=False): CArray([0. 0.8 1.6 2.4 3.2]) """ - return cls(CDense.arange( - start=start, stop=stop, step=step, dtype=dtype), tosparse=sparse) + return cls( + CDense.arange(start=start, stop=stop, step=step, dtype=dtype), + tosparse=sparse, + ) @classmethod def concatenate(cls, array1, array2, axis=1): @@ -5627,29 +5681,35 @@ def concatenate(cls, array1, array2, axis=1): """ # Return sparse only if both original arrays are sparse if isinstance(array1, cls) and array1.issparse: - return cls(CSparse.concatenate( - array1._data, cls(array2, tosparse=True)._data, axis=axis)) + return cls( + CSparse.concatenate( + array1._data, cls(array2, tosparse=True)._data, axis=axis + ) + ) else: - return cls(CDense.concatenate( - cls(array1)._data, cls(array2).todense()._data, axis=axis)) + return cls( + CDense.concatenate( + cls(array1)._data, cls(array2).todense()._data, axis=axis + ) + ) @classmethod def comblist(cls, list_of_list, dtype=float): """Generate a cartesian product of list of list input. - + Parameters ---------- list_of_list : list of list 1-D arrays to form the cartesian product of. dtype : str or dtype Datatype of output array. Default float. - + Returns ------- CArray 2-D array of shape (M, len(arrays)) containing cartesian products between input arrays. - + Examples -------- >>> print(CArray.comblist([[1, 2, 3], [4, 5], [6, 7]])) @@ -5674,7 +5734,7 @@ def comblist(cls, list_of_list, dtype=float): return cls(CDense.comblist(list_of_list, dtype=dtype)) @classmethod - def meshgrid(cls, xi, indexing='xy'): + def meshgrid(cls, xi, indexing="xy"): """Return coordinate matrices from coordinate vectors. DENSE ARRAYS ONLY @@ -5715,7 +5775,7 @@ def meshgrid(cls, xi, indexing='xy'): CArray([[2 2 2] [4 4 4] [6 6 6]]) - + >>> xv, yv = CArray.meshgrid((x, y), indexing='ij') >>> print(xv) CArray([[1 1 1] @@ -5728,8 +5788,7 @@ def meshgrid(cls, xi, indexing='xy'): """ xi = tuple(x._data for x in xi) # This is correct-ish, xi are CArrays - return tuple(cls(elem) for elem in CDense.meshgrid( - xi, indexing=indexing)) + return tuple(cls(elem) for elem in CDense.meshgrid(xi, indexing=indexing)) @classmethod def from_iterables(cls, iterables_list): @@ -5759,4 +5818,5 @@ def from_iterables(cls, iterables_list): """ import itertools + return CArray(list(itertools.chain.from_iterable(iterables_list))) diff --git a/src/secml/array/c_array_interface.py b/src/secml/array/c_array_interface.py index 1b2366c8..431af78c 100644 --- a/src/secml/array/c_array_interface.py +++ b/src/secml/array/c_array_interface.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from copy import deepcopy @@ -132,8 +133,7 @@ def __getitem__(self, idx): def item(self): """Returns the single element in the array as built-in type.""" if self.size != 1: - raise ValueError( - "cannot use .item(). Array has size {:}".format(self.size)) + raise ValueError("cannot use .item(). Array has size {:}".format(self.size)) return to_builtin(self.tondarray().ravel()[0]) @abstractmethod @@ -379,12 +379,12 @@ def abs(self): return abs(self) @abstractmethod - def sort(self, axis=-1, kind='quicksort', inplace=False): + def sort(self, axis=-1, kind="quicksort", inplace=False): """Sort an array.""" raise NotImplementedError @abstractmethod - def argsort(self, axis=-1, kind='quicksort'): + def argsort(self, axis=-1, kind="quicksort"): """Returns the indices that would sort an array.""" raise NotImplementedError @@ -465,8 +465,7 @@ def get_nnz(self, axis=None): raise NotImplementedError @abstractmethod - def unique(self, return_index=False, - return_inverse=False, return_counts=False): + def unique(self, return_index=False, return_inverse=False, return_counts=False): """Find the unique elements of an array.""" raise NotImplementedError @@ -743,6 +742,6 @@ def comblist(cls, list_of_list, dtype=float): @classmethod @abstractmethod - def meshgrid(cls, xi, indexing='xy'): + def meshgrid(cls, xi, indexing="xy"): """Return coordinate matrices from coordinate vectors.""" raise NotImplementedError diff --git a/src/secml/array/c_sparse.py b/src/secml/array/c_sparse.py index 17694a36..f566c505 100644 --- a/src/secml/array/c_sparse.py +++ b/src/secml/array/c_sparse.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import scipy.sparse as scs from scipy.sparse.linalg import inv, norm import numpy as np @@ -14,8 +15,16 @@ from secml.array.c_dense import CDense -from secml.core.type_utils import is_ndarray, is_list_of_lists, \ - is_list, is_tuple, is_slice, is_scalar, is_int, is_bool +from secml.core.type_utils import ( + is_ndarray, + is_list_of_lists, + is_list, + is_tuple, + is_slice, + is_scalar, + is_int, + is_bool, +) from secml.core.constants import inf @@ -82,7 +91,8 @@ def _shape_atleast_2d(shape): class CSparse(_CArrayInterface): """Sparse array. Encapsulation for scipy.sparse.csr_matrix.""" - __slots__ = '_data' # CSparse has only one slot for the scs.csr_matrix + + __slots__ = "_data" # CSparse has only one slot for the scs.csr_matrix def __init__(self, data=None, dtype=None, copy=False, shape=None): """Sparse matrix initialization.""" @@ -223,13 +233,13 @@ def _toscs(self, scs_format, shape=None): Reshape is performed after casting. """ - out = getattr(self._data, 'to{:}'.format(scs_format))() + out = getattr(self._data, "to{:}".format(scs_format))() if shape is not None: if not is_tuple(shape) or len(shape) != 2: # TODO: ERROR IS PROPERLY RAISED IN SCIPY > 1.4 - raise ValueError('matrix shape must be two-dimensional') + raise ValueError("matrix shape must be two-dimensional") # output of scipy.reshape not necessarily of the same format - return getattr(out.reshape(shape), 'to{:}'.format(scs_format))() + return getattr(out.reshape(shape), "to{:}".format(scs_format))() return out def tocsr(self, shape=None): @@ -242,7 +252,7 @@ def tocsr(self, shape=None): Reshape is performed after casting. """ - return self._toscs('csr', shape=shape) + return self._toscs("csr", shape=shape) def tocoo(self, shape=None): """Return data as coo_matrix. @@ -254,7 +264,7 @@ def tocoo(self, shape=None): Reshape is performed after casting. """ - return self._toscs('coo', shape=shape) + return self._toscs("coo", shape=shape) def tocsc(self, shape=None): """Return data as csc_matrix. @@ -266,7 +276,7 @@ def tocsc(self, shape=None): Reshape is performed after casting. """ - return self._toscs('csc', shape=shape) + return self._toscs("csc", shape=shape) def todia(self, shape=None): """Return data as dia_matrix. @@ -278,7 +288,7 @@ def todia(self, shape=None): Reshape is performed after casting. """ - return self._toscs('dia', shape=shape) + return self._toscs("dia", shape=shape) def todok(self, shape=None): """Return data as dok_matrix. @@ -290,7 +300,7 @@ def todok(self, shape=None): Reshape is performed after casting. """ - return self._toscs('dok', shape=shape) + return self._toscs("dok", shape=shape) def tolil(self, shape=None): """Return data as lil_matrix. @@ -302,7 +312,7 @@ def tolil(self, shape=None): Reshape is performed after casting. """ - return self._toscs('lil', shape=shape) + return self._toscs("lil", shape=shape) def tolist(self, shape=None): """Return data as list. @@ -325,7 +335,7 @@ def todense(self, order=None): def _tocoo_or_tocsr(self): """Return data as coo_matrix if data is not as csr_matrix, return csr_matrix otherwise.""" - if self._data.getformat() != 'csr': + if self._data.getformat() != "csr": return self.tocoo() return self.tocsr() @@ -365,7 +375,7 @@ def _check_index(self, idx): if isinstance(idx, CDense) or isinstance(idx, CSparse): # Boolean mask - if idx.dtype.kind == 'b': + if idx.dtype.kind == "b": # Boolean masks must be 2-Dimensional if idx.ndim == 1: @@ -378,14 +388,17 @@ def _check_index(self, idx): # Check the shape of the boolean mask if idx.shape != self.shape: raise IndexError( - "boolean mask must have shape {:}".format(self.shape)) + "boolean mask must have shape {:}".format(self.shape) + ) return idx # Check if array is vector-like if self.shape[0] != 1: - raise IndexError("vector-like indexing is only applicable " - "to arrays with shape[0] == 1.") + raise IndexError( + "vector-like indexing is only applicable " + "to arrays with shape[0] == 1." + ) # Fake 2D index. Use ndarrays to mimic Matlab-like indexing idx = (np.asarray([0]), idx.tondarray()) @@ -395,8 +408,10 @@ def _check_index(self, idx): elif is_list_of_lists(idx): if len(idx) != 2: - raise IndexError("for list of lists indexing, indices " - "for each dimension must be provided.") + raise IndexError( + "for list of lists indexing, indices " + "for each dimension must be provided." + ) # List of lists must be passed as a tuple return tuple(idx) @@ -404,8 +419,10 @@ def _check_index(self, idx): elif is_int(idx) or is_bool(idx): # Check if array is vector-like if self.shape[0] != 1: - raise IndexError("vector-like indexing is only applicable " - "to arrays with shape[0] == 1.") + raise IndexError( + "vector-like indexing is only applicable " + "to arrays with shape[0] == 1." + ) # Fake 2D index. Use ndarrays to mimic Matlab-like indexing idx = (np.asarray([0]), np.asarray([idx])) @@ -419,8 +436,10 @@ def _check_index(self, idx): elif is_list(idx): # Check if array is vector-like if self.shape[0] != 1: - raise IndexError("vector-like indexing is only applicable " - "to arrays with shape[0] == 1.") + raise IndexError( + "vector-like indexing is only applicable " + "to arrays with shape[0] == 1." + ) # Empty lists are converted to float by numpy, # special handling needed @@ -441,8 +460,10 @@ def _check_index(self, idx): elif is_slice(idx): # Check if array is vector-like if self.shape[0] != 1: - raise IndexError("vector-like indexing is only applicable " - "to arrays with shape[0] == 1.") + raise IndexError( + "vector-like indexing is only applicable " + "to arrays with shape[0] == 1." + ) # Fake index for row. Slice for columns is fine idx = (0, idx) @@ -499,8 +520,10 @@ def _check_index(self, idx): self._data = self._data.tocsc() else: - raise TypeError("{:} should not be used for " - "CSparse indexing.".format(type(e))) + raise TypeError( + "{:} should not be used for " + "CSparse indexing.".format(type(e)) + ) # Converting back to tuple idx = tuple(idx_list) @@ -511,8 +534,9 @@ def _check_index(self, idx): else: # No other object is accepted for CSparse indexing - raise TypeError("{:} should not be used for " - "CSparse indexing.".format(type(idx))) + raise TypeError( + "{:} should not be used for " "CSparse indexing.".format(type(idx)) + ) return idx @@ -534,11 +558,15 @@ def _check_index_bool(self, idx): for elem_idx, elem in enumerate(idx): # boolean arrays in tuple (cross-indices) must be 1-Dimensional - if elem is not None and elem.dtype.kind == 'b' and \ - elem.size != self.shape[elem_idx]: + if ( + elem is not None + and elem.dtype.kind == "b" + and elem.size != self.shape[elem_idx] + ): raise IndexError( "boolean index array for axis {:} must have " - "size {:}.".format(elem_idx, self.shape[elem_idx])) + "size {:}.".format(elem_idx, self.shape[elem_idx]) + ) def __getitem__(self, idx): """Redefinition of the get (brackets) operator.""" @@ -566,8 +594,9 @@ def __setitem__(self, idx, value): elif isinstance(value, CSparse): value = value.tocsr() elif not (is_scalar(value) or is_bool(value)): - raise TypeError("{:} cannot be used for setting " - "a CSparse.".format(type(value))) + raise TypeError( + "{:} cannot be used for setting " "a CSparse.".format(type(value)) + ) # Check index for all other cases idx = self._check_index(idx) @@ -635,7 +664,8 @@ def __add__(self, other): return self.deepcopy() raise NotImplementedError( "adding a nonzero scalar or a boolean True to a " - "sparse array is not supported. Convert to dense if needed.") + "sparse array is not supported. Convert to dense if needed." + ) elif isinstance(other, CSparse): # Sparse + Sparse = Sparse # Scipy does not support broadcast natively other = self._broadcast_other(other) @@ -644,7 +674,8 @@ def __add__(self, other): if other.size == 1: # scalar-like raise NotImplementedError( "adding an array of size one to a sparse array " - "is not supported. Convert to dense if needed.") + "is not supported. Convert to dense if needed." + ) else: # direct operation or broadcast return CDense(self._data.__add__(other.tondarray())) else: @@ -674,7 +705,8 @@ def __sub__(self, other): return self.deepcopy() raise NotImplementedError( "subtracting a nonzero scalar or a boolean True from a " - "sparse array is not supported. Convert to dense if needed.") + "sparse array is not supported. Convert to dense if needed." + ) elif isinstance(other, CSparse): # Sparse - Sparse = Sparse # Scipy does not support broadcast natively other = self._broadcast_other(other) @@ -683,7 +715,8 @@ def __sub__(self, other): if other.size == 1: # scalar-like raise NotImplementedError( "subtracting an array of size one from a sparse array " - "is not supported. Convert to dense if needed.") + "is not supported. Convert to dense if needed." + ) else: # direct operation or broadcast return CDense(self._data.__sub__(other.tondarray())) else: @@ -696,7 +729,8 @@ def __rsub__(self, other): return -self.deepcopy() raise NotImplementedError( "subtracting a sparse array from a nonzero scalar or from " - "a boolean True is not supported. Convert to dense if needed.") + "a boolean True is not supported. Convert to dense if needed." + ) else: return NotImplemented @@ -716,10 +750,10 @@ def __mul__(self, other): Array after product. """ - if is_scalar(other) or is_bool(other) or \ - isinstance(other, (CSparse, CDense)): # Always Sparse - return self.__class__( - self._data.multiply(self._buffer_to_builtin(other))) + if ( + is_scalar(other) or is_bool(other) or isinstance(other, (CSparse, CDense)) + ): # Always Sparse + return self.__class__(self._data.multiply(self._buffer_to_builtin(other))) else: return NotImplemented @@ -772,7 +806,8 @@ def __truediv__(self, other): def __rtruediv__(self, other): """Element-wise (inverse) true division.""" raise NotImplementedError( - "dividing a scalar by a sparse array is not supported") + "dividing a scalar by a sparse array is not supported" + ) def __floordiv__(self, other): """Element-wise floor division (integral part of the quotient). @@ -800,7 +835,8 @@ def __floordiv__(self, other): def __rfloordiv__(self, other): """Element-wise (inverse) floor division.""" raise NotImplementedError( - "dividing a scalar by a sparse array is not supported") + "dividing a scalar by a sparse array is not supported" + ) def __abs__(self): """Returns array elements without sign. @@ -842,18 +878,19 @@ def __pow__(self, power): if power == 0: raise NotImplementedError( "using zero or a boolean False as power is not supported " - "for sparse arrays. Convert to dense if needed.") + "for sparse arrays. Convert to dense if needed." + ) x = self.tocsr() # self.__class__ expects a csr # indices/indptr must passed as copies (pow creates new data) - return self.__class__((pow(x.data, power), x.indices, x.indptr), - shape=x.shape, copy=True) + return self.__class__( + (pow(x.data, power), x.indices, x.indptr), shape=x.shape, copy=True + ) else: return NotImplemented def __rpow__(self, power): """Element-wise (inverse) power.""" - raise NotImplementedError( - "using a sparse array as a power is not supported") + raise NotImplementedError("using a sparse array as a power is not supported") def __eq__(self, other): """Element-wise == operator. @@ -1102,8 +1139,10 @@ def save(self, datafile, overwrite=False): import os if os.path.isfile(datafile) is True and overwrite is False: - raise IOError("File {:} already exists. Specify overwrite=True " - "or delete the file.".format(datafile)) + raise IOError( + "File {:} already exists. Specify overwrite=True " + "or delete the file.".format(datafile) + ) x = self.tocsr() # Load expects a csr_matrix @@ -1111,14 +1150,14 @@ def save(self, datafile, overwrite=False): data_cndarray = CDense(x.data).reshape((1, x.data.shape[0])) # Converting explicitly to int as in 64 bit machines the # following arrays are stored with dtype == np.int32 - indices_cndarray = \ + indices_cndarray = ( CDense(x.indices).reshape((1, x.indices.shape[0])).astype(int) - indptr_cndarray = \ - CDense(x.indptr).reshape((1, x.indptr.shape[0])).astype(int) + ) + indptr_cndarray = CDense(x.indptr).reshape((1, x.indptr.shape[0])).astype(int) # Error handling is managed by CDense.save() # file will be closed exiting from context - with open(datafile, mode='wt+', encoding='utf-8') as fhandle: + with open(datafile, mode="wt+", encoding="utf-8") as fhandle: data_cndarray.save(fhandle) indices_cndarray.save(fhandle) indptr_cndarray.save(fhandle) @@ -1146,18 +1185,26 @@ def load(cls, datafile, dtype=float): """ # CDense.load() will manage IO errors - imported_data = CDense.load( - datafile, dtype=dtype, startrow=0, skipend=3).ravel().tondarray() + imported_data = ( + CDense.load(datafile, dtype=dtype, startrow=0, skipend=3) + .ravel() + .tondarray() + ) # Indices are always integers - imported_indices = CDense.load( - datafile, dtype=int, startrow=1, skipend=2).ravel().tondarray() - imported_indptr = CDense.load( - datafile, dtype=int, startrow=2, skipend=1).ravel().tondarray() - shape_ndarray = CDense.load( - datafile, dtype=int, startrow=3, skipend=0).ravel().tondarray() - - return cls((imported_data, imported_indices, imported_indptr), - shape=(shape_ndarray[0], shape_ndarray[1])) + imported_indices = ( + CDense.load(datafile, dtype=int, startrow=1, skipend=2).ravel().tondarray() + ) + imported_indptr = ( + CDense.load(datafile, dtype=int, startrow=2, skipend=1).ravel().tondarray() + ) + shape_ndarray = ( + CDense.load(datafile, dtype=int, startrow=3, skipend=0).ravel().tondarray() + ) + + return cls( + (imported_data, imported_indices, imported_indptr), + shape=(shape_ndarray[0], shape_ndarray[1]), + ) # ----------------------------- # # # # # # # UTILITIES # # # # # # @@ -1184,7 +1231,7 @@ def atleast_2d(self): """ return self.__class__(self) - def reshape(self, newshape, order='C', copy=False): + def reshape(self, newshape, order="C", copy=False): """Reshape the matrix using input shape (int or tuple of ints). Parameters @@ -1205,8 +1252,7 @@ def reshape(self, newshape, order='C', copy=False): """ # Scipy >= 1.4, shape must be two dimensional newshape = _shape_atleast_2d(newshape) - return self.__class__( - self.tocsr().reshape(newshape, order=order, copy=copy)) + return self.__class__(self.tocsr().reshape(newshape, order=order, copy=copy)) def resize(self, newshape, constant=0): """Return a new array with the specified shape.""" @@ -1232,8 +1278,7 @@ def round(self, decimals=0): data = np.round(x.data, decimals=decimals) # Round does not allocate new memory (data.flags.OWNDATA = False) # and indices/indptr must passed as copies - return self.__class__( - (data, x.indices, x.indptr), shape=self.shape, copy=True) + return self.__class__((data, x.indices, x.indptr), shape=self.shape, copy=True) def ceil(self): """Return the ceiling of the input, element-wise.""" @@ -1250,9 +1295,9 @@ def clip(self, c_min, c_max): def eliminate_zeros(self): self._data.eliminate_zeros() - def sort(self, axis=-1, kind='quicksort', inplace=False): + def sort(self, axis=-1, kind="quicksort", inplace=False): """Sort array.""" - if kind != 'quicksort': + if kind != "quicksort": raise ValueError("only `quicksort` algorithm is supported") tosort = self if inplace is True else self.deepcopy() @@ -1272,14 +1317,14 @@ def sort(self, axis=-1, kind='quicksort', inplace=False): return tosort - def argsort(self, axis=-1, kind='quicksort'): + def argsort(self, axis=-1, kind="quicksort"): """Returns the indices that would sort an array. If possible is better if you use sort function axis=-1 order based on last axis (which in sparse matrix is 1 horizontal). """ - if kind != 'quicksort': + if kind != "quicksort": raise ValueError("only `quicksort` algorithm is supported") # for all element of chosen axis @@ -1295,7 +1340,8 @@ def argsort(self, axis=-1, kind='quicksort'): axis_elem_num = array.shape[1] # order for column else: raise ValueError( - "wrong axis parameter in argsort function for sparse data") + "wrong axis parameter in argsort function for sparse data" + ) index_matrix = CDense().zeros(array.shape, dtype=int) @@ -1308,8 +1354,9 @@ def argsort(self, axis=-1, kind='quicksort'): axis_element = array[:, i] # order for column # argsort of current axis element - sorted_data_idx = CDense( - axis_element.todense()).argsort(axis=axis, kind='quicksort') + sorted_data_idx = CDense(axis_element.todense()).argsort( + axis=axis, kind="quicksort" + ) if axis == 1 or axis == -1 or axis is None: index_matrix[i, :] = sorted_data_idx[0, :] # order for row @@ -1346,7 +1393,7 @@ def repmat(self, m, n): blocks = [rows for _ in range(m)] if len(blocks) == 0: # To manage the m = 0 case blocks = [[]] - return self.__class__(scs.bmat(blocks, format='csr', dtype=self.dtype)) + return self.__class__(scs.bmat(blocks, format="csr", dtype=self.dtype)) def repeat(self, repeats, axis=None): """Repeat elements of an array.""" @@ -1376,8 +1423,7 @@ def logical_and(self, array): """ if self.shape != array.shape: - raise ValueError( - "array to compare must have shape {:}".format(self.shape)) + raise ValueError("array to compare must have shape {:}".format(self.shape)) # This create an empty sparse matrix (basically full of zeros) and_result = self.__class__(self.shape, dtype=bool) @@ -1385,8 +1431,7 @@ def logical_and(self, array): # Ensure we have the expected type # Use 'coo' for fast conversion (if not a 'csr') x = self._tocoo_or_tocsr() - x_array = array.tocoo() if \ - array._data.getformat() != 'csr' else array.tocsr() + x_array = array.tocoo() if array._data.getformat() != "csr" else array.tocsr() # Iterate over non-zero elements # This also works for any explicitly stored zero @@ -1395,9 +1440,9 @@ def logical_and(self, array): this_elem_row = self.nnz_indices[0][e_i] this_elem_col = self.nnz_indices[1][e_i] # Check if the 2nd array has an element in the same position - y_same_bool = \ - (CDense(array.nnz_indices[0]) == this_elem_row).logical_and( - CDense(array.nnz_indices[1]) == this_elem_col) + y_same_bool = (CDense(array.nnz_indices[0]) == this_elem_row).logical_and( + CDense(array.nnz_indices[1]) == this_elem_col + ) if y_same_bool.any(): # Found a corresponding element # Now extract the value to compare from second array same_position_val = int(x_array.data[y_same_bool.tondarray()]) @@ -1427,8 +1472,7 @@ def logical_or(self, array): """ if self.shape != array.shape: - raise ValueError( - "array to compare must have shape {:}".format(self.shape)) + raise ValueError("array to compare must have shape {:}".format(self.shape)) # All non-zero elements will be replaced with True, otherwise False out = self.astype(bool) @@ -1459,13 +1503,11 @@ def logical_not(self): def maximum(self, array): """Element-wise maximum.""" - return self.__class__( - self._data.maximum(self._buffer_to_builtin(array))) + return self.__class__(self._data.maximum(self._buffer_to_builtin(array))) def minimum(self, array): """Element-wise minimum.""" - return self.__class__( - self._data.minimum(self._buffer_to_builtin(array))) + return self.__class__(self._data.minimum(self._buffer_to_builtin(array))) # ------ # # SEARCH # @@ -1482,7 +1524,8 @@ def find(self, condition): def binary_search(self, value): raise NotImplementedError( - "`binary_search` is not implemented for sparse arrays!") + "`binary_search` is not implemented for sparse arrays!" + ) # ------------- # # DATA ANALYSIS # @@ -1510,23 +1553,23 @@ def get_nnz(self, axis=None): res = self.tocsr().getnnz(axis=axis) return CDense(res) if axis is not None else res - def unique(self, return_index=False, - return_inverse=False, return_counts=False): + def unique(self, return_index=False, return_inverse=False, return_counts=False): """Return unique array elements in dense format.""" # Let's compute the number of zeros (will be used multiple times) n_zeros = self.size - self.nnz unique_items = [0] if n_zeros > 0 else [] # We have at least a zero? # Appending nonzero elements - out = np.unique(self.tocsr().data, - return_index=return_index, - return_inverse=return_inverse, - return_counts=return_counts) + out = np.unique( + self.tocsr().data, + return_index=return_index, + return_inverse=return_inverse, + return_counts=return_counts, + ) if not any([return_index, return_inverse, return_counts]): # Return unique elements with correct dtype return CDense(unique_items + out.tolist()).astype(self.dtype) else: # np.unique returned a tuple - unique_items = CDense( - unique_items + out[0].tolist()).astype(self.dtype) + unique_items = CDense(unique_items + out[0].tolist()).astype(self.dtype) # If any extra parameter has been specified, output will be a tuple outputs = [unique_items] @@ -1546,20 +1589,22 @@ def unique(self, return_index=False, for i in range(flat_a.size): # If a element is missing for indices[1] # (nz column indices), means there is a zero there! - if i + 1 > len(flat_a.nnz_indices[1]) or \ - flat_a.nnz_indices[1][i] != i: + if ( + i + 1 > len(flat_a.nnz_indices[1]) + or flat_a.nnz_indices[1][i] != i + ): unique_index = CDense([i]) break # Let's get the indices of the nz elements (columns indices) unique_index = unique_index.append( - CDense(flat_a.nnz_indices[1], dtype=int)[CDense(out[1])]) + CDense(flat_a.nnz_indices[1], dtype=int)[CDense(out[1])] + ) # Add result to the list of returned items outputs.append(unique_index) if return_inverse is True: - raise NotImplementedError( - "`return_inverse` is currently not supported") + raise NotImplementedError("`return_inverse` is currently not supported") if return_counts is True: # Let's check the number of extra parameters (to parse out) @@ -1570,14 +1615,15 @@ def unique(self, return_index=False, # size of the out tuple depends on the number of extra params unique_counts = CDense( - counts_zeros + out[min(3, num_params)].tolist(), dtype=int) + counts_zeros + out[min(3, num_params)].tolist(), dtype=int + ) # Add result to the list of returned items outputs.append(unique_counts) return tuple(outputs) def bincount(self, minlength=0): - """Count the number of occurrences of each value in array + """Count the number of occurrences of each value in array of non-negative ints.""" # Use 'coo' for fast conversion (if not a 'csr') x = self._tocoo_or_tocsr() @@ -1597,7 +1643,7 @@ def norm(self, order=None): if self.size == 0: # Special handle as few norms raise error for empty arrays - if order == 'fro': + if order == "fro": raise ValueError("Invalid norm order {:}.".format(order)) return self.__class__([0.0]) @@ -1609,7 +1655,7 @@ def norm_2d(self, order=None, axis=None, keepdims=True): # Scipy does not supports negative norms along axis raise NotImplementedError - if axis is not None and order == 'fro': + if axis is not None and order == "fro": # 'fro' is a matrix norm raise ValueError("Invalid norm order {:}.".format(order)) @@ -1618,8 +1664,7 @@ def norm_2d(self, order=None, axis=None, keepdims=True): if axis is None and order in (2, -2): # Return an error consistent with scipy raise NotImplementedError - if axis is None and order not in ( - None, 'fro', inf, -inf, 1, -1): + if axis is None and order not in (None, "fro", inf, -inf, 1, -1): raise ValueError("Invalid norm order {:}.".format(order)) return self.__class__([0.0]) @@ -1636,8 +1681,7 @@ def sum(self, axis=None, keepdims=True): out_sum = CDense([[0.0]]) else: out_sum = CDense(self._data.sum(axis)) - return \ - out_sum.ravel() if axis is None or keepdims is False else out_sum + return out_sum.ravel() if axis is None or keepdims is False else out_sum def cumsum(self, axis=None, dtype=None): """Return the cumulative sum of the array elements.""" @@ -1681,16 +1725,17 @@ def all(self, axis=None, keepdims=True): """Return True if all array elements are boolean True.""" if axis is not None or keepdims is not True: raise NotImplementedError( - "`axis` and `keepdims` are currently not supported") + "`axis` and `keepdims` are currently not supported" + ) # Use 'coo' for fast conversion (if not a 'csr') - return bool( - self.size == self.nnz and self._tocoo_or_tocsr().data.all()) + return bool(self.size == self.nnz and self._tocoo_or_tocsr().data.all()) def any(self, axis=None, keepdims=True): """Return True if any array element is boolean True.""" if axis is not None or keepdims is not True: raise NotImplementedError( - "`axis` and `keepdims` are currently not supported") + "`axis` and `keepdims` are currently not supported" + ) # Use 'coo' for fast conversion (if not a 'csr') return bool(self._tocoo_or_tocsr().data.any()) @@ -1789,11 +1834,12 @@ def std(self, axis=None, ddof=0, keepdims=True): centered_array = self - array_mean.repmat( [1 if array_mean.shape[0] == self.shape[0] else self.shape[0]][0], - [1 if array_mean.shape[1] == self.shape[1] else self.shape[1]][0]) + [1 if array_mean.shape[1] == self.shape[1] else self.shape[1]][0], + ) # n is array size for axis == None or # the number of rows/columns of specified axis n = self.size if axis is None else self.shape[axis] - variance = (1.0 / (n - ddof)) * (centered_array ** 2) + variance = (1.0 / (n - ddof)) * (centered_array**2) return CDense(variance.sum(axis=axis, keepdims=keepdims).sqrt()) @@ -1807,12 +1853,13 @@ def sha1(self): """ import hashlib + x = self.tocsr() - h = hashlib.new('sha1') + h = hashlib.new("sha1") # Hash by taking into account shape and sparse matrix internals - h.update(hex(hash(x.shape)).encode('utf-8')) + h.update(hex(hash(x.shape)).encode("utf-8")) # The returned sha1 could be different for same data # but different memory order. Use C order to be consistent h.update(np.ascontiguousarray(x.indices)) @@ -1867,8 +1914,7 @@ def log(self): def log10(self): """Base 10 logarithm, element-wise.""" - raise NotImplementedError( - "`log10` is not available for sparse arrays!") + raise NotImplementedError("`log10` is not available for sparse arrays!") def pow(self, exp): """Array elements raised to powers from input exponent, element-wise. @@ -1891,8 +1937,7 @@ def pow(self, exp): def normpdf(self, mu=0.0, sigma=1.0): """Return normal distribution function.""" - raise NotImplementedError( - "`normpdf` is not available for sparse arrays!") + raise NotImplementedError("`normpdf` is not available for sparse arrays!") # ----- # # MIXED # @@ -1905,11 +1950,13 @@ def sign(self): def diag(self, k=0): """Extract a diagonal or construct a diagonal array.""" if self.shape[0] == 1: - return self.__class__(scs.diags( - self.tondarray(), offsets=[k], format='csr', dtype=self.dtype)) + return self.__class__( + scs.diags(self.tondarray(), offsets=[k], format="csr", dtype=self.dtype) + ) else: - if (k > 0 and k > self.shape[1] - 1) or \ - (k < 0 and abs(k) > self.shape[0] - 1): + if (k > 0 and k > self.shape[1] - 1) or ( + k < 0 and abs(k) > self.shape[0] - 1 + ): raise ValueError("k exceeds matrix dimensions") return CDense(self.tocsr().diagonal(k=k)) @@ -1921,8 +1968,7 @@ def dot(self, array): def interp(self, x_data, y_data, return_left=None, return_right=None): """One-dimensional linear interpolation.""" - raise NotImplementedError( - "`interp` is not available for sparse arrays!") + raise NotImplementedError("`interp` is not available for sparse arrays!") def inv(self): """Compute the (multiplicative) inverse of a square matrix.""" @@ -1974,7 +2020,7 @@ def eye(cls, n_rows, n_cols=None, k=0, dtype=float): zeros elsewhere. """ - return cls(scs.eye(n_rows, n_cols, k=k, dtype=dtype, format='csr')) + return cls(scs.eye(n_rows, n_cols, k=k, dtype=dtype, format="csr")) @classmethod def rand(cls, shape, random_state=None, density=0.01): @@ -1987,7 +2033,7 @@ def rand(cls, shape, random_state=None, density=0.01): """ n_rows, n_cols = shape # Unpacking the shape - return cls(scs.rand(n_rows, n_cols, density=density, format='csr')) + return cls(scs.rand(n_rows, n_cols, density=density, format="csr")) @classmethod def randn(cls, shape, random_state=None): @@ -2022,8 +2068,7 @@ def arange(cls, start=None, stop=None, step=1, dtype=None): def concatenate(cls, array1, array2, axis=1): """Concatenate a sequence of arrays along the given axis.""" if not isinstance(array1, cls) or not isinstance(array2, cls): - raise TypeError( - "both arrays to concatenate must be {:}".format(cls)) + raise TypeError("both arrays to concatenate must be {:}".format(cls)) if axis is None: # both arrays should be ravelled array1 = array1.ravel() @@ -2043,6 +2088,6 @@ def comblist(cls, list_of_list, dtype=float): raise NotImplementedError @classmethod - def meshgrid(cls, xi, indexing='xy'): + def meshgrid(cls, xi, indexing="xy"): """Return coordinate matrices from coordinate vectors.""" raise NotImplementedError diff --git a/src/secml/array/tests/c_array_testcases.py b/src/secml/array/tests/c_array_testcases.py index 8f05ea6a..37f81adc 100644 --- a/src/secml/array/tests/c_array_testcases.py +++ b/src/secml/array/tests/c_array_testcases.py @@ -7,47 +7,52 @@ class CArrayTestCases(CUnitTest): def setUp(self): """Basic set up.""" - self.array_dense = CArray([[1, 0, 0, 5], - [2, 4, 0, 0], - [3, 6, 0, 0]]) - self.array_sparse = CArray( - self.array_dense.deepcopy(), tosparse=True) - - self.array_dense_sym = CArray([[1, 2, 0], - [2, 4, 6], - [0, 6, 0]]) - self.array_sparse_sym = CArray( - self.array_dense_sym.deepcopy(), tosparse=True) - - self.array_dense_nozero = CArray([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12]]) - self.array_sparse_nozero = CArray( - self.array_dense_nozero.deepcopy(), tosparse=True) + self.array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]]) + self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True) - self.array_dense_allzero = CArray([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]) - self.array_sparse_allzero = CArray( - self.array_dense_allzero.deepcopy(), tosparse=True) + self.array_dense_sym = CArray([[1, 2, 0], [2, 4, 6], [0, 6, 0]]) + self.array_sparse_sym = CArray(self.array_dense_sym.deepcopy(), tosparse=True) - self.array_dense_bool = CArray([[True, False, True, True], - [False, False, False, False], - [True, True, True, True]]) - self.array_sparse_bool = CArray( - self.array_dense_bool.deepcopy(), tosparse=True) + self.array_dense_nozero = CArray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) + self.array_sparse_nozero = CArray( + self.array_dense_nozero.deepcopy(), tosparse=True + ) - self.array_dense_bool_true = CArray([[True, True, True, True], - [True, True, True, True], - [True, True, True, True]]) + self.array_dense_allzero = CArray([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) + self.array_sparse_allzero = CArray( + self.array_dense_allzero.deepcopy(), tosparse=True + ) + + self.array_dense_bool = CArray( + [ + [True, False, True, True], + [False, False, False, False], + [True, True, True, True], + ] + ) + self.array_sparse_bool = CArray(self.array_dense_bool.deepcopy(), tosparse=True) + + self.array_dense_bool_true = CArray( + [ + [True, True, True, True], + [True, True, True, True], + [True, True, True, True], + ] + ) self.array_sparse_bool_true = CArray( - self.array_dense_bool_true.deepcopy(), tosparse=True) - - self.array_dense_bool_false = CArray([[False, False, False, False], - [False, False, False, False], - [False, False, False, False]]) + self.array_dense_bool_true.deepcopy(), tosparse=True + ) + + self.array_dense_bool_false = CArray( + [ + [False, False, False, False], + [False, False, False, False], + [False, False, False, False], + ] + ) self.array_sparse_bool_false = CArray( - self.array_dense_bool_false.deepcopy(), tosparse=True) + self.array_dense_bool_false.deepcopy(), tosparse=True + ) self.row_flat_dense = CArray([4, 0, 6]) self.row_dense = self.row_flat_dense.atleast_2d() @@ -58,24 +63,25 @@ def setUp(self): self.single_flat_dense = CArray([4]) self.single_dense = self.single_flat_dense.atleast_2d() - self.single_sparse = CArray( - self.single_dense.deepcopy(), tosparse=True) + self.single_sparse = CArray(self.single_dense.deepcopy(), tosparse=True) self.single_flat_dense_zero = CArray([0]) self.single_dense_zero = self.single_flat_dense_zero.atleast_2d() self.single_sparse_zero = CArray( - self.single_dense_zero.deepcopy(), tosparse=True) + self.single_dense_zero.deepcopy(), tosparse=True + ) self.single_bool_flat_dense = CArray([True]) self.single_bool_dense = self.single_bool_flat_dense.atleast_2d() self.single_bool_sparse = CArray( - self.single_bool_dense.deepcopy(), tosparse=True) + self.single_bool_dense.deepcopy(), tosparse=True + ) self.single_bool_flat_dense_false = CArray([False]) - self.single_bool_dense_false = \ - self.single_bool_flat_dense_false.atleast_2d() + self.single_bool_dense_false = self.single_bool_flat_dense_false.atleast_2d() self.single_bool_sparse_false = CArray( - self.single_bool_dense_false.deepcopy(), tosparse=True) + self.single_bool_dense_false.deepcopy(), tosparse=True + ) self.empty_flat_dense = CArray([], tosparse=False) self.empty_dense = CArray([[]], tosparse=False) @@ -105,16 +111,23 @@ def _test_operator_cycle(self, totest_op, totest_items, totest_result): for operator in totest_op: to_check = [] for pair_idx, pair in enumerate(totest_items): - class0 = type(pair[0]._data) if \ - hasattr(pair[0], 'isdense') else type(pair[0]) - class1 = type(pair[1]._data) if \ - hasattr(pair[1], 'isdense') else type(pair[1]) - self.logger.info("Operator {:} between {:} and {:}" - "".format(operator.__name__, class0, class1)) + class0 = ( + type(pair[0]._data) + if hasattr(pair[0], "isdense") + else type(pair[0]) + ) + class1 = ( + type(pair[1]._data) + if hasattr(pair[1], "isdense") + else type(pair[1]) + ) + self.logger.info( + "Operator {:} between {:} and {:}" + "".format(operator.__name__, class0, class1) + ) result = operator(pair[0], pair[1]) self.assertIsInstance(result._data, totest_result[pair_idx]) - self.logger.info( - "Result: {:}".format(result._data.__class__.__name__)) + self.logger.info("Result: {:}".format(result._data.__class__.__name__)) to_check.append(result) self.assertTrue(self._test_multiple_eq(to_check)) diff --git a/src/secml/array/tests/test_c_array_casting.py b/src/secml/array/tests/test_c_array_casting.py index c60fa934..804bf54c 100644 --- a/src/secml/array/tests/test_c_array_casting.py +++ b/src/secml/array/tests/test_c_array_casting.py @@ -16,13 +16,19 @@ def test_tondarray(self): def _check_tondarray(array): self.logger.info("array:\n{:}".format(array)) - for shape in [None, array.size, (array.size,), - (1, array.size), (array.size, 1), - (1, 1, array.size)]: + for shape in [ + None, + array.size, + (array.size,), + (1, array.size), + (array.size, 1), + (1, 1, array.size), + ]: ndarray = array.tondarray(shape=shape) self.logger.info( - "array.tondarray(shape={:}):\n{:}".format(shape, ndarray)) + "array.tondarray(shape={:}):\n{:}".format(shape, ndarray) + ) self.assertIsInstance(ndarray, np.ndarray) @@ -32,7 +38,7 @@ def _check_tondarray(array): self.assertEqual(array.shape, ndarray.shape) else: # Reshape after casting if is_int(shape): # Fake 1-dim shape - shape = (shape, ) + shape = (shape,) self.assertEqual(shape, ndarray.shape) # Sparse arrays @@ -63,31 +69,32 @@ def test_toscs(self): CArray.todia(), CArray.todok(), CArray.tolil() methods.""" # Will test conversion from dense and between each sparse format scs_formats = ( - ('csr', scs.csr_matrix), - ('coo', scs.coo_matrix), - ('csc', scs.csc_matrix), - ('dia', scs.dia_matrix), - ('dok', scs.dok_matrix), - ('lil', scs.lil_matrix) + ("csr", scs.csr_matrix), + ("coo", scs.coo_matrix), + ("csc", scs.csc_matrix), + ("dia", scs.dia_matrix), + ("dok", scs.dok_matrix), + ("lil", scs.lil_matrix), ) for scs_format, scs_type in scs_formats: - self.logger.info( - "Test for CArray.to{:}() method.".format(scs_format)) + self.logger.info("Test for CArray.to{:}() method.".format(scs_format)) def _check_conversion(array): self.logger.info("array:\n{:}".format(array)) if array.issparse: - self.logger.info("array sparse format: {:}".format( - array._data._data.getformat())) + self.logger.info( + "array sparse format: {:}".format(array._data._data.getformat()) + ) for shape in [None, (1, array.size), (array.size, 1)]: - res = getattr( - array, 'to{:}'.format(scs_format))(shape=shape) - self.logger.info("array.to{:}(shape={:}):\n{:}" - "".format(scs_format, shape, res)) + res = getattr(array, "to{:}".format(scs_format))(shape=shape) + self.logger.info( + "array.to{:}(shape={:}):\n{:}" "".format(scs_format, shape, res) + ) self.logger.info( - "result sparse format: {:}".format(res.getformat())) + "result sparse format: {:}".format(res.getformat()) + ) self.assertIsInstance(res, scs_type) @@ -99,19 +106,21 @@ def _check_conversion(array): self.assertEqual(shape, res.shape) else: # Reshape after casting if array.isdense: # flat dense arrays are 2D sparse - self.assertEqual( - array.atleast_2d().shape, res.shape) + self.assertEqual(array.atleast_2d().shape, res.shape) # matrix shape must be two-dimensional with self.assertRaises(ValueError): - getattr( - array, 'to{:}'.format(scs_format))(shape=array.size) + getattr(array, "to{:}".format(scs_format))(shape=array.size) with self.assertRaises(ValueError): - getattr( - array, 'to{:}'.format(scs_format))(shape=(array.size,)) + getattr(array, "to{:}".format(scs_format))(shape=(array.size,)) with self.assertRaises(ValueError): - getattr(array, 'to{:}'.format(scs_format))( - shape=(1, 1, array.size,)) + getattr(array, "to{:}".format(scs_format))( + shape=( + 1, + 1, + array.size, + ) + ) # Sparse arrays # Checking conversion from default sparse format (csr) @@ -121,13 +130,16 @@ def _check_conversion(array): # Inner loop to check between formats conversion for scs_format_start, _ in scs_formats: self.array_sparse._data._data = getattr( - self.array_sparse, 'to{:}'.format(scs_format_start))() + self.array_sparse, "to{:}".format(scs_format_start) + )() _check_conversion(self.array_sparse) self.row_sparse._data._data = getattr( - self.row_sparse, 'to{:}'.format(scs_format_start))() + self.row_sparse, "to{:}".format(scs_format_start) + )() _check_conversion(self.row_sparse) self.column_sparse._data._data = getattr( - self.column_sparse, 'to{:}'.format(scs_format_start))() + self.column_sparse, "to{:}".format(scs_format_start) + )() _check_conversion(self.column_sparse) # Dense arrays @@ -155,13 +167,19 @@ def test_tolist(self): def _check_tolist(array): self.logger.info("array:\n{:}".format(array)) - for shape in [None, array.size, (array.size,), - (1, array.size), (array.size, 1), - (1, 1, array.size)]: + for shape in [ + None, + array.size, + (array.size,), + (1, array.size), + (array.size, 1), + (1, 1, array.size), + ]: array_list = array.tolist(shape=shape) self.logger.info( - "array.tolist(shape={:}):\n{:}".format(shape, array_list)) + "array.tolist(shape={:}):\n{:}".format(shape, array_list) + ) self.assertIsInstance(array_list, list) @@ -172,7 +190,7 @@ def _check_tolist(array): self.assertEqual(len(elem), array.shape[1]) else: # Reshape after casting if is_int(shape): # Fake 1-dim shape - shape = (shape, ) + shape = (shape,) self.assertEqual(len(array_list), shape[0]) if len(shape) > 1: for elem in array_list: @@ -291,7 +309,7 @@ def _check_tosparse(array): self.array_sparse.tosparse(shape=()) with self.assertRaises(ValueError): self.array_sparse.tosparse(dtype=int, shape=()) - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_classmethods.py b/src/secml/array/tests/test_c_array_classmethods.py index 1805eead..76aa3eaf 100644 --- a/src/secml/array/tests/test_c_array_classmethods.py +++ b/src/secml/array/tests/test_c_array_classmethods.py @@ -29,8 +29,8 @@ def _concat_allaxis(array1, array2): if array1.issparse: # result will be sparse, so always 2d a1_comp = a1_comp.atleast_2d() a2_comp = a2_comp.atleast_2d() - self.assert_array_equal(concat_res[:array1.size], a1_comp) - self.assert_array_equal(concat_res[array1.size:], a2_comp) + self.assert_array_equal(concat_res[: array1.size], a1_comp) + self.assert_array_equal(concat_res[array1.size :], a2_comp) array1_shape0 = array1.atleast_2d().shape[0] array1_shape1 = array1.atleast_2d().shape[1] @@ -41,16 +41,14 @@ def _concat_allaxis(array1, array2): concat_res = CArray.concatenate(array1, array2, axis=0) self.logger.info("concat(a1, a2, axis=0): {:}".format(concat_res)) self.assertEqual(array1_shape1, concat_res.shape[1]) - self.assertEqual( - array1_shape0 + array2_shape0, concat_res.shape[0]) + self.assertEqual(array1_shape0 + array2_shape0, concat_res.shape[0]) self.assert_array_equal(concat_res[:array1_shape0, :], array1) self.assert_array_equal(concat_res[array1_shape0:, :], array2) # check append on axis 1 (horizontal) concat_res = CArray.concatenate(array1, array2, axis=1) self.logger.info("concat(a1, a2, axis=1): {:}".format(concat_res)) - self.assertEqual( - array1_shape1 + array2_shape1, concat_res.shape[1]) + self.assertEqual(array1_shape1 + array2_shape1, concat_res.shape[1]) self.assertEqual(array1_shape0, concat_res.shape[0]) self.assert_array_equal(concat_res[:, :array1_shape1], array1) self.assert_array_equal(concat_res[:, array1_shape1:], array2) @@ -68,12 +66,17 @@ def _concat_allaxis(array1, array2): # check concat on empty arrays empty_sparse = CArray([], tosparse=True) empty_dense = CArray([], tosparse=False) - self.assertTrue((CArray.concatenate( - empty_sparse, empty_dense, axis=None) == empty_dense).all()) - self.assertTrue((CArray.concatenate( - empty_sparse, empty_dense, axis=0) == empty_dense).all()) - self.assertTrue((CArray.concatenate( - empty_sparse, empty_dense, axis=1) == empty_dense).all()) + self.assertTrue( + ( + CArray.concatenate(empty_sparse, empty_dense, axis=None) == empty_dense + ).all() + ) + self.assertTrue( + (CArray.concatenate(empty_sparse, empty_dense, axis=0) == empty_dense).all() + ) + self.assertTrue( + (CArray.concatenate(empty_sparse, empty_dense, axis=1) == empty_dense).all() + ) def test_comblist(self): """Test for comblist() classmethod.""" @@ -83,13 +86,13 @@ def test_comblist(self): self.logger.info("list of lists: \n{:}".format(l)) comb_array = CArray.comblist(l) self.logger.info("comblist(l): \n{:}".format(comb_array)) - self.assertTrue((comb_array == CArray([[1., 4.], [2., 4.]])).all()) + self.assertTrue((comb_array == CArray([[1.0, 4.0], [2.0, 4.0]])).all()) l = [[1, 2], []] self.logger.info("list of lists: \n{:}".format(l)) comb_array = CArray.comblist(l) self.logger.info("comblist(l): \n{:}".format(comb_array)) - self.assertTrue((comb_array == CArray([[1.], [2.]])).all()) + self.assertTrue((comb_array == CArray([[1.0], [2.0]])).all()) l = [[], []] comb_array = CArray.comblist(l) @@ -126,8 +129,7 @@ def test_fromiterables(self): self.logger.info("from_iterables result: {:}".format(a)) self.assertFalse((a != expected).any()) - a = CArray.from_iterables( - [CArray([[1, 2, 3, 4, 5, 6]], tosparse=True)]) + a = CArray.from_iterables([CArray([[1, 2, 3, 4, 5, 6]], tosparse=True)]) self.logger.info("from_iterables result: {:}".format(a)) self.assertFalse((a != expected).any()) @@ -135,13 +137,14 @@ def test_ones(self): """Test for CArray.ones() classmethod.""" self.logger.info("Test for CArray.ones() classmethod.") - for shape in [1, (1, ), 2, (2, ), (1, 2), (2, 1), (2, 2)]: + for shape in [1, (1,), 2, (2,), (1, 2), (2, 1), (2, 2)]: for dtype in [None, float, int, bool]: for sparse in [False, True]: res = CArray.ones(shape=shape, dtype=dtype, sparse=sparse) self.logger.info( "CArray.ones(shape={:}, dtype={:}, sparse={:}):" - "\n{:}".format(shape, dtype, sparse, res)) + "\n{:}".format(shape, dtype, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -156,7 +159,7 @@ def test_ones(self): if sparse is True: self.assertEqual(res.shape, (1, shape)) else: - self.assertEqual(res.shape, (shape, )) + self.assertEqual(res.shape, (shape,)) if dtype is None: # Default dtype is float self.assertIsSubDtype(res.dtype, float) else: @@ -167,13 +170,14 @@ def test_zeros(self): """Test for CArray.zeros() classmethod.""" self.logger.info("Test for CArray.zeros() classmethod.") - for shape in [1, (1, ), 2, (2, ), (1, 2), (2, 1), (2, 2)]: + for shape in [1, (1,), 2, (2,), (1, 2), (2, 1), (2, 2)]: for dtype in [None, float, int, bool]: for sparse in [False, True]: res = CArray.zeros(shape=shape, dtype=dtype, sparse=sparse) self.logger.info( "CArray.zeros(shape={:}, dtype={:}, sparse={:}):" - "\n{:}".format(shape, dtype, sparse, res)) + "\n{:}".format(shape, dtype, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -188,7 +192,7 @@ def test_zeros(self): if sparse is True: self.assertEqual(res.shape, (1, shape)) else: - self.assertEqual(res.shape, (shape, )) + self.assertEqual(res.shape, (shape,)) if dtype is None: # Default dtype is float self.assertIsSubDtype(res.dtype, float) else: @@ -199,13 +203,14 @@ def test_empty(self): """Test for CArray.empty() classmethod.""" self.logger.info("Test for CArray.empty() classmethod.") - for shape in [1, (1, ), 2, (2, ), (1, 2), (2, 1), (2, 2)]: + for shape in [1, (1,), 2, (2,), (1, 2), (2, 1), (2, 2)]: for dtype in [None, float, int, bool]: for sparse in [False, True]: res = CArray.empty(shape=shape, dtype=dtype, sparse=sparse) self.logger.info( "CArray.empty(shape={:}, dtype={:}, sparse={:}):" - "\n{:}".format(shape, dtype, sparse, res)) + "\n{:}".format(shape, dtype, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -220,7 +225,7 @@ def test_empty(self): if sparse is True: self.assertEqual(res.shape, (1, shape)) else: - self.assertEqual(res.shape, (shape, )) + self.assertEqual(res.shape, (shape,)) if dtype is None: # Default dtype is float self.assertIsSubDtype(res.dtype, float) else: @@ -238,12 +243,19 @@ def test_eye(self): for n_rows in [0, 1, 2, 3]: for n_cols in [None, 0, 1, 2, 3]: for k in [0, 1, 2, 3, -1, -2, -3]: - res = CArray.eye(n_rows=n_rows, n_cols=n_cols, k=k, - dtype=dtype, sparse=sparse) + res = CArray.eye( + n_rows=n_rows, + n_cols=n_cols, + k=k, + dtype=dtype, + sparse=sparse, + ) self.logger.info( "CArray.eye(n_rows={:}, n_cols={:}, k={:}, " "dtype={:}, sparse={:}):\n{:}".format( - n_rows, n_cols, k, dtype, sparse, res)) + n_rows, n_cols, k, dtype, sparse, res + ) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -265,10 +277,12 @@ def test_eye(self): # Check if the diagonal is moving according to k if k > 0: self.assertEqual( - 0, res[0, min(n_cols-1, k-1)].item()) + 0, res[0, min(n_cols - 1, k - 1)].item() + ) elif k < 0: self.assertEqual( - 0, res[min(n_rows-1, abs(k)-1), 0].item()) + 0, res[min(n_rows - 1, abs(k) - 1), 0].item() + ) else: # The top left corner is a one self.assertEqual(1, res[0, 0]) @@ -276,25 +290,29 @@ def test_eye(self): n_ones = (res == 1).sum() if k >= 0: self.assertEqual( - max(0, min(n_rows, n_cols-k)), n_ones) + max(0, min(n_rows, n_cols - k)), n_ones + ) else: self.assertEqual( - max(0, min(n_cols, n_rows-abs(k))), n_ones) + max(0, min(n_cols, n_rows - abs(k))), n_ones + ) # Check if there are other elements apart from 0,1 self.assertFalse( - ((res != 0).logical_and((res == 1).logical_not()).any())) + ((res != 0).logical_and((res == 1).logical_not()).any()) + ) def test_rand(self): """Test for CArray.rand() classmethod.""" self.logger.info("Test for CArray.rand() classmethod.") - for shape in [(1, ), (2, ), (1, 2), (2, 1), (2, 2)]: + for shape in [(1,), (2,), (1, 2), (2, 1), (2, 2)]: for sparse in [False, True]: res = CArray.rand(shape=shape, sparse=sparse) self.logger.info( "CArray.rand(shape={:}, sparse={:}):" - "\n{:}".format(shape, sparse, res)) + "\n{:}".format(shape, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -314,10 +332,9 @@ def test_randn(self): """Test for CArray.randn() classmethod.""" self.logger.info("Test for CArray.randn() classmethod.") - for shape in [(1, ), (2, ), (1, 2), (2, 1), (2, 2)]: + for shape in [(1,), (2,), (1, 2), (2, 1), (2, 2)]: res = CArray.randn(shape=shape) - self.logger.info( - "CArray.randn(shape={:}):\n{:}".format(shape, res)) + self.logger.info("CArray.randn(shape={:}):\n{:}".format(shape, res)) self.assertIsInstance(res, CArray) self.assertEqual(res.shape, shape) @@ -334,14 +351,13 @@ def test_randint(self): for shape in [1, 2, (1, 2), (2, 1), (2, 2)]: for sparse in [False, True]: if not isinstance(inter, tuple): - res = CArray.randint( - inter, shape=shape, sparse=sparse) + res = CArray.randint(inter, shape=shape, sparse=sparse) else: - res = CArray.randint( - *inter, shape=shape, sparse=sparse) + res = CArray.randint(*inter, shape=shape, sparse=sparse) self.logger.info( "CArray.randint({:}, shape={:}, sparse={:}):" - "\n{:}".format(inter, shape, sparse, res)) + "\n{:}".format(inter, shape, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -352,7 +368,7 @@ def test_randint(self): if sparse is True: self.assertEqual(res.shape, (1, shape)) else: - self.assertEqual(res.shape, (shape, )) + self.assertEqual(res.shape, (shape,)) self.assertIsSubDtype(res.dtype, int) # Checking intervals @@ -376,14 +392,13 @@ def test_randuniform(self): for shape in [1, 2, (1, 2), (2, 1), (2, 2)]: for sparse in [False, True]: if not isinstance(inter, tuple): - res = CArray.randuniform( - inter, shape=shape, sparse=sparse) + res = CArray.randuniform(inter, shape=shape, sparse=sparse) else: - res = CArray.randuniform( - *inter, shape=shape, sparse=sparse) + res = CArray.randuniform(*inter, shape=shape, sparse=sparse) self.logger.info( "CArray.randuniform({:}, shape={:}, sparse={:}):" - "\n{:}".format(inter, shape, sparse, res)) + "\n{:}".format(inter, shape, sparse, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, not sparse) @@ -394,7 +409,7 @@ def test_randuniform(self): if sparse is True: self.assertEqual(res.shape, (1, shape)) else: - self.assertEqual(res.shape, (shape, )) + self.assertEqual(res.shape, (shape,)) self.assertIsSubDtype(res.dtype, float) # Checking intervals @@ -433,5 +448,5 @@ def test_randuniform(self): CArray.randuniform(CArray([5, -3, 4]), CArray([-1, -2, 3]), (2, 3)) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_copy.py b/src/secml/array/tests/test_c_array_copy.py index 850e6f2e..5177ada8 100644 --- a/src/secml/array/tests/test_c_array_copy.py +++ b/src/secml/array/tests/test_c_array_copy.py @@ -13,8 +13,7 @@ def _deepcopy(array): self.logger.info("Array:\n{:}".format(array)) array_deepcopy = array.deepcopy() - self.logger.info("Array deepcopied:\n{:}".format( - array_deepcopy.todense())) + self.logger.info("Array deepcopied:\n{:}".format(array_deepcopy.todense())) self.assertEqual(array.issparse, array_deepcopy.issparse) self.assertEqual(array.isdense, array_deepcopy.isdense) @@ -27,5 +26,5 @@ def _deepcopy(array): _deepcopy(self.array_dense) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_indexing.py b/src/secml/array/tests/test_c_array_indexing.py index 17aa2fb6..09d49f32 100644 --- a/src/secml/array/tests/test_c_array_indexing.py +++ b/src/secml/array/tests/test_c_array_indexing.py @@ -23,24 +23,27 @@ def test_selectors(input_array, selector_list, target_list): selection = input_array[selector] self.logger.info("Result is: \n" + str(selection)) - self.assertFalse(CArray(selection != target_list[selector_idx]).any(), - "{:} is different from {:}".format( - selection, target_list[selector_idx])) + self.assertFalse( + CArray(selection != target_list[selector_idx]).any(), + "{:} is different from {:}".format( + selection, target_list[selector_idx] + ), + ) if isinstance(target_list[selector_idx], CArray): self.assertIsInstance(selection, CArray) if selection.issparse: self.assertEqual( target_list[selector_idx].atleast_2d().shape, - selection.shape) + selection.shape, + ) else: - self.assertEqual(target_list[selector_idx].shape, - selection.shape) - self.assertEqual( - target_list[selector_idx].dtype, selection.dtype) + self.assertEqual( + target_list[selector_idx].shape, selection.shape + ) + self.assertEqual(target_list[selector_idx].dtype, selection.dtype) else: - self.assertIsInstance( - selection, type(target_list[selector_idx])) + self.assertIsInstance(selection, type(target_list[selector_idx])) # 2D/1D INDEXING (MATRIX) arrays_list = [self.array_dense, self.array_sparse] @@ -48,21 +51,35 @@ def test_selectors(input_array, selector_list, target_list): self.logger.info("Testing getters for matrix: \n" + str(array)) - selectors = [[[1, 2, 2, 2], [2, 0, 1, 2]], - [[1, 2, 2, 2], [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]]], - [[np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], [2, 0, 1, 2]], - [[np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], - [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]]], - CArray([[True, False, True, False], - [False, False, False, False], - [False, True, False, False]]), - ] - targets = [CArray([0, 3, 6, 0]), - CArray([0, 3, 6, 0]), - CArray([0, 3, 6, 0]), - CArray([0, 3, 6, 0]), - CArray([1, 0, 6]) - ] + selectors = [ + [[1, 2, 2, 2], [2, 0, 1, 2]], + [ + [1, 2, 2, 2], + [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]], + ], + [ + [np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], + [2, 0, 1, 2], + ], + [ + [np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], + [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]], + ], + CArray( + [ + [True, False, True, False], + [False, False, False, False], + [False, True, False, False], + ] + ), + ] + targets = [ + CArray([0, 3, 6, 0]), + CArray([0, 3, 6, 0]), + CArray([0, 3, 6, 0]), + CArray([0, 3, 6, 0]), + CArray([1, 0, 6]), + ] test_selectors(array, selectors, targets) @@ -72,34 +89,63 @@ def test_selectors(input_array, selector_list, target_list): self.logger.info("Testing getters for matrix: \n" + str(array)) - selectors_unique = [2, np.ravel(2)[0], [2, 2], CArray([2, 2]), - CArray([[2, 2]]), CArray([2, 2], tosparse=True), - slice(1, 3), [False, True, True], - CArray([False, True, True])] - selectors = list(itertools.product(selectors_unique, repeat=2)) + \ - [(1, slice(None, None, -1)), (slice(None, None, -1), 1), - ([1, 1], slice(None, None, -1)), (slice(None, None, -1), [1, 1])] - - targets_a = [CArray([[0]]), CArray([[0]]), # 2 - CArray([[0, 0]]), CArray([[0, 0]]), - CArray([[0, 0]]), CArray([[0, 0]]), - CArray([[6, 0]]), CArray([[6, 0]]), CArray([[6, 0]]) - ] - targets_b = [CArray([[0], [0]]), CArray([[0], [0]]), # [2, 2] - CArray([[0, 0], [0, 0]]), CArray([[0, 0], [0, 0]]), - CArray([[0, 0], [0, 0]]), CArray([[0, 0], [0, 0]]), - CArray([[6, 0], [6, 0]]), - CArray([[6, 0], [6, 0]]), CArray([[6, 0], [6, 0]]) - ] - targets_c = [CArray([[6], [0]]), CArray([[6], [0]]), # [False, True, True] - CArray([[6, 6], [0, 0]]), CArray([[6, 6], [0, 0]]), - CArray([[6, 6], [0, 0]]), CArray([[6, 6], [0, 0]]), - CArray([[4, 6], [6, 0]]), - CArray([[4, 6], [6, 0]]), CArray([[4, 6], [6, 0]]) - ] - targets_d = [CArray([[6, 4, 2]]), CArray([[6], [4], [2]]), - CArray([[6, 4, 2], [6, 4, 2]]), - CArray([[6, 6], [4, 4], [2, 2]])] + selectors_unique = [ + 2, + np.ravel(2)[0], + [2, 2], + CArray([2, 2]), + CArray([[2, 2]]), + CArray([2, 2], tosparse=True), + slice(1, 3), + [False, True, True], + CArray([False, True, True]), + ] + selectors = list(itertools.product(selectors_unique, repeat=2)) + [ + (1, slice(None, None, -1)), + (slice(None, None, -1), 1), + ([1, 1], slice(None, None, -1)), + (slice(None, None, -1), [1, 1]), + ] + + targets_a = [ + CArray([[0]]), + CArray([[0]]), # 2 + CArray([[0, 0]]), + CArray([[0, 0]]), + CArray([[0, 0]]), + CArray([[0, 0]]), + CArray([[6, 0]]), + CArray([[6, 0]]), + CArray([[6, 0]]), + ] + targets_b = [ + CArray([[0], [0]]), + CArray([[0], [0]]), # [2, 2] + CArray([[0, 0], [0, 0]]), + CArray([[0, 0], [0, 0]]), + CArray([[0, 0], [0, 0]]), + CArray([[0, 0], [0, 0]]), + CArray([[6, 0], [6, 0]]), + CArray([[6, 0], [6, 0]]), + CArray([[6, 0], [6, 0]]), + ] + targets_c = [ + CArray([[6], [0]]), + CArray([[6], [0]]), # [False, True, True] + CArray([[6, 6], [0, 0]]), + CArray([[6, 6], [0, 0]]), + CArray([[6, 6], [0, 0]]), + CArray([[6, 6], [0, 0]]), + CArray([[4, 6], [6, 0]]), + CArray([[4, 6], [6, 0]]), + CArray([[4, 6], [6, 0]]), + ] + targets_d = [ + CArray([[6, 4, 2]]), + CArray([[6], [4], [2]]), + CArray([[6, 4, 2], [6, 4, 2]]), + CArray([[6, 6], [4, 4], [2, 2]]), + ] targets = 2 * targets_a + 4 * targets_b + 3 * targets_c + targets_d @@ -111,25 +157,51 @@ def test_selectors(input_array, selector_list, target_list): self.logger.info("Testing getters for array: \n" + str(array)) - selectors_a = [[[0, 0, 0], [2, 0, 1]], - [[0, 0, 0], [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0]]], - [[np.ravel(0)[0], np.ravel(0)[0], np.ravel(0)[0]], [2, 0, 1]], - [[np.ravel(0)[0], np.ravel(0)[0], np.ravel(0)[0]], - [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0]]], - CArray([[True, False, True]]), - CArray([True, False, True]), - slice(None, None, -1) - ] - selectors_row = [0, np.ravel(0)[0], [0], CArray([0]), - -1, np.ravel(-1)[0], [-1], CArray([-1]), - True, np.ravel(True)[0], [True], CArray([True])] - selectors_col = [[], 0, np.ravel(0)[0], [2, 2], CArray([2, 2]), - slice(1, 3), [False, True, True], - CArray([False, True, True])] - selectors_b = [(0, slice(None, None, -1)), - (CArray([0]), slice(None, None, -1))] - selectors = selectors_a + [(x, y) for x in selectors_row for y in selectors_col] + \ - selectors_b + selectors_a = [ + [[0, 0, 0], [2, 0, 1]], + [[0, 0, 0], [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0]]], + [[np.ravel(0)[0], np.ravel(0)[0], np.ravel(0)[0]], [2, 0, 1]], + [ + [np.ravel(0)[0], np.ravel(0)[0], np.ravel(0)[0]], + [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0]], + ], + CArray([[True, False, True]]), + CArray([True, False, True]), + slice(None, None, -1), + ] + selectors_row = [ + 0, + np.ravel(0)[0], + [0], + CArray([0]), + -1, + np.ravel(-1)[0], + [-1], + CArray([-1]), + True, + np.ravel(True)[0], + [True], + CArray([True]), + ] + selectors_col = [ + [], + 0, + np.ravel(0)[0], + [2, 2], + CArray([2, 2]), + slice(1, 3), + [False, True, True], + CArray([False, True, True]), + ] + selectors_b = [ + (0, slice(None, None, -1)), + (CArray([0]), slice(None, None, -1)), + ] + selectors = ( + selectors_a + + [(x, y) for x in selectors_row for y in selectors_col] + + selectors_b + ) targets_a = [CArray([6, 4, 0])] targets_b = [CArray([4, 6])] @@ -137,13 +209,21 @@ def test_selectors(input_array, selector_list, target_list): # Output always flat for flat arrays if array.ndim == 1: targets += [CArray([6, 0, 4])] - targets += 12 * ([CArray([], dtype=int)] + 2 * [CArray([4])] + - 2 * [CArray([6, 6])] + 3 * [CArray([0, 6])]) + targets += 12 * ( + [CArray([], dtype=int)] + + 2 * [CArray([4])] + + 2 * [CArray([6, 6])] + + 3 * [CArray([0, 6])] + ) targets += 2 * [CArray([6, 0, 4])] else: targets += [CArray([[6, 0, 4]])] - targets += 12 * ([CArray([[]], dtype=int)] + 2 * [CArray([[4]])] + - 2 * [CArray([[6, 6]])] + 3 * [CArray([[0, 6]])]) + targets += 12 * ( + [CArray([[]], dtype=int)] + + 2 * [CArray([[4]])] + + 2 * [CArray([[6, 6]])] + + 3 * [CArray([[0, 6]])] + ) targets += 2 * [CArray([[6, 0, 4]])] test_selectors(array, selectors, targets) @@ -154,18 +234,36 @@ def test_selectors(input_array, selector_list, target_list): self.logger.info("Testing getters for vector: \n" + str(array)) - selectors = [[], 0, np.ravel(0)[0], [2, 2], CArray([2, 2]), - slice(1, 3), slice(None), slice(None, None, -1)] + selectors = [ + [], + 0, + np.ravel(0)[0], + [2, 2], + CArray([2, 2]), + slice(1, 3), + slice(None), + slice(None, None, -1), + ] # Output always flat for flat arrays if array.ndim == 1: - targets = [CArray([], dtype=int)] + 2 * [CArray([4])] + \ - 2 * [CArray([6, 6])] + [CArray([0, 6])] + \ - [CArray([4, 0, 6])] + [CArray([6, 0, 4])] + targets = ( + [CArray([], dtype=int)] + + 2 * [CArray([4])] + + 2 * [CArray([6, 6])] + + [CArray([0, 6])] + + [CArray([4, 0, 6])] + + [CArray([6, 0, 4])] + ) else: - targets = [CArray([[]], dtype=int)] + 2 * [CArray([[4]])] + \ - 2 * [CArray([[6, 6]])] + [CArray([[0, 6]])] + \ - [CArray([[4, 0, 6]])] + [CArray([[6, 0, 4]])] + targets = ( + [CArray([[]], dtype=int)] + + 2 * [CArray([[4]])] + + 2 * [CArray([[6, 6]])] + + [CArray([[0, 6]])] + + [CArray([[4, 0, 6]])] + + [CArray([[6, 0, 4]])] + ) test_selectors(array, selectors, targets) @@ -175,17 +273,37 @@ def test_selectors(input_array, selector_list, target_list): self.logger.info("Testing getters for array: \n" + str(array)) - selectors = [[], 0, np.ravel(0)[0], True, [True], CArray([True]), - slice(0, 1), slice(None), slice(None, None, -1), CArray([0, 0])] + selectors = [ + [], + 0, + np.ravel(0)[0], + True, + [True], + CArray([True]), + slice(0, 1), + slice(None), + slice(None, None, -1), + CArray([0, 0]), + ] # CArray([True]) is considered a boolean mask in this case, # resulting selection is always flat if array.ndim == 1: - targets = [CArray([], dtype=int)] + 4 * [CArray([4])] + \ - [CArray([4])] + 3 * [CArray([4])] + [CArray([4, 4])] + targets = ( + [CArray([], dtype=int)] + + 4 * [CArray([4])] + + [CArray([4])] + + 3 * [CArray([4])] + + [CArray([4, 4])] + ) else: - targets = [CArray([[]], dtype=int)] + 4 * [CArray([[4]])] + \ - [CArray([4])] + 3 * [CArray([[4]])] + [CArray([[4, 4]])] + targets = ( + [CArray([[]], dtype=int)] + + 4 * [CArray([[4]])] + + [CArray([4])] + + 3 * [CArray([[4]])] + + [CArray([[4, 4]])] + ) test_selectors(array, selectors, targets) @@ -196,8 +314,11 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): for selector_idx, selector in enumerate(selector_list): - self.logger.info("Set: array[{:}] = {:}".format( - selector, assignment_list[selector_idx])) + self.logger.info( + "Set: array[{:}] = {:}".format( + selector, assignment_list[selector_idx] + ) + ) array_copy = input_array.deepcopy() try: # Using a try to easier debug array_copy[selector] = assignment_list[selector_idx] @@ -205,11 +326,14 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): array_copy[selector] = assignment_list[selector_idx] self.logger.info("Result is: \n" + str(array_copy)) - self.assertFalse(CArray(array_copy != target_list[selector_idx]).any(), - "{:} is different from {:}".format( - array_copy, target_list[selector_idx])) + self.assertFalse( + CArray(array_copy != target_list[selector_idx]).any(), + "{:} is different from {:}".format( + array_copy, target_list[selector_idx] + ), + ) - if hasattr(target_list[selector_idx], 'shape'): + if hasattr(target_list[selector_idx], "shape"): self.assertEqual(array_copy.shape, target_list[selector_idx].shape) # 2D/1D INDEXING (MATRIX) @@ -218,19 +342,40 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): self.logger.info("Testing setters for matrix: \n" + str(array)) - selectors = [[[1, 2, 2, 2], [2, 0, 1, 2]], - [[1, 2, 2, 2], [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]]], - [[np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], [2, 0, 1, 2]], - [[np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], - [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]]] - ] - selectors += 3 * [CArray([[False, False, False, False], - [False, False, True, False], - [True, True, True, False]])] - - assignments = [10, 10, CArray([10, 20, 30, 40]), CArray([10, 20, 30, 40]), - CArray([10, 20, 30, 40]), CArray([10, 20, 30, 40]), 10 - ] + selectors = [ + [[1, 2, 2, 2], [2, 0, 1, 2]], + [ + [1, 2, 2, 2], + [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]], + ], + [ + [np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], + [2, 0, 1, 2], + ], + [ + [np.ravel(1)[0], np.ravel(2)[0], np.ravel(2)[0], np.ravel(2)[0]], + [np.ravel(2)[0], np.ravel(0)[0], np.ravel(1)[0], np.ravel(2)[0]], + ], + ] + selectors += 3 * [ + CArray( + [ + [False, False, False, False], + [False, False, True, False], + [True, True, True, False], + ] + ) + ] + + assignments = [ + 10, + 10, + CArray([10, 20, 30, 40]), + CArray([10, 20, 30, 40]), + CArray([10, 20, 30, 40]), + CArray([10, 20, 30, 40]), + 10, + ] targets_a = [CArray([[1, 0, 0, 5], [2, 4, 10, 0], [10, 10, 10, 0]])] targets_b = [CArray([[1, 0, 0, 5], [2, 4, 10, 0], [20, 30, 40, 0]])] @@ -244,24 +389,35 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): self.logger.info("Testing setters for matrix: \n" + str(array)) - selectors_unique = [2, np.ravel(2)[0], - [1, 2], CArray([1, 2]), - CArray([[1, 2]]), CArray([1, 2], tosparse=True), - slice(1, 3), [False, True, True], - CArray([False, True, True])] + selectors_unique = [ + 2, + np.ravel(2)[0], + [1, 2], + CArray([1, 2]), + CArray([[1, 2]]), + CArray([1, 2], tosparse=True), + slice(1, 3), + [False, True, True], + CArray([False, True, True]), + ] selectors = list(itertools.product(selectors_unique, repeat=2)) selectors += [(2, 2), (2, 2)] assignments_a = [10, 10] + 5 * [CArray([[10, 20]])] + 2 * [CArray([10, 20])] - assignments_b = [CArray([[10], [20]])] + [CArray([[10], [20]], tosparse=True)] + \ - 7 * [CArray([[10, 20], [30, 40]])] + assignments_b = ( + [CArray([[10], [20]])] + + [CArray([[10], [20]], tosparse=True)] + + 7 * [CArray([[10, 20], [30, 40]])] + ) assignments_c = [CArray([10]), CArray([10], tosparse=True)] assignments = 2 * assignments_a + 7 * assignments_b + assignments_c - targets_a = 2 * [CArray([[1, 2, 0], [2, 4, 6], [0, 6, 10]])] + \ - 7 * [CArray([[1, 2, 0], [2, 4, 6], [0, 10, 20]])] - targets_b = 2 * [CArray([[1, 2, 0], [2, 4, 10], [0, 6, 20]])] + \ - 7 * [CArray([[1, 2, 0], [2, 10, 20], [0, 30, 40]])] + targets_a = 2 * [CArray([[1, 2, 0], [2, 4, 6], [0, 6, 10]])] + 7 * [ + CArray([[1, 2, 0], [2, 4, 6], [0, 10, 20]]) + ] + targets_b = 2 * [CArray([[1, 2, 0], [2, 4, 10], [0, 6, 20]])] + 7 * [ + CArray([[1, 2, 0], [2, 10, 20], [0, 30, 40]]) + ] targets_c = 2 * [CArray([[1, 2, 0], [2, 4, 6], [0, 6, 10]])] targets = 2 * targets_a + 7 * targets_b + targets_c @@ -273,27 +429,53 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): self.logger.info("Testing setters for array: \n" + str(array)) - selectors_a = [[[0, 0], [2, 0]], - [[0, 0], [np.ravel(2)[0], np.ravel(0)[0]]], - [[np.ravel(0)[0], np.ravel(0)[0]], [2, 0]], - [[np.ravel(0)[0], np.ravel(0)[0]], [np.ravel(2)[0], np.ravel(0)[0]]], - CArray([[True, False, True]]), CArray([True, False, True]), - CArray([[True, False, True]]), CArray([True, False, True]) - ] - selectors_row = [0, np.ravel(0)[0], [0], CArray([0]), - -1, np.ravel(-1)[0], [-1], CArray([-1]), - True, np.ravel(True)[0], [True], CArray([True])] - selectors_col = [[], 0, np.ravel(0)[0], [1, 2], CArray([1, 2]), - slice(1, 3), [False, True, True], CArray([False, True, True])] - selectors = selectors_a + [(x, y) for x in selectors_row for y in selectors_col] - - assignments_a = 2 * [CArray([10, 20])] + \ - 2 * [CArray([[10, 20]])] + \ - 2 * [CArray([10, 20])] + \ - 2 * [CArray([[10, 20]])] - assignments_b = [0] + [10, 10] + \ - 2 * [CArray([[10, 20]])] + \ - 3 * [CArray([10, 20])] + selectors_a = [ + [[0, 0], [2, 0]], + [[0, 0], [np.ravel(2)[0], np.ravel(0)[0]]], + [[np.ravel(0)[0], np.ravel(0)[0]], [2, 0]], + [[np.ravel(0)[0], np.ravel(0)[0]], [np.ravel(2)[0], np.ravel(0)[0]]], + CArray([[True, False, True]]), + CArray([True, False, True]), + CArray([[True, False, True]]), + CArray([True, False, True]), + ] + selectors_row = [ + 0, + np.ravel(0)[0], + [0], + CArray([0]), + -1, + np.ravel(-1)[0], + [-1], + CArray([-1]), + True, + np.ravel(True)[0], + [True], + CArray([True]), + ] + selectors_col = [ + [], + 0, + np.ravel(0)[0], + [1, 2], + CArray([1, 2]), + slice(1, 3), + [False, True, True], + CArray([False, True, True]), + ] + selectors = selectors_a + [ + (x, y) for x in selectors_row for y in selectors_col + ] + + assignments_a = ( + 2 * [CArray([10, 20])] + + 2 * [CArray([[10, 20]])] + + 2 * [CArray([10, 20])] + + 2 * [CArray([[10, 20]])] + ) + assignments_b = ( + [0] + [10, 10] + 2 * [CArray([[10, 20]])] + 3 * [CArray([10, 20])] + ) assignments = assignments_a + 12 * assignments_b targets_a = CArray([20, 0, 10]) @@ -302,12 +484,22 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): targets_d = CArray([4, 10, 20]) # Output always flat for flat arrays if array.ndim == 1: - targets = 4 * [targets_a] + 4 * [targets_b] + \ - 12 * ([CArray([4, 0, 6])] + 2 * [targets_c] + 5 * [targets_d]) + targets = ( + 4 * [targets_a] + + 4 * [targets_b] + + 12 * ([CArray([4, 0, 6])] + 2 * [targets_c] + 5 * [targets_d]) + ) else: - targets = 4 * [targets_a.atleast_2d()] + 4 * [targets_b.atleast_2d()] + \ - 12 * ([CArray([[4, 0, 6]])] + - 2 * [targets_c.atleast_2d()] + 5 * [targets_d.atleast_2d()]) + targets = ( + 4 * [targets_a.atleast_2d()] + + 4 * [targets_b.atleast_2d()] + + 12 + * ( + [CArray([[4, 0, 6]])] + + 2 * [targets_c.atleast_2d()] + + 5 * [targets_d.atleast_2d()] + ) + ) test_selectors(array, selectors, assignments, targets) @@ -317,24 +509,47 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): self.logger.info("Testing setters for vector: \n" + str(array)) - selectors = [[], 0, np.ravel(0)[0], [1, 2], CArray([1, 2]), - slice(1, 3), slice(None), 0, 0] - - assignments = [0] + [10, 10] + 2 * [CArray([[10, 20]])] + \ - [CArray([[10, 20]], tosparse=True)] + [CArray([[10, 20, 30]])] + \ - [CArray([10]), CArray([10], tosparse=True)] + selectors = [ + [], + 0, + np.ravel(0)[0], + [1, 2], + CArray([1, 2]), + slice(1, 3), + slice(None), + 0, + 0, + ] + + assignments = ( + [0] + + [10, 10] + + 2 * [CArray([[10, 20]])] + + [CArray([[10, 20]], tosparse=True)] + + [CArray([[10, 20, 30]])] + + [CArray([10]), CArray([10], tosparse=True)] + ) targets_a = CArray([10, 0, 6]) targets_b = CArray([4, 10, 20]) targets_c = CArray([10, 20, 30]) # Output always flat for flat arrays if array.ndim == 1: - targets = [CArray([4, 0, 6])] + 2 * [targets_a] + \ - 3 * [targets_b] + [targets_c] + 2 * [targets_a] + targets = ( + [CArray([4, 0, 6])] + + 2 * [targets_a] + + 3 * [targets_b] + + [targets_c] + + 2 * [targets_a] + ) else: - targets = [CArray([[4, 0, 6]])] + 2 * [targets_a.atleast_2d()] + \ - 3 * [targets_b.atleast_2d()] + [targets_c.atleast_2d()] + \ - 2 * [targets_a.atleast_2d()] + targets = ( + [CArray([[4, 0, 6]])] + + 2 * [targets_a.atleast_2d()] + + 3 * [targets_b.atleast_2d()] + + [targets_c.atleast_2d()] + + 2 * [targets_a.atleast_2d()] + ) test_selectors(array, selectors, assignments, targets) @@ -344,8 +559,18 @@ def test_selectors(input_array, selector_list, assignment_list, target_list): self.logger.info("Testing setters for array: \n" + str(array)) - selectors = [[], 0, np.ravel(0)[0], True, [True], CArray([True]), - slice(0, 1), slice(None), 0, 0] + selectors = [ + [], + 0, + np.ravel(0)[0], + True, + [True], + CArray([True]), + slice(0, 1), + slice(None), + 0, + 0, + ] assignments = 8 * [10] + [CArray([10]), CArray([10], tosparse=True)] @@ -388,7 +613,7 @@ def _item(a): self.array_dense.item() with self.assertRaises(ValueError): self.array_sparse.item() - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_init.py b/src/secml/array/tests/test_c_array_init.py index 6b8e3f85..13cc9e94 100644 --- a/src/secml/array/tests/test_c_array_init.py +++ b/src/secml/array/tests/test_c_array_init.py @@ -6,8 +6,7 @@ from secml.array import CArray from secml.array.c_dense import CDense from secml.array.c_sparse import CSparse -from secml.core.type_utils import \ - is_scalar, is_bool, is_list, is_list_of_lists +from secml.core.type_utils import is_scalar, is_bool, is_list, is_list_of_lists class TestCArrayInit(CArrayTestCases): @@ -24,38 +23,32 @@ def check_init_builtin(totest_elem): if is_list_of_lists(totest_elem): if not is_list_of_lists(totest_elem[0]): - self.assertEqual( - init_array.shape[0], len(totest_elem)) - self.assertEqual( - init_array.shape[1], len(totest_elem[0])) + self.assertEqual(init_array.shape[0], len(totest_elem)) + self.assertEqual(init_array.shape[1], len(totest_elem[0])) else: # N-Dimensional input in_shape = init_array.input_shape self.assertEqual(in_shape[0], len(totest_elem)) self.assertEqual(in_shape[1], len(totest_elem[0])) - self.assertEqual( - init_array.shape[0], len(totest_elem)) - self.assertEqual( - init_array.shape[1], sum(in_shape[1:])) + self.assertEqual(init_array.shape[0], len(totest_elem)) + self.assertEqual(init_array.shape[1], sum(in_shape[1:])) elif is_list(totest_elem): if init_array.issparse is True: - self.assertEqual( - init_array.shape[1], len(totest_elem)) + self.assertEqual(init_array.shape[1], len(totest_elem)) elif init_array.isdense is True: self.assertTrue(init_array.ndim == 1) - self.assertEqual( - init_array.shape[0], len(totest_elem)) - self.assertEqual( - init_array.input_shape, (len(totest_elem), )) + self.assertEqual(init_array.shape[0], len(totest_elem)) + self.assertEqual(init_array.input_shape, (len(totest_elem),)) elif is_scalar(totest_elem) or is_bool(totest_elem): self.assertEqual(init_array.size, 1) - self.assertEqual(init_array.input_shape, (1, )) + self.assertEqual(init_array.input_shape, (1,)) else: raise TypeError( "test_init_builtin should not be used " - "to test {:}".format(type(totest_elem))) + "to test {:}".format(type(totest_elem)) + ) self.logger.info("Initializing CArray with built-in types...") check_init_builtin([[2, 3], [22, 33]]) @@ -66,29 +59,43 @@ def check_init_builtin(totest_elem): check_init_builtin([[True, False], [True, True]]) check_init_builtin([True, False]) check_init_builtin([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]) - check_init_builtin([[[True, False], [True, True]], - [[False, False], [False, True]]]) + check_init_builtin( + [[[True, False], [True, True]], [[False, False], [False, True]]] + ) # The following input data is malformed and should raise TypeError with self.logger.catch_warnings(): self.logger.filterwarnings( - action='ignore', + action="ignore", message="Creating an ndarray from ragged", - category=np.VisibleDeprecationWarning) - with self.assertRaises((TypeError, ValueError,)): + category=np.VisibleDeprecationWarning, + ) + with self.assertRaises( + ( + TypeError, + ValueError, + ) + ): CArray([[2, 3], [22]]) - with self.assertRaises((TypeError, ValueError,)): + with self.assertRaises( + ( + TypeError, + ValueError, + ) + ): CArray([[[2, 3], [22]], [[4, 5], [44, 55]]]) def test_init_array(self): """Test CArray initialization using arrays.""" self.logger.info("Initializing CArray with another CArray...") - arrays_list = [CArray([[2, 3], [22, 33]]), - CArray([2, 3]), - CArray([[2], [3]]), - CArray(3), - CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]])] + arrays_list = [ + CArray([[2, 3], [22, 33]]), + CArray([2, 3]), + CArray([[2], [3]]), + CArray(3), + CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]), + ] for init_elem in arrays_list: self.logger.info(init_elem) @@ -99,11 +106,13 @@ def test_init_array(self): self.assertEqual(init_elem.input_shape, array.input_shape) self.logger.info("Initializing CArray with a CDense...") - dense_list = [CDense([[2, 3], [22, 33]]), - CDense([2, 3]), - CDense([[2], [3]]), - CDense([3]), - CDense([[[2, 3], [22, 33]], [[4, 5], [44, 55]]])] + dense_list = [ + CDense([[2, 3], [22, 33]]), + CDense([2, 3]), + CDense([[2], [3]]), + CDense([3]), + CDense([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]), + ] for init_elem in dense_list: self.logger.info(init_elem) @@ -114,11 +123,13 @@ def test_init_array(self): self.assertEqual(init_elem.input_shape, array.input_shape) self.logger.info("Initializing CArray with an ndarray...") - dense_list = [np.array([[2, 3], [22, 33]]), - np.array([2, 3]), - np.array([[2], [3]]), - np.array([3]), - np.array([[[2, 3], [22, 33]], [[4, 5], [44, 55]]])] + dense_list = [ + np.array([[2, 3], [22, 33]]), + np.array([2, 3]), + np.array([[2], [3]]), + np.array([3]), + np.array([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]), + ] for init_elem in dense_list: self.logger.info(init_elem) @@ -138,7 +149,8 @@ def test_init_array(self): CArray([2, 3], tosparse=True), CArray([[2], [3]], tosparse=True), CArray(3, tosparse=True), - CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]], tosparse=True)] + CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]], tosparse=True), + ] for init_elem in sparse_list: self.logger.info(init_elem) @@ -148,11 +160,13 @@ def test_init_array(self): self.assertEqual(init_elem.input_shape, array.input_shape) self.logger.info("Initializing CArray with a CSparse...") - sparse_list = [CSparse([[2, 3], [22, 33]]), - CSparse([2, 3]), - CSparse([[2], [3]]), - CSparse([3]), - CSparse([[[2, 3], [22, 33]], [[4, 5], [44, 55]]])] + sparse_list = [ + CSparse([[2, 3], [22, 33]]), + CSparse([2, 3]), + CSparse([[2], [3]]), + CSparse([3]), + CSparse([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]), + ] for init_elem in sparse_list: self.logger.info(init_elem) @@ -163,10 +177,12 @@ def test_init_array(self): self.assertEqual(array.input_shape, init_elem.input_shape) self.logger.info("Initializing CArray with a csr_matrix...") - sparse_list = [scs.csr_matrix([[2, 3], [22, 33]]), - scs.csr_matrix([2, 3]), - scs.csr_matrix([[2], [3]]), - scs.csr_matrix([3])] + sparse_list = [ + scs.csr_matrix([[2, 3], [22, 33]]), + scs.csr_matrix([2, 3]), + scs.csr_matrix([[2], [3]]), + scs.csr_matrix([3]), + ] for init_elem in sparse_list: self.logger.info(init_elem) @@ -186,23 +202,26 @@ def test_init_reshape(self): size = out_def.size # Expected size in_shape = out_def.shape # Expected input_shape - for shape in [size, (size, ), (1, size), (size, 1)]: + for shape in [size, (size,), (1, size), (size, 1)]: out_res = CArray(a, tosparse=sparse, shape=shape) # Resulting shape will always be (1, n) for sparse if is_scalar(shape): - shape = (1, shape) if out_res.issparse else (shape, ) + shape = (1, shape) if out_res.issparse else (shape,) if out_res.issparse and len(shape) < 2: shape = (1, shape[0]) - self.logger.info("Expected 'shape' {:}, got {:}".format( - shape, out_res.shape)) + self.logger.info( + "Expected 'shape' {:}, got {:}".format(shape, out_res.shape) + ) self.assertEqual(out_res.shape, shape) # The input_shape should not be altered by reshaping self.logger.info( "Expected 'input_shape' {:}, got {:}".format( - in_shape, out_res.input_shape)) + in_shape, out_res.input_shape + ) + ) self.assertEqual(out_res.input_shape, in_shape) with self.assertRaises(ValueError): @@ -213,18 +232,16 @@ def test_init_empty(self): """Test CArray initialization using empty structures.""" # Initialization using empty arrays empty_init = [] - for test_case in (False, ): - self.logger.info( - "Testing flat empty, tosparse: {:}".format(test_case)) + for test_case in (False,): + self.logger.info("Testing flat empty, tosparse: {:}".format(test_case)) array_empty = CArray(empty_init, tosparse=test_case) self.assertEqual(array_empty.size, 0) - self.assertEqual(array_empty.shape, (0, )) + self.assertEqual(array_empty.shape, (0,)) self.assertEqual(array_empty.ndim, 1) empty_init = [] # Empty sparse arrays are always 2D - for test_case in (True, ): - self.logger.info( - "Testing flat empty, tosparse: {:}".format(test_case)) + for test_case in (True,): + self.logger.info("Testing flat empty, tosparse: {:}".format(test_case)) array_empty = CArray(empty_init, tosparse=test_case) self.assertEqual(array_empty.size, 0) self.assertEqual(array_empty.shape, (1, 0)) @@ -232,8 +249,7 @@ def test_init_empty(self): empty_init = [[]] for test_case in (False, True): - self.logger.info( - "Testing 2D empty, tosparse: {:}".format(test_case)) + self.logger.info("Testing 2D empty, tosparse: {:}".format(test_case)) array_empty = CArray(empty_init, tosparse=test_case) self.assertEqual(array_empty.size, 0) self.assertEqual(array_empty.shape, (1, 0)) @@ -242,8 +258,7 @@ def test_init_empty(self): def test_input_shape(self): """Test CArray.input_shape behavior.""" array = CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]]) - array_s = \ - CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]], tosparse=True) + array_s = CArray([[[2, 3], [22, 33]], [[4, 5], [44, 55]]], tosparse=True) ref_shape = (2, 2, 2) # not propagate on getitem (as it returns new objects) @@ -304,5 +319,5 @@ def test_input_shape(self): self.assertEqual(array_s.todense().input_shape, ref_shape) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_properties.py b/src/secml/array/tests/test_c_array_properties.py index b46815e8..f5555e66 100644 --- a/src/secml/array/tests/test_c_array_properties.py +++ b/src/secml/array/tests/test_c_array_properties.py @@ -11,23 +11,23 @@ def test_non_zero_indices(self): self.logger.info("Testing non_zero_indices property") # FIXME: UPDATE UNITTESTS - def non_zero_indices(self, structure_name, matrix, row_vector, - column_vector): + def non_zero_indices(self, structure_name, matrix, row_vector, column_vector): self.logger.info("nnz_indices: matrix \n" + str(matrix)) - self.logger.info( - "Non zero index are: \n" + str(matrix.nnz_indices)) + self.logger.info("Non zero index are: \n" + str(matrix.nnz_indices)) self.assertEqual( - matrix.nnz_indices, [[0, 0, 1, 1, 2, 2], [0, 3, 0, 1, 0, 1]]) + matrix.nnz_indices, [[0, 0, 1, 1, 2, 2], [0, 3, 0, 1, 0, 1]] + ) self.assertIsInstance(matrix.nnz_indices, list) self.assertEqual(2, len(matrix.nnz_indices)) - self.assertTrue( - all(isinstance(elem, list) for elem in matrix.nnz_indices)) + self.assertTrue(all(isinstance(elem, list) for elem in matrix.nnz_indices)) - non_zero_indices(self, "sparse", self.array_sparse, self.row_sparse, - self.column_sparse) - non_zero_indices(self, "dense", self.array_dense, self.row_sparse, - self.column_dense) + non_zero_indices( + self, "sparse", self.array_sparse, self.row_sparse, self.column_sparse + ) + non_zero_indices( + self, "dense", self.array_dense, self.row_sparse, self.column_dense + ) def test_nnz(self): """Test for CArray.nnz property.""" @@ -67,7 +67,7 @@ def check_nnz_data(array, expected_nnz): check_nnz_data(self.array_dense, CArray([1, 5, 2, 4, 3, 6])) check_nnz_data(self.row_dense, CArray([4, 6])) check_nnz_data(self.column_dense, CArray([4, 6])) - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_saveload.py b/src/secml/array/tests/test_c_array_saveload.py index 8421a287..8742e64d 100644 --- a/src/secml/array/tests/test_c_array_saveload.py +++ b/src/secml/array/tests/test_c_array_saveload.py @@ -11,30 +11,33 @@ class TestCArraySaveLoad(CArrayTestCases): call tearDown after each one, resetting test files even after errors. """ + def setUp(self): super(TestCArraySaveLoad, self).setUp() - self.test_file = fm.join(fm.abspath(__file__), 'test.txt') - self.test_file_2 = fm.join(fm.abspath(__file__), 'test2.txt') + self.test_file = fm.join(fm.abspath(__file__), "test.txt") + self.test_file_2 = fm.join(fm.abspath(__file__), "test2.txt") def test_save_load_sparse(self): """Test save/load of CArray""" - self.logger.info( - "UNITTEST - CArray - Testing save/load for sparse matrix") + self.logger.info("UNITTEST - CArray - Testing save/load for sparse matrix") self.array_sparse.save(self.test_file) # Saving to a file handle is not supported for sparse arrays with self.assertRaises(NotImplementedError): - with open(self.test_file_2, 'w') as f: + with open(self.test_file_2, "w") as f: self.array_sparse.save(f) loaded_array_sparse = CArray.load( - self.test_file, arrayformat='sparse', dtype=int) + self.test_file, arrayformat="sparse", dtype=int + ) - self.assertFalse((loaded_array_sparse != self.array_sparse).any(), - "Saved and loaded arrays (sparse) are not equal!") + self.assertFalse( + (loaded_array_sparse != self.array_sparse).any(), + "Saved and loaded arrays (sparse) are not equal!", + ) def test_save_load_sparse_conversion(self): """Test save/load of CArray""" @@ -45,31 +48,34 @@ def test_save_load_sparse_conversion(self): # Saving to a file handle is not supported for sparse arrays with self.assertRaises(NotImplementedError): - with open(self.test_file_2, 'w') as f: + with open(self.test_file_2, "w") as f: self.array_sparse.save(f) loaded_array_sparse = CArray.load( - self.test_file, arrayformat='sparse', dtype=int) + self.test_file, arrayformat="sparse", dtype=int + ) - self.assertFalse((loaded_array_sparse != self.array_sparse).any(), - "Saved and loaded arrays (sparse) are not equal!") + self.assertFalse( + (loaded_array_sparse != self.array_sparse).any(), + "Saved and loaded arrays (sparse) are not equal!", + ) def test_save_load_dense(self): """Test save/load of CArray""" - self.logger.info( - "UNITTEST - CSparse - Testing save/load for dense matrix") + self.logger.info("UNITTEST - CSparse - Testing save/load for dense matrix") self.array_dense.save(self.test_file, overwrite=True) - loaded_array_dense = CArray.load( - self.test_file, arrayformat='dense', dtype=int) + loaded_array_dense = CArray.load(self.test_file, arrayformat="dense", dtype=int) - self.assertFalse((loaded_array_dense != self.array_dense).any(), - "Saved and loaded arrays (sparse) are not equal!") + self.assertFalse( + (loaded_array_dense != self.array_dense).any(), + "Saved and loaded arrays (sparse) are not equal!", + ) # Only 'dense' and 'sparse' arrayformat are supported with self.assertRaises(ValueError): - CArray.load(self.test_file, arrayformat='test') + CArray.load(self.test_file, arrayformat="test") def tearDown(self): # Remove test file(s) if exist @@ -81,5 +87,5 @@ def tearDown(self): raise e -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_sysoverloads.py b/src/secml/array/tests/test_c_array_sysoverloads.py index fc9229c0..1fcb2d83 100644 --- a/src/secml/array/tests/test_c_array_sysoverloads.py +++ b/src/secml/array/tests/test_c_array_sysoverloads.py @@ -17,46 +17,55 @@ def test_operators_array_vs_array_broadcast(self): """Test for mathematical operators array vs array with broadcast.""" operators = [op.add, op.sub] expected_result = [CSparse, CDense, CDense, CDense] - items = [(self.array_sparse_sym, self.row_sparse), - (self.array_sparse_sym, self.row_dense), - (self.array_dense_sym, self.row_sparse), - (self.array_dense_sym, self.row_dense)] + items = [ + (self.array_sparse_sym, self.row_sparse), + (self.array_sparse_sym, self.row_dense), + (self.array_dense_sym, self.row_sparse), + (self.array_dense_sym, self.row_dense), + ] self._test_operator_cycle(operators, items, expected_result) operators = [op.mul] expected_result = [CSparse, CSparse, CSparse, CDense] - items = [(self.array_sparse_sym, self.row_sparse), - (self.array_sparse_sym, self.row_dense), - (self.array_dense_sym, self.row_sparse), - (self.array_dense_sym, self.row_dense)] + items = [ + (self.array_sparse_sym, self.row_sparse), + (self.array_sparse_sym, self.row_dense), + (self.array_dense_sym, self.row_sparse), + (self.array_dense_sym, self.row_dense), + ] self._test_operator_cycle(operators, items, expected_result) operators = [op.truediv, op.floordiv] expected_result = [CDense, CDense, CDense, CDense] - items = [(self.array_sparse_sym, self.row_sparse), - (self.array_sparse_sym, self.row_dense), - (self.array_dense_sym, self.row_sparse), - (self.array_dense_sym, self.row_dense)] + items = [ + (self.array_sparse_sym, self.row_sparse), + (self.array_sparse_sym, self.row_dense), + (self.array_dense_sym, self.row_sparse), + (self.array_dense_sym, self.row_dense), + ] with self.logger.catch_warnings(): # For 0 / 0 divisions self.logger.filterwarnings( - action='ignore', + action="ignore", message="divide by zero encountered in true_divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self._test_operator_cycle(operators, items, expected_result) operators = [op.pow, CArray.pow] expected_result = [CDense, CDense] - items = [(self.array_dense_sym, self.row_sparse), - (self.array_dense_sym, self.row_dense)] + items = [ + (self.array_dense_sym, self.row_sparse), + (self.array_dense_sym, self.row_dense), + ] self._test_operator_cycle(operators, items, expected_result) # Sparse array ** array is not supported with self.assertRaises(TypeError): - self.array_sparse ** self.row_sparse + self.array_sparse**self.row_sparse with self.assertRaises(TypeError): - self.array_sparse ** self.row_dense + self.array_sparse**self.row_dense with self.assertRaises(TypeError): self.array_sparse.pow(self.row_sparse) with self.assertRaises(TypeError): @@ -66,46 +75,55 @@ def test_operators_array_vs_array(self): """Test for mathematical operators array vs array.""" operators = [op.add, op.sub] expected_result = [CSparse, CDense, CDense, CDense] - items = [(self.array_sparse, self.array_sparse), - (self.array_sparse, self.array_dense), - (self.array_dense, self.array_sparse), - (self.array_dense, self.array_dense)] + items = [ + (self.array_sparse, self.array_sparse), + (self.array_sparse, self.array_dense), + (self.array_dense, self.array_sparse), + (self.array_dense, self.array_dense), + ] self._test_operator_cycle(operators, items, expected_result) operators = [op.mul] expected_result = [CSparse, CSparse, CSparse, CDense] - items = [(self.array_sparse, self.array_sparse), - (self.array_sparse, self.array_dense), - (self.array_dense, self.array_sparse), - (self.array_dense, self.array_dense)] + items = [ + (self.array_sparse, self.array_sparse), + (self.array_sparse, self.array_dense), + (self.array_dense, self.array_sparse), + (self.array_dense, self.array_dense), + ] self._test_operator_cycle(operators, items, expected_result) operators = [op.truediv, op.floordiv] expected_result = [CDense, CDense, CDense, CDense] - items = [(self.array_sparse, self.array_sparse), - (self.array_sparse, self.array_dense), - (self.array_dense, self.array_sparse), - (self.array_dense, self.array_dense)] + items = [ + (self.array_sparse, self.array_sparse), + (self.array_sparse, self.array_dense), + (self.array_dense, self.array_sparse), + (self.array_dense, self.array_dense), + ] with self.logger.catch_warnings(): # For 0 / 0 divisions self.logger.filterwarnings( - action='ignore', + action="ignore", message="invalid value encountered in true_divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self._test_operator_cycle(operators, items, expected_result) operators = [op.pow, CArray.pow] expected_result = [CDense, CDense] - items = [(self.array_dense, self.array_sparse), - (self.array_dense, self.array_dense)] + items = [ + (self.array_dense, self.array_sparse), + (self.array_dense, self.array_dense), + ] self._test_operator_cycle(operators, items, expected_result) # Sparse array ** array is not supported with self.assertRaises(TypeError): - self.array_sparse ** self.array_sparse + self.array_sparse**self.array_sparse with self.assertRaises(TypeError): - self.array_sparse ** self.array_dense + self.array_sparse**self.array_dense with self.assertRaises(TypeError): self.array_sparse.pow(self.array_sparse) with self.assertRaises(TypeError): @@ -146,26 +164,27 @@ def test_operators_array(self): def test_operators_array_vs_scalar(self): """Test for mathematical operators array vs scalar.""" - test_scalars = [ - 2, np.ravel(2)[0], 2.0, np.ravel(2.0)[0], np.float32(2.0)] - test_z_scalars = [ - 0, np.ravel(0)[0], 0.0, np.ravel(0.0)[0], np.float32(0.0)] + test_scalars = [2, np.ravel(2)[0], 2.0, np.ravel(2.0)[0], np.float32(2.0)] + test_z_scalars = [0, np.ravel(0)[0], 0.0, np.ravel(0.0)[0], np.float32(0.0)] # DENSE ARRAY + NONZERO SCALAR, NONZERO SCALAR + DENSE ARRAY # sparse array + nonzero scalar is not supported (and viceversa) operators = [op.add, op.mul] expected_result = [CDense] * 10 - items = list(product([self.array_dense], test_scalars)) + \ - list(product(test_scalars, [self.array_dense])) + items = list(product([self.array_dense], test_scalars)) + list( + product(test_scalars, [self.array_dense]) + ) self._test_operator_cycle(operators, items, expected_result) # ARRAY + ZERO SCALAR, ZERO SCALAR + ARRAY operators = [op.add, op.mul] expected_result = [CDense] * 10 + [CSparse] * 10 - items = list(product([self.array_dense], test_z_scalars)) + \ - list(product(test_z_scalars, [self.array_dense])) + \ - list(product([self.array_sparse], test_z_scalars)) + \ - list(product(test_z_scalars, [self.array_sparse])) + items = ( + list(product([self.array_dense], test_z_scalars)) + + list(product(test_z_scalars, [self.array_dense])) + + list(product([self.array_sparse], test_z_scalars)) + + list(product(test_z_scalars, [self.array_sparse])) + ) self._test_operator_cycle(operators, items, expected_result) # DENSE ARRAY - NONZERO SCALAR @@ -184,40 +203,47 @@ def test_operators_array_vs_scalar(self): # ARRAY - ZERO SCALAR operators = [op.sub] expected_result = [CDense] * 5 + [CSparse] * 5 - items = list(product([self.array_dense], test_z_scalars)) + \ - list(product([self.array_sparse], test_z_scalars)) + items = list(product([self.array_dense], test_z_scalars)) + list( + product([self.array_sparse], test_z_scalars) + ) self._test_operator_cycle(operators, items, expected_result) # ZERO SCALAR - ARRAY operators = [op.sub] expected_result = [CDense] * 5 + [CSparse] * 5 - items = list(product(test_z_scalars, [self.array_dense])) + \ - list(product(test_z_scalars, [self.array_sparse])) + items = list(product(test_z_scalars, [self.array_dense])) + list( + product(test_z_scalars, [self.array_sparse]) + ) self._test_operator_cycle(operators, items, expected_result) # ARRAY * NONZERO SCALAR, NONZERO SCALAR * ARRAY operators = [op.mul] expected_result = [CDense] * 10 + [CSparse] * 10 - items = list(product([self.array_dense], test_scalars)) + \ - list(product(test_scalars, [self.array_dense])) + \ - list(product([self.array_sparse], test_scalars)) + \ - list(product(test_scalars, [self.array_sparse])) + items = ( + list(product([self.array_dense], test_scalars)) + + list(product(test_scalars, [self.array_dense])) + + list(product([self.array_sparse], test_scalars)) + + list(product(test_scalars, [self.array_sparse])) + ) self._test_operator_cycle(operators, items, expected_result) # ARRAY * ZERO SCALAR, ZERO SCALAR * ARRAY operators = [op.mul] expected_result = [CDense] * 10 + [CSparse] * 10 - items = list(product([self.array_dense], test_z_scalars)) + \ - list(product(test_z_scalars, [self.array_dense])) + \ - list(product([self.array_sparse], test_z_scalars)) + \ - list(product(test_z_scalars, [self.array_sparse])) + items = ( + list(product([self.array_dense], test_z_scalars)) + + list(product(test_z_scalars, [self.array_dense])) + + list(product([self.array_sparse], test_z_scalars)) + + list(product(test_z_scalars, [self.array_sparse])) + ) self._test_operator_cycle(operators, items, expected_result) # ARRAY / NONZERO SCALAR operators = [op.truediv, op.floordiv] expected_result = [CDense] * 5 + [CSparse] * 5 - items = list(product([self.array_dense], test_scalars)) + \ - list(product([self.array_sparse], test_scalars)) + items = list(product([self.array_dense], test_scalars)) + list( + product([self.array_sparse], test_scalars) + ) self._test_operator_cycle(operators, items, expected_result) # NONZERO SCALAR / DENSE ARRAY @@ -228,13 +254,15 @@ def test_operators_array_vs_scalar(self): with self.logger.catch_warnings(): # we are dividing using arrays having zeros self.logger.filterwarnings( - action='ignore', + action="ignore", message="divide by zero encountered in true_divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self.logger.filterwarnings( - action='ignore', + action="ignore", message="divide by zero encountered in divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self._test_operator_cycle(operators, items, expected_result) # ZERO SCALAR / DENSE ARRAY @@ -245,25 +273,29 @@ def test_operators_array_vs_scalar(self): with self.logger.catch_warnings(): # we are dividing a zero scalar by something self.logger.filterwarnings( - action='ignore', + action="ignore", message="divide by zero encountered in true_divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) # For 0 / 0 divisions self.logger.filterwarnings( - action='ignore', + action="ignore", message="invalid value encountered in true_divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self.logger.filterwarnings( - action='ignore', + action="ignore", message="invalid value encountered in divide", - category=RuntimeWarning) + category=RuntimeWarning, + ) self._test_operator_cycle(operators, items, expected_result) # ARRAY ** NONZERO SCALAR operators = [op.pow, CArray.pow] expected_result = [CDense] * 5 + [CSparse] * 5 - items = list(product([self.array_dense], test_scalars)) + \ - list(product([self.array_sparse], test_scalars)) + items = list(product([self.array_dense], test_scalars)) + list( + product([self.array_sparse], test_scalars) + ) self._test_operator_cycle(operators, items, expected_result) # NONZERO SCALAR ** DENSE ARRAY @@ -288,22 +320,25 @@ def test_operators_array_vs_scalar(self): self._test_operator_cycle(operators, items, expected_result) # NONZERO SCALAR +,- SPARSE ARRAY NOT SUPPORTED (AND VICEVERSA) - items = list(product([self.array_sparse], test_scalars)) + \ - list(product(test_scalars, [self.array_sparse])) + items = list(product([self.array_sparse], test_scalars)) + list( + product(test_scalars, [self.array_sparse]) + ) operators = [op.add, op.sub] self._test_operator_notimplemented(operators, items) # ZERO SCALAR / SPARSE ARRAY NOT SUPPORTED # NONZERO SCALAR / SPARSE ARRAY NOT SUPPORTED - items = list(product(test_scalars, [self.array_sparse])) + \ - list(product(test_z_scalars, [self.array_sparse])) + items = list(product(test_scalars, [self.array_sparse])) + list( + product(test_z_scalars, [self.array_sparse]) + ) operators = [op.truediv, op.floordiv] self._test_operator_notimplemented(operators, items) # NONZERO SCALAR ** SPARSE ARRAY NOT SUPPORTED # ZERO SCALAR ** SPARSE ARRAY NOT SUPPORTED - items = list(product(test_scalars, [self.array_sparse])) + \ - list(product(test_z_scalars, [self.array_sparse])) + items = list(product(test_scalars, [self.array_sparse])) + list( + product(test_z_scalars, [self.array_sparse]) + ) operators = [op.pow] self._test_operator_notimplemented(operators, items) @@ -318,24 +353,35 @@ def test_operators_array_vs_unsupported(self): """Test for mathematical operators array vs unsupported types.""" def test_unsupported(x): - operators = [op.add, op.sub, op.mul, - op.truediv, op.floordiv, op.pow] + operators = [op.add, op.sub, op.mul, op.truediv, op.floordiv, op.pow] for operator in operators: with self.assertRaises(TypeError): - self.logger.info("Testing {:} dense vs '{:}'".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vs '{:}'".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} sparse vs '{:}'".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vs '{:}'".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_sparse, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} dense vect vs '{:}'".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vect vs '{:}'".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_flat_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} sparse vect vs '{:}'".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vect vs '{:}'".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_sparse, x) test_unsupported(np.array([1, 2, 3])) @@ -344,30 +390,41 @@ def test_unsupported(x): test_unsupported((1, 2, 3)) test_unsupported(set([1, 2, 3])) test_unsupported(dict({1: 2})) - test_unsupported('test') + test_unsupported("test") def test_operators_unsupported_vs_array(self): """Test for mathematical operators unsupported types vs array.""" def test_unsupported(x): - operators = [op.add, op.sub, op.mul, - op.truediv, op.floordiv, op.pow] + operators = [op.add, op.sub, op.mul, op.truediv, op.floordiv, op.pow] for operator in operators: with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.array_dense) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.array_sparse) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.row_flat_dense) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.row_sparse) # Array do broadcasting of each element wrt our array @@ -379,59 +436,68 @@ def test_unsupported(x): test_unsupported((1, 2, 3)) test_unsupported(set([1, 2, 3])) test_unsupported(dict({1: 2})) - test_unsupported('test') + test_unsupported("test") def test_comparison_array_vs_array(self): """Test for comparison operators array vs array.""" operators = [op.eq, op.lt, op.le, op.gt, op.ge, op.ne] expected_result = [CSparse, CDense, CDense, CDense] - items = [(self.array_sparse, self.array_sparse), - (self.array_sparse, self.array_dense), - (self.array_dense, self.array_sparse), - (self.array_dense, self.array_dense)] + items = [ + (self.array_sparse, self.array_sparse), + (self.array_sparse, self.array_dense), + (self.array_dense, self.array_sparse), + (self.array_dense, self.array_dense), + ] with self.logger.catch_warnings(): # Comparing sparse arrays using ==, <= and >= is inefficient self.logger.filterwarnings( - action='ignore', + action="ignore", message="Comparing sparse matrices using*", - category=scs.SparseEfficiencyWarning) + category=scs.SparseEfficiencyWarning, + ) self._test_operator_cycle(operators, items, expected_result) def test_comparison_array_vs_array_broadcast(self): """Test for comparison operators array vs array with broadcast.""" operators = [op.eq, op.lt, op.le, op.gt, op.ge, op.ne] expected_result = [CSparse, CDense, CDense, CDense] - items = [(self.array_sparse_sym, self.row_sparse), - (self.array_sparse_sym, self.row_dense), - (self.array_dense_sym, self.row_sparse), - (self.array_dense_sym, self.row_dense)] + items = [ + (self.array_sparse_sym, self.row_sparse), + (self.array_sparse_sym, self.row_dense), + (self.array_dense_sym, self.row_sparse), + (self.array_dense_sym, self.row_dense), + ] with self.logger.catch_warnings(): # Comparing sparse arrays using ==, <= and >= is inefficient self.logger.filterwarnings( - action='ignore', + action="ignore", message="Comparing sparse matrices using*", - category=scs.SparseEfficiencyWarning) + category=scs.SparseEfficiencyWarning, + ) self._test_operator_cycle(operators, items, expected_result) def test_comparison_array_vs_scalar(self): """Test for comparison operators array vs scalar.""" operators = [op.eq, op.lt, op.le, op.gt, op.ge, op.ne] expected_result = [CSparse, CDense, CSparse, CDense] - items = [(self.array_sparse, 2), - (self.array_dense, 2), - (self.array_sparse, np.ravel(2)[0]), - (self.array_dense, np.ravel(2)[0])] + items = [ + (self.array_sparse, 2), + (self.array_dense, 2), + (self.array_sparse, np.ravel(2)[0]), + (self.array_dense, np.ravel(2)[0]), + ] with self.logger.catch_warnings(): # Comparing a sparse matrix with a scalar greater than zero # using < or <= is inefficient # Comparing a sparse matrix with a nonzero scalar # using != is inefficient self.logger.filterwarnings( - action='ignore', + action="ignore", message="Comparing a sparse matrix*", - category=scs.SparseEfficiencyWarning) + category=scs.SparseEfficiencyWarning, + ) self._test_operator_cycle(operators, items, expected_result) def test_comparison_array_vs_unsupported(self): @@ -441,82 +507,122 @@ def test_unsupported_arrays(x): for operator in [op.eq, op.lt, op.le, op.gt, op.ge, op.ne]: with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_sparse, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_flat_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_sparse, x) def test_unsupported(x): for operator in [op.lt, op.le, op.gt, op.ge]: with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.array_sparse, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_flat_dense, x) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(self.row_sparse, x) def test_false(x): - self.logger.info("Testing {:} dense vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vs '{:}'".format(op.eq.__name__, type(x).__name__) + ) self.assertFalse(op.eq(self.array_dense, x)) - self.logger.info("Testing {:} sparse vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vs '{:}'".format(op.eq.__name__, type(x).__name__) + ) self.assertFalse(op.eq(self.array_sparse, x)) - self.logger.info("Testing {:} dense vect vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vect vs '{:}'".format( + op.eq.__name__, type(x).__name__ + ) + ) self.assertFalse(op.eq(self.row_flat_dense, x)) - self.logger.info("Testing {:} sparse vect vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vect vs '{:}'".format( + op.eq.__name__, type(x).__name__ + ) + ) self.assertFalse(op.eq(self.row_sparse, x)) def test_true(x): - self.logger.info("Testing {:} dense vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vs '{:}'".format(op.ne.__name__, type(x).__name__) + ) self.assertTrue(op.ne(self.array_dense, x)) - self.logger.info("Testing {:} sparse vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vs '{:}'".format(op.ne.__name__, type(x).__name__) + ) self.assertTrue(op.ne(self.array_sparse, x)) - self.logger.info("Testing {:} dense vect vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vect vs '{:}'".format( + op.ne.__name__, type(x).__name__ + ) + ) self.assertTrue(op.ne(self.row_flat_dense, x)) - self.logger.info("Testing {:} sparse vect vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vect vs '{:}'".format( + op.ne.__name__, type(x).__name__ + ) + ) self.assertTrue(op.ne(self.row_sparse, x)) test_unsupported_arrays(np.array([1, 2, 3])) @@ -526,19 +632,19 @@ def test_true(x): test_unsupported((1, 2, 3)) test_unsupported(set([1, 2, 3])) test_unsupported(dict({1: 2})) - test_unsupported('test') + test_unsupported("test") test_false([1, 2, 3]) test_false((1, 2, 3)) test_false(set([1, 2, 3])) test_false(dict({1: 2})) - test_false('test') + test_false("test") test_true([1, 2, 3]) test_true((1, 2, 3)) test_true(set([1, 2, 3])) test_true(dict({1: 2})) - test_true('test') + test_true("test") def test_operators_comparison_vs_array(self): """Test for comparison operators unsupported types vs array.""" @@ -547,59 +653,87 @@ def test_unsupported(x): for operator in [op.lt, op.le, op.gt, op.ge]: with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.array_dense) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.array_sparse) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs dense vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs dense vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.row_flat_dense) with self.assertRaises(TypeError): - self.logger.info("Testing {:} '{:}' vs sparse vect".format( - operator.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} '{:}' vs sparse vect".format( + operator.__name__, type(x).__name__ + ) + ) operator(x, self.row_sparse) def test_false(x): - self.logger.info("Testing {:} dense vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vs '{:}'".format(op.eq.__name__, type(x).__name__) + ) self.assertFalse(op.eq(x, self.array_dense)) - self.logger.info("Testing {:} sparse vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vs '{:}'".format(op.eq.__name__, type(x).__name__) + ) self.assertFalse(op.eq(x, self.array_sparse)) - self.logger.info("Testing {:} dense vect vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vect vs '{:}'".format( + op.eq.__name__, type(x).__name__ + ) + ) self.assertFalse(op.eq(x, self.row_flat_dense)) - self.logger.info("Testing {:} sparse vect vs '{:}'".format( - op.eq.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vect vs '{:}'".format( + op.eq.__name__, type(x).__name__ + ) + ) self.assertFalse(op.eq(x, self.row_sparse)) def test_true(x): - self.logger.info("Testing {:} dense vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vs '{:}'".format(op.ne.__name__, type(x).__name__) + ) self.assertTrue(op.ne(x, self.array_dense)) - self.logger.info("Testing {:} sparse vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vs '{:}'".format(op.ne.__name__, type(x).__name__) + ) self.assertTrue(op.ne(x, self.array_sparse)) - self.logger.info("Testing {:} dense vect vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} dense vect vs '{:}'".format( + op.ne.__name__, type(x).__name__ + ) + ) self.assertTrue(op.ne(x, self.row_flat_dense)) - self.logger.info("Testing {:} sparse vect vs '{:}'".format( - op.ne.__name__, type(x).__name__)) + self.logger.info( + "Testing {:} sparse vect vs '{:}'".format( + op.ne.__name__, type(x).__name__ + ) + ) self.assertTrue(op.ne(x, self.row_sparse)) # Array do broadcasting of each element wrt our array @@ -611,27 +745,27 @@ def test_true(x): test_unsupported((1, 2, 3)) test_unsupported(set([1, 2, 3])) test_unsupported(dict({1: 2})) - test_unsupported('test') + test_unsupported("test") test_false([1, 2, 3]) test_false((1, 2, 3)) test_false(set([1, 2, 3])) test_false(dict({1: 2})) - test_false('test') + test_false("test") test_true([1, 2, 3]) test_true((1, 2, 3)) test_true(set([1, 2, 3])) test_true(dict({1: 2})) - test_true('test') + test_true("test") def test_bool_operators(self): a = CArray([1, 2, 3]) b = CArray([1, 1, 1]) - d = (a < 2) - c = (b == 1) + d = a < 2 + c = b == 1 self.logger.info("C -> " + str(c)) self.logger.info("D -> " + str(d)) @@ -663,7 +797,7 @@ def test_iteration(self): res = [] for elem_id, elem in enumerate(self.array_dense): res.append(elem) - self.assertEqual(self.array_dense.ravel()[elem_id].item(), elem) + self.assertEqual(self.array_dense.ravel()[elem_id].item(), elem) # Check if all array elements have been returned self.assertEqual(self.array_dense.size, len(res)) @@ -696,5 +830,5 @@ def test_iteration(self): self.assertEqual(self.row_sparse.size, len(res)) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_appendmerge.py b/src/secml/array/tests/test_c_array_utils_appendmerge.py index b3d160a2..1d2511ca 100644 --- a/src/secml/array/tests/test_c_array_utils_appendmerge.py +++ b/src/secml/array/tests/test_c_array_utils_appendmerge.py @@ -32,8 +32,8 @@ def _append_allaxis(array1, array2): if array1.issparse: # result will be sparse, so always 2d a1_comp = a1_comp.atleast_2d() a2_comp = a2_comp.atleast_2d() - self.assert_array_equal(append_res[:array1.size], a1_comp) - self.assert_array_equal(append_res[array1.size:], a2_comp) + self.assert_array_equal(append_res[: array1.size], a1_comp) + self.assert_array_equal(append_res[array1.size :], a2_comp) array1_shape0 = array1.atleast_2d().shape[0] array1_shape1 = array1.atleast_2d().shape[1] @@ -44,15 +44,13 @@ def _append_allaxis(array1, array2): append_res = array1.append(array2, axis=0) self.logger.info("a1.append(a2, axis=0): {:}".format(append_res)) self.assertEqual(array1_shape1, append_res.shape[1]) - self.assertEqual( - array1_shape0 + array2_shape0, append_res.shape[0]) + self.assertEqual(array1_shape0 + array2_shape0, append_res.shape[0]) self.assert_array_equal(append_res[array1_shape0:, :], array2) # check append on axis 1 (horizontal) append_res = array1.append(array2, axis=1) self.logger.info("a1.append(a2, axis=1): {:}".format(append_res)) - self.assertEqual( - array1_shape1 + array2_shape1, append_res.shape[1]) + self.assertEqual(array1_shape1 + array2_shape1, append_res.shape[1]) self.assertEqual(array1_shape0, append_res.shape[0]) self.assert_array_equal(append_res[:, array1_shape1:], array2) @@ -65,11 +63,10 @@ def _append_allaxis(array1, array2): empty_sparse = CArray([], tosparse=True) empty_dense = CArray([], tosparse=False) self.assertTrue( - (empty_sparse.append(empty_dense, axis=None) == empty_dense).all()) - self.assertTrue( - (empty_sparse.append(empty_dense, axis=0) == empty_dense).all()) - self.assertTrue( - (empty_sparse.append(empty_dense, axis=1) == empty_dense).all()) + (empty_sparse.append(empty_dense, axis=None) == empty_dense).all() + ) + self.assertTrue((empty_sparse.append(empty_dense, axis=0) == empty_dense).all()) + self.assertTrue((empty_sparse.append(empty_dense, axis=1) == empty_dense).all()) def test_repeat(self): """Test for CArray.repeat() method.""" @@ -101,8 +98,10 @@ def _check_repeat(array): continue res = array.repeat(repeats=repeats, axis=axis) - self.logger.info("array.repeat({:}, axis={:}):" - "\n{:}".format(repeats, axis, res)) + self.logger.info( + "array.repeat({:}, axis={:}):" + "\n{:}".format(repeats, axis, res) + ) self.assertIsInstance(res, CArray) self.assertEqual(res.isdense, array.isdense) @@ -115,21 +114,19 @@ def _check_repeat(array): repeats_mul = array.size * repeats else: repeats_mul = repeats.sum() - self.assertEqual(res.shape, (repeats_mul, )) + self.assertEqual(res.shape, (repeats_mul,)) elif axis == 0: if is_scalar(repeats): repeats_mul = array.shape[0] * repeats else: repeats_mul = repeats.sum() - self.assertEqual( - res.shape, (repeats_mul, array.shape[1])) + self.assertEqual(res.shape, (repeats_mul, array.shape[1])) elif axis == 1: if is_scalar(repeats): repeats_mul = array.shape[1] * repeats else: repeats_mul = repeats.sum() - self.assertEqual( - res.shape, (array.shape[0], repeats_mul)) + self.assertEqual(res.shape, (array.shape[0], repeats_mul)) if is_scalar(repeats): repeats_size = array.size * repeats @@ -146,8 +143,7 @@ def _check_repeat(array): if not is_scalar(repeats): repeats = repeats.tondarray() - np_res = array.tondarray().repeat( - repeats=repeats, axis=axis) + np_res = array.tondarray().repeat(repeats=repeats, axis=axis) self.assertFalse((res.tondarray() != np_res).any()) # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] @@ -191,12 +187,17 @@ def _check_repmat(array_data): np_repeated_array = np.matlib.repmat(np_array, 1, 2) self.assertTrue((rep_array.tondarray() == np_repeated_array).all()) - for array in [self.row_flat_dense, self.row_sparse, - self.array_dense, self.array_sparse, - self.empty_sparse, self.empty_dense, - self.empty_flat_dense]: + for array in [ + self.row_flat_dense, + self.row_sparse, + self.array_dense, + self.array_sparse, + self.empty_sparse, + self.empty_dense, + self.empty_flat_dense, + ]: _check_repmat(array) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_comparison.py b/src/secml/array/tests/test_c_array_utils_comparison.py index 72770997..9f47be5d 100644 --- a/src/secml/array/tests/test_c_array_utils_comparison.py +++ b/src/secml/array/tests/test_c_array_utils_comparison.py @@ -15,8 +15,7 @@ def _logical_and(array1, array2, expected): self.logger.info("a2: \n{:}".format(array2)) logical_and_res = array1.logical_and(array2) - self.logger.info( - "a1.logical_and(a2): \n{:}".format(logical_and_res)) + self.logger.info("a1.logical_and(a2): \n{:}".format(logical_and_res)) self.assert_array_equal(logical_and_res, expected) @@ -24,51 +23,78 @@ def _logical_and(array1, array2, expected): # If a sparse array is involved, result must be sparse self.assertTrue(logical_and_res.issparse) - _logical_and(self.array_sparse, self.array_dense, - self.array_sparse.astype(bool)) - _logical_and(self.row_sparse, self.row_dense, - self.row_sparse.astype(bool)) - _logical_and(self.column_sparse, self.column_dense, - self.column_sparse.astype(bool)) - _logical_and(self.array_dense, self.array_sparse, - self.array_dense.astype(bool)) - _logical_and(self.row_dense, self.row_sparse, - self.row_dense.astype(bool)) - _logical_and(self.column_dense, self.column_sparse, - self.column_dense.astype(bool)) + _logical_and( + self.array_sparse, self.array_dense, self.array_sparse.astype(bool) + ) + _logical_and(self.row_sparse, self.row_dense, self.row_sparse.astype(bool)) + _logical_and( + self.column_sparse, self.column_dense, self.column_sparse.astype(bool) + ) + _logical_and(self.array_dense, self.array_sparse, self.array_dense.astype(bool)) + _logical_and(self.row_dense, self.row_sparse, self.row_dense.astype(bool)) + _logical_and( + self.column_dense, self.column_sparse, self.column_dense.astype(bool) + ) # Should work independently of sparse format self.array_sparse._data._data = self.array_sparse._data.todok() - _logical_and(self.array_sparse, self.array_dense, - self.array_sparse.astype(bool)) + _logical_and( + self.array_sparse, self.array_dense, self.array_sparse.astype(bool) + ) self.array_sparse._data._data = self.array_sparse._data.tocsr() - _logical_and(self.array_sparse, self.array_sparse_nozero, - self.array_sparse.astype(bool)) - _logical_and(self.array_dense, self.array_dense_nozero, - self.array_dense.astype(bool)) - _logical_and(self.array_sparse, self.array_sparse_allzero, - self.array_sparse_allzero.astype(bool)) - _logical_and(self.array_dense, self.array_dense_allzero, - self.array_dense_allzero.astype(bool)) - _logical_and(self.array_sparse_allzero, self.array_sparse_allzero, - self.array_sparse_allzero.astype(bool)) - _logical_and(self.array_dense_allzero, self.array_dense_allzero, - self.array_dense_allzero.astype(bool)) - - _logical_and(self.array_sparse_bool, self.array_sparse_bool_true, - self.array_sparse_bool.astype(bool)) - _logical_and(self.array_dense_bool, self.array_dense_bool_true, - self.array_dense_bool.astype(bool)) - _logical_and(self.array_sparse_bool_false, - self.array_sparse_bool_false, - self.array_sparse_bool_false.astype(bool)) - _logical_and(self.array_dense_bool_false, self.array_dense_bool_false, - self.array_dense_bool_false.astype(bool)) + _logical_and( + self.array_sparse, self.array_sparse_nozero, self.array_sparse.astype(bool) + ) + _logical_and( + self.array_dense, self.array_dense_nozero, self.array_dense.astype(bool) + ) + _logical_and( + self.array_sparse, + self.array_sparse_allzero, + self.array_sparse_allzero.astype(bool), + ) + _logical_and( + self.array_dense, + self.array_dense_allzero, + self.array_dense_allzero.astype(bool), + ) + _logical_and( + self.array_sparse_allzero, + self.array_sparse_allzero, + self.array_sparse_allzero.astype(bool), + ) + _logical_and( + self.array_dense_allzero, + self.array_dense_allzero, + self.array_dense_allzero.astype(bool), + ) + + _logical_and( + self.array_sparse_bool, + self.array_sparse_bool_true, + self.array_sparse_bool.astype(bool), + ) + _logical_and( + self.array_dense_bool, + self.array_dense_bool_true, + self.array_dense_bool.astype(bool), + ) + _logical_and( + self.array_sparse_bool_false, + self.array_sparse_bool_false, + self.array_sparse_bool_false.astype(bool), + ) + _logical_and( + self.array_dense_bool_false, + self.array_dense_bool_false, + self.array_dense_bool_false.astype(bool), + ) _logical_and(self.empty_sparse, self.empty_sparse, self.empty_sparse) - _logical_and(self.empty_flat_dense, self.empty_flat_dense, - self.empty_flat_dense) + _logical_and( + self.empty_flat_dense, self.empty_flat_dense, self.empty_flat_dense + ) def test_logical_or(self): """Test for CArray.logical_or() method.""" @@ -83,44 +109,67 @@ def _logical_or(array1, array2, expected): self.assert_array_equal(logical_or_res, expected) - _logical_or(self.array_sparse, self.array_dense, - self.array_sparse.astype(bool)) - _logical_or(self.row_sparse, self.row_dense, - self.row_sparse.astype(bool)) - _logical_or(self.column_sparse, self.column_dense, - self.column_sparse.astype(bool)) - _logical_or(self.array_dense, self.array_sparse, - self.array_dense.astype(bool)) - _logical_or(self.row_dense, self.row_sparse, - self.row_dense.astype(bool)) - _logical_or(self.column_dense, self.column_sparse, - self.column_dense.astype(bool)) - - _logical_or(self.array_sparse, self.array_sparse_nozero, - self.array_sparse_nozero.astype(bool)) - _logical_or(self.array_dense, self.array_dense_nozero, - self.array_dense_nozero.astype(bool)) - _logical_or(self.array_sparse, self.array_sparse_allzero, - self.array_sparse.astype(bool)) - _logical_or(self.array_dense, self.array_dense_allzero, - self.array_sparse.astype(bool)) - _logical_or(self.array_sparse_allzero, self.array_sparse_allzero, - self.array_sparse_allzero.astype(bool)) - _logical_or(self.array_dense_allzero, self.array_dense_allzero, - self.array_dense_allzero.astype(bool)) - - _logical_or(self.array_sparse_bool, self.array_sparse_bool_true, - self.array_sparse_bool_true.astype(bool)) - _logical_or(self.array_dense_bool, self.array_dense_bool_true, - self.array_dense_bool_true.astype(bool)) - _logical_or(self.array_sparse_bool_false, self.array_sparse_bool_false, - self.array_sparse_bool_false.astype(bool)) - _logical_or(self.array_dense_bool_false, self.array_dense_bool_false, - self.array_dense_bool_false.astype(bool)) + _logical_or(self.array_sparse, self.array_dense, self.array_sparse.astype(bool)) + _logical_or(self.row_sparse, self.row_dense, self.row_sparse.astype(bool)) + _logical_or( + self.column_sparse, self.column_dense, self.column_sparse.astype(bool) + ) + _logical_or(self.array_dense, self.array_sparse, self.array_dense.astype(bool)) + _logical_or(self.row_dense, self.row_sparse, self.row_dense.astype(bool)) + _logical_or( + self.column_dense, self.column_sparse, self.column_dense.astype(bool) + ) + + _logical_or( + self.array_sparse, + self.array_sparse_nozero, + self.array_sparse_nozero.astype(bool), + ) + _logical_or( + self.array_dense, + self.array_dense_nozero, + self.array_dense_nozero.astype(bool), + ) + _logical_or( + self.array_sparse, self.array_sparse_allzero, self.array_sparse.astype(bool) + ) + _logical_or( + self.array_dense, self.array_dense_allzero, self.array_sparse.astype(bool) + ) + _logical_or( + self.array_sparse_allzero, + self.array_sparse_allzero, + self.array_sparse_allzero.astype(bool), + ) + _logical_or( + self.array_dense_allzero, + self.array_dense_allzero, + self.array_dense_allzero.astype(bool), + ) + + _logical_or( + self.array_sparse_bool, + self.array_sparse_bool_true, + self.array_sparse_bool_true.astype(bool), + ) + _logical_or( + self.array_dense_bool, + self.array_dense_bool_true, + self.array_dense_bool_true.astype(bool), + ) + _logical_or( + self.array_sparse_bool_false, + self.array_sparse_bool_false, + self.array_sparse_bool_false.astype(bool), + ) + _logical_or( + self.array_dense_bool_false, + self.array_dense_bool_false, + self.array_dense_bool_false.astype(bool), + ) _logical_or(self.empty_sparse, self.empty_sparse, self.empty_sparse) - _logical_or(self.empty_flat_dense, self.empty_flat_dense, - self.empty_flat_dense) + _logical_or(self.empty_flat_dense, self.empty_flat_dense, self.empty_flat_dense) def test_logical_not(self): """Test for CArray.logical_not() method.""" @@ -134,14 +183,10 @@ def _logical_not(array, expected): self.assert_array_equal(logical_not_res, expected) - _logical_not(self.array_sparse_nozero, - self.array_sparse_allzero.astype(bool)) - _logical_not(self.array_dense_nozero, - self.array_dense_allzero.astype(bool)) - _logical_not(self.array_sparse_allzero, - self.array_sparse_nozero.astype(bool)) - _logical_not(self.array_dense_allzero, - self.array_dense_nozero.astype(bool)) + _logical_not(self.array_sparse_nozero, self.array_sparse_allzero.astype(bool)) + _logical_not(self.array_dense_nozero, self.array_dense_allzero.astype(bool)) + _logical_not(self.array_sparse_allzero, self.array_sparse_nozero.astype(bool)) + _logical_not(self.array_dense_allzero, self.array_dense_nozero.astype(bool)) _logical_not(self.array_sparse_bool_false, self.array_sparse_bool_true) _logical_not(self.array_dense_bool_false, self.array_sparse_bool_true) @@ -312,5 +357,5 @@ def _minimum(array1, array2): self.assertFalse((e_min != CArray([])).any()) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_dataalteration.py b/src/secml/array/tests/test_c_array_utils_dataalteration.py index c71aea8d..07841d5b 100644 --- a/src/secml/array/tests/test_c_array_utils_dataalteration.py +++ b/src/secml/array/tests/test_c_array_utils_dataalteration.py @@ -51,8 +51,10 @@ def _check_clip(array, expected): for c_limits_idx, c_limits in enumerate(intervals): res = array.clip(*c_limits) - self.logger.info("array.min(c_min={:}, c_max={:}):" - "\n{:}".format(c_limits[0], c_limits[1], res)) + self.logger.info( + "array.min(c_min={:}, c_max={:}):" + "\n{:}".format(c_limits[0], c_limits[1], res) + ) res_expected = expected[c_limits_idx] self.assertIsInstance(res, CArray) @@ -65,31 +67,42 @@ def _check_clip(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_clip(self.array_dense, - (CArray([[1, 0, 0, 2], [2, 2, 0, 0], [2, 2, 0, 0]]), - CArray([[1., 0., 0., 5.], - [2., 4., 0., 0.], - [3., 6., 0., 0.]]), - CArray([[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]]))) - - _check_clip(self.row_flat_dense, (CArray([2, 0, 2]), - CArray([4., 0., 6.]), - CArray([0., 0., 0.]))) - - _check_clip(self.row_dense, (CArray([[2, 0, 2]]), - CArray([[4., 0., 6.]]), - CArray([[0., 0., 0.]]))) - - _check_clip(self.column_dense, (CArray([[2], [0], [2]]), - CArray([[4.], [0.], [6.]]), - CArray([[0.], [0.], [0.]]))) - - _check_clip(self.single_flat_dense, - (CArray([2]), CArray([4.]), CArray([0.]))) - _check_clip(self.single_dense, - (CArray([[2]]), CArray([[4.]]), CArray([[0.]]))) + _check_clip( + self.array_dense, + ( + CArray([[1, 0, 0, 2], [2, 2, 0, 0], [2, 2, 0, 0]]), + CArray( + [[1.0, 0.0, 0.0, 5.0], [2.0, 4.0, 0.0, 0.0], [3.0, 6.0, 0.0, 0.0]] + ), + CArray( + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]] + ), + ), + ) + + _check_clip( + self.row_flat_dense, + (CArray([2, 0, 2]), CArray([4.0, 0.0, 6.0]), CArray([0.0, 0.0, 0.0])), + ) + + _check_clip( + self.row_dense, + (CArray([[2, 0, 2]]), CArray([[4.0, 0.0, 6.0]]), CArray([[0.0, 0.0, 0.0]])), + ) + + _check_clip( + self.column_dense, + ( + CArray([[2], [0], [2]]), + CArray([[4.0], [0.0], [6.0]]), + CArray([[0.0], [0.0], [0.0]]), + ), + ) + + _check_clip(self.single_flat_dense, (CArray([2]), CArray([4.0]), CArray([0.0]))) + _check_clip( + self.single_dense, (CArray([[2]]), CArray([[4.0]]), CArray([[0.0]])) + ) # Check intervals wrongly chosen with self.assertRaises(ValueError): @@ -110,8 +123,9 @@ def _sort(axis, array, sorted_expected): for inplace in (False, True): array_copy = copy.deepcopy(array) array_sorted = array_copy.sort(axis=axis, inplace=inplace) - self.logger.info("Array sorted along axis {:}:" - "\n{:}".format(axis, array_sorted)) + self.logger.info( + "Array sorted along axis {:}:" "\n{:}".format(axis, array_sorted) + ) self.assertEqual(array_issparse, array_sorted.issparse) self.assertEqual(array_isdense, array_sorted.isdense) @@ -135,15 +149,21 @@ def _sort(axis, array, sorted_expected): self.assertTrue(array_sorted[0, 0] == alter_value[0]) # Sparse arrays - _sort(-1, self.array_sparse, - CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]], - tosparse=True)) - _sort(0, self.array_sparse, - CArray([[1, 0, 0, 0], [2, 4, 0, 0], [3, 6, 0, 5]], - tosparse=True)) - _sort(1, self.array_sparse, - CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]], - tosparse=True)) + _sort( + -1, + self.array_sparse, + CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]], tosparse=True), + ) + _sort( + 0, + self.array_sparse, + CArray([[1, 0, 0, 0], [2, 4, 0, 0], [3, 6, 0, 5]], tosparse=True), + ) + _sort( + 1, + self.array_sparse, + CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]], tosparse=True), + ) _sort(-1, self.row_sparse, CArray([0, 4, 6], tosparse=True)) _sort(0, self.row_sparse, CArray([4, 0, 6], tosparse=True)) @@ -154,12 +174,9 @@ def _sort(axis, array, sorted_expected): _sort(1, self.column_sparse, CArray([[4], [0], [6]], tosparse=True)) # Dense arrays - _sort(-1, self.array_dense, - CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]])) - _sort(0, self.array_dense, - CArray([[1, 0, 0, 0], [2, 4, 0, 0], [3, 6, 0, 5]])) - _sort(1, self.array_dense, - CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]])) + _sort(-1, self.array_dense, CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]])) + _sort(0, self.array_dense, CArray([[1, 0, 0, 0], [2, 4, 0, 0], [3, 6, 0, 5]])) + _sort(1, self.array_dense, CArray([[0, 0, 1, 5], [0, 0, 2, 4], [0, 0, 3, 6]])) _sort(-1, self.row_dense, CArray([0, 4, 6])) _sort(0, self.row_dense, CArray([4, 0, 6])) @@ -170,31 +187,76 @@ def _sort(axis, array, sorted_expected): _sort(1, self.column_dense, CArray([[4], [0], [6]])) # Bool arrays - _sort(-1, self.array_dense_bool, - CArray([[False, True, True, True], - [False, False, False, False], - [True, True, True, True]])) - _sort(0, self.array_dense_bool, - CArray([[False, False, False, False], - [True, False, True, True], - [True, True, True, True]])) - _sort(1, self.array_dense_bool, - CArray([[False, True, True, True], - [False, False, False, False], - [True, True, True, True]])) - - _sort(-1, self.array_sparse_bool, - CArray([[False, True, True, True], - [False, False, False, False], - [True, True, True, True]], tosparse=True)) - _sort(0, self.array_sparse_bool, - CArray([[False, False, False, False], - [True, False, True, True], - [True, True, True, True]], tosparse=True)) - _sort(1, self.array_sparse_bool, - CArray([[False, True, True, True], - [False, False, False, False], - [True, True, True, True]], tosparse=True)) + _sort( + -1, + self.array_dense_bool, + CArray( + [ + [False, True, True, True], + [False, False, False, False], + [True, True, True, True], + ] + ), + ) + _sort( + 0, + self.array_dense_bool, + CArray( + [ + [False, False, False, False], + [True, False, True, True], + [True, True, True, True], + ] + ), + ) + _sort( + 1, + self.array_dense_bool, + CArray( + [ + [False, True, True, True], + [False, False, False, False], + [True, True, True, True], + ] + ), + ) + + _sort( + -1, + self.array_sparse_bool, + CArray( + [ + [False, True, True, True], + [False, False, False, False], + [True, True, True, True], + ], + tosparse=True, + ), + ) + _sort( + 0, + self.array_sparse_bool, + CArray( + [ + [False, False, False, False], + [True, False, True, True], + [True, True, True, True], + ], + tosparse=True, + ), + ) + _sort( + 1, + self.array_sparse_bool, + CArray( + [ + [False, True, True, True], + [False, False, False, False], + [True, True, True, True], + ], + tosparse=True, + ), + ) # Check sort() for empty arrays self.empty_flat_dense.sort() @@ -211,7 +273,9 @@ def _argsort(axis, matrix): sorted_idx = matrix.argsort(axis=axis) self.logger.info("array.argsort(axis={:}): {:}".format(axis, sorted_idx)) - self.assertFalse(sorted_idx.issparse, "sorted method don't return a cndarray") + self.assertFalse( + sorted_idx.issparse, "sorted method don't return a cndarray" + ) np_matrix = matrix.todense().tondarray() np_matrix = np.atleast_2d(np_matrix) @@ -300,7 +364,7 @@ def _shuffle(array): _shuffle(self.empty_flat_dense) _shuffle(self.empty_sparse) - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_dataanalysis.py b/src/secml/array/tests/test_c_array_utils_dataanalysis.py index 8fce58bb..bbedf278 100644 --- a/src/secml/array/tests/test_c_array_utils_dataanalysis.py +++ b/src/secml/array/tests/test_c_array_utils_dataanalysis.py @@ -32,19 +32,13 @@ def check_nnz(array, expected): self.assertEqual(array.shape[0], res.size) self.assertFalse((res != expected[ax_i]).any()) - check_nnz(self.array_sparse, - (6, CArray([3, 2, 0, 1]), CArray([2, 2, 2]))) - check_nnz(self.row_sparse, - (2, CArray([1, 0, 1]), CArray([2]))) - check_nnz(self.column_sparse, - (2, CArray([2]), CArray([1, 0, 1]))) - - check_nnz(self.array_dense, - (6, CArray([3, 2, 0, 1]), CArray([2, 2, 2]))) - check_nnz(self.row_dense, - (2, CArray([1, 0, 1]), CArray([2]))) - check_nnz(self.column_dense, - (2, CArray([2]), CArray([1, 0, 1]))) + check_nnz(self.array_sparse, (6, CArray([3, 2, 0, 1]), CArray([2, 2, 2]))) + check_nnz(self.row_sparse, (2, CArray([1, 0, 1]), CArray([2]))) + check_nnz(self.column_sparse, (2, CArray([2]), CArray([1, 0, 1]))) + + check_nnz(self.array_dense, (6, CArray([3, 2, 0, 1]), CArray([2, 2, 2]))) + check_nnz(self.row_dense, (2, CArray([1, 0, 1]), CArray([2]))) + check_nnz(self.column_dense, (2, CArray([2]), CArray([1, 0, 1]))) check_nnz(self.single_dense, (1, CArray([1]), CArray([1]))) check_nnz(self.single_sparse, (1, CArray([1]), CArray([1]))) @@ -63,7 +57,8 @@ def _unique(array, true_unique): if array.isdense: array_unique, u_indices, u_inverse, u_counts = array.unique( - return_index=True, return_inverse=True, return_counts=True) + return_index=True, return_inverse=True, return_counts=True + ) # Testing call without the optional parameters array_unique_single = array.unique() elif array.issparse: @@ -71,7 +66,8 @@ def _unique(array, true_unique): with self.assertRaises(NotImplementedError): array.unique(return_inverse=True) array_unique, u_indices, u_counts = array.unique( - return_index=True, return_counts=True) + return_index=True, return_counts=True + ) # Testing call without the optional parameters array_unique_single = array.unique() else: @@ -152,9 +148,11 @@ def _check_bincount(array, expected, minlength=0): self.logger.info("array.bincount():\n{:}".format(res)) self.assertTrue(res.is_vector_like) - expected_length = array.max() + 1 if (minlength == 0) or ( - minlength < array.max() + 1) \ + expected_length = ( + array.max() + 1 + if (minlength == 0) or (minlength < array.max() + 1) else minlength + ) self.assertEqual(res.size, expected_length) self.assertFalse((res != expected).any()) @@ -169,14 +167,14 @@ def _check_bincount(array, expected, minlength=0): _check_bincount(self.row_flat_dense, CArray([1, 0, 0, 0, 1, 0, 1])) _check_bincount(self.single_flat_dense, CArray([0, 0, 0, 0, 1])) _check_bincount(self.single_bool_flat_dense, CArray([0, 1])) - _check_bincount(self.row_flat_dense, - CArray([1, 0, 0, 0, 1, 0, 1, 0, 0, 0]), minlength=10) - _check_bincount(self.row_sparse, - CArray([1, 0, 0, 0, 1, 0, 1, 0, 0, 0]), minlength=10) _check_bincount( - self.row_sparse, CArray([1, 0, 0, 0, 1, 0, 1]), minlength=3) - _check_bincount(self.row_flat_dense, - CArray([1, 0, 0, 0, 1, 0, 1]), minlength=3) + self.row_flat_dense, CArray([1, 0, 0, 0, 1, 0, 1, 0, 0, 0]), minlength=10 + ) + _check_bincount( + self.row_sparse, CArray([1, 0, 0, 0, 1, 0, 1, 0, 0, 0]), minlength=10 + ) + _check_bincount(self.row_sparse, CArray([1, 0, 0, 0, 1, 0, 1]), minlength=3) + _check_bincount(self.row_flat_dense, CArray([1, 0, 0, 0, 1, 0, 1]), minlength=3) # Should work independently of sparse format self.row_sparse._data._data = self.row_sparse._data.todok() @@ -211,39 +209,37 @@ def test_norm(self): self.logger.filterwarnings( action="ignore", message="divide by zero encountered in reciprocal", - category=RuntimeWarning + category=RuntimeWarning, ) self.logger.filterwarnings( action="ignore", message="divide by zero encountered in power", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_norm(array): self.logger.info("array:\n{:}".format(array)) - for ord_idx, order in enumerate((None, 'fro', inf, -inf, - 0, 1, -1, 2, -2, 3, -3)): + for ord_idx, order in enumerate( + (None, "fro", inf, -inf, 0, 1, -1, 2, -2, 3, -3) + ): - if order == 'fro': # Frobenius is a matrix norm - self.logger.info( - "array.norm(order={:}): ValueError".format(order)) + if order == "fro": # Frobenius is a matrix norm + self.logger.info("array.norm(order={:}): ValueError".format(order)) with self.assertRaises(ValueError): array.norm(order=order) continue # Scipy does not supports negative norms if array.issparse is True and is_int(order) and order < 0: - self.logger.info( - "array.norm(order={:}): ValueError".format(order)) + self.logger.info("array.norm(order={:}): ValueError".format(order)) with self.assertRaises(NotImplementedError): array.norm(order=order) continue res = array.norm(order=order) - self.logger.info("array.norm(order={:}):\n{:}" - "".format(order, res)) + self.logger.info("array.norm(order={:}):\n{:}" "".format(order, res)) # Special handle of empty arrays if array.size == 0: @@ -252,8 +248,7 @@ def _check_norm(array): self.assertEqual(0, res) continue - res_np = np.linalg.norm( - array.tondarray().ravel(), ord=order).round(4) + res_np = np.linalg.norm(array.tondarray().ravel(), ord=order).round(4) res = round(res, 4) self.assertTrue(is_scalar(res)) @@ -289,12 +284,12 @@ def test_norm_2d(self): self.logger.filterwarnings( action="ignore", message="divide by zero encountered in reciprocal", - category=RuntimeWarning + category=RuntimeWarning, ) self.logger.filterwarnings( action="ignore", message="divide by zero encountered in power", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_norm_2d(array): @@ -302,12 +297,14 @@ def _check_norm_2d(array): for axis_idx, axis in enumerate((None, 0, 1)): for ord_idx, order in enumerate( - (None, 'fro', inf, -inf, 1, -1, 2, -2, 3, -3)): + (None, "fro", inf, -inf, 1, -1, 2, -2, 3, -3) + ): if axis is None and order in (2, -2): self.logger.info( "array.norm_2d(order={:}, axis={:}): " - "NotImplementedError".format(order, axis)) + "NotImplementedError".format(order, axis) + ) # Norms not implemented for matrices with self.assertRaises(NotImplementedError): array.norm_2d(order=order, axis=axis) @@ -316,34 +313,42 @@ def _check_norm_2d(array): if axis is None and order in (3, -3): self.logger.info( "array.norm_2d(order={:}, axis={:}): " - "ValueError".format(order, axis)) + "ValueError".format(order, axis) + ) # Invalid norm order for matrices with self.assertRaises(ValueError): array.norm_2d(order=order, axis=axis) continue - if axis is not None and order == 'fro': + if axis is not None and order == "fro": self.logger.info( "array.norm_2d(order={:}, axis={:}): " - "ValueError".format(order, axis)) + "ValueError".format(order, axis) + ) # fro-norm is a matrix norm with self.assertRaises(ValueError): array.norm_2d(order=order, axis=axis) continue - if array.issparse is True and axis is not None and \ - (is_int(order) and order < 0): + if ( + array.issparse is True + and axis is not None + and (is_int(order) and order < 0) + ): self.logger.info( "array.norm_2d(order={:}, axis={:}): " - "NotImplementedError".format(order, axis)) + "NotImplementedError".format(order, axis) + ) # Negative vector norms not implemented for sparse with self.assertRaises(NotImplementedError): array.norm_2d(order=order, axis=axis) continue res = array.norm_2d(order=order, axis=axis) - self.logger.info("array.norm_2d(order={:}, axis={:}):" - "\n{:}".format(order, axis, res)) + self.logger.info( + "array.norm_2d(order={:}, axis={:}):" + "\n{:}".format(order, axis, res) + ) # Special handle of empty arrays if array.size == 0: @@ -357,9 +362,12 @@ def _check_norm_2d(array): self.assertFalse((CArray([[0.0]]) != res).any()) continue - res_np = np.linalg.norm(array.atleast_2d().tondarray(), - ord=order, axis=axis, - keepdims=True).round(4) + res_np = np.linalg.norm( + array.atleast_2d().tondarray(), + ord=order, + axis=axis, + keepdims=True, + ).round(4) if axis is None: res = round(res, 4) @@ -377,20 +385,19 @@ def _check_norm_2d(array): else: if axis == 0: # Should return a row self.assertEqual(1, res.shape[0]) - self.assertEqual( - array.shape[1], res.shape[1]) + self.assertEqual(array.shape[1], res.shape[1]) else: # Should return a column self.assertEqual(1, res.shape[1]) - self.assertEqual( - array.shape[0], res.shape[0]) + self.assertEqual(array.shape[0], res.shape[0]) self.assertEqual(res_np.dtype, res.dtype) self.assertFalse((res_np != res.tondarray()).any()) with self.assertRaises(ValueError): - self.logger.info("array.norm_2d(order={:}): " - "NotImplementedError".format(0)) + self.logger.info( + "array.norm_2d(order={:}): " "NotImplementedError".format(0) + ) array.norm_2d(order=0) # Norm 0 not implemented # Sparse arrays @@ -427,8 +434,10 @@ def _check_sum(array, expected): for res_idx, axis in enumerate([None, 0, 1]): res = array.sum(axis=axis, keepdims=keepdims) - self.logger.info("array.sum(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) + self.logger.info( + "array.sum(axis={:}, keepdims={:}):" + "\n{:}".format(axis, keepdims, res) + ) if axis is None: self.assertTrue(is_scalar(res)) @@ -453,24 +462,26 @@ def _check_sum(array, expected): self.logger.info("Testing CArray.sum()") - _check_sum(self.array_sparse, - (21, CArray([[6, 10, 0, 5]]), CArray([[6], [6], [9]]))) - _check_sum(self.array_dense, - (21, CArray([[6, 10, 0, 5]]), CArray([[6], [6], [9]]))) + _check_sum( + self.array_sparse, (21, CArray([[6, 10, 0, 5]]), CArray([[6], [6], [9]])) + ) + _check_sum( + self.array_dense, (21, CArray([[6, 10, 0, 5]]), CArray([[6], [6], [9]])) + ) - _check_sum(self.array_dense_bool, - (7, CArray([[2, 1, 2, 2]]), CArray([[3], [0], [4]]))) - _check_sum(self.array_sparse_bool, - (7, CArray([[2, 1, 2, 2]]), CArray([[3], [0], [4]]))) + _check_sum( + self.array_dense_bool, (7, CArray([[2, 1, 2, 2]]), CArray([[3], [0], [4]])) + ) + _check_sum( + self.array_sparse_bool, (7, CArray([[2, 1, 2, 2]]), CArray([[3], [0], [4]])) + ) _check_sum(self.row_flat_dense, (10, CArray([4, 0, 6]), CArray([10]))) _check_sum(self.row_dense, (10, CArray([[4, 0, 6]]), CArray([[10]]))) _check_sum(self.row_sparse, (10, CArray([[4, 0, 6]]), CArray([[10]]))) - _check_sum(self.column_dense, - (10, CArray([[10]]), CArray([[4], [0], [6]]))) - _check_sum(self.column_sparse, - (10, CArray([[10]]), CArray([[4], [0], [6]]))) + _check_sum(self.column_dense, (10, CArray([[10]]), CArray([[4], [0], [6]]))) + _check_sum(self.column_sparse, (10, CArray([[10]]), CArray([[4], [0], [6]]))) _check_sum(self.single_flat_dense, (4, CArray([4]), CArray([4]))) _check_sum(self.single_dense, (4, CArray([[4]]), CArray([[4]]))) @@ -495,11 +506,14 @@ def _check_cumsum(array): for dtype in (None, float, int): res = array.cumsum(axis=axis, dtype=dtype) - self.logger.info("array.cumsum(axis={:}, dtype={:}):" - "\n{:}".format(axis, dtype, res)) + self.logger.info( + "array.cumsum(axis={:}, dtype={:}):" + "\n{:}".format(axis, dtype, res) + ) - res_np = np.cumsum(array.atleast_2d().tondarray(), - axis=axis, dtype=dtype) + res_np = np.cumsum( + array.atleast_2d().tondarray(), axis=axis, dtype=dtype + ) if array.ndim == 1: # We pass to numpy 2D arrays but result @@ -548,11 +562,11 @@ def _check_prod(array, expected): for dtype in [None, float, int]: for res_idx, axis in enumerate([None, 0, 1]): - res = array.prod( - axis=axis, keepdims=keepdims, dtype=dtype) + res = array.prod(axis=axis, keepdims=keepdims, dtype=dtype) self.logger.info( "array.prod(axis={:}, keepdims={:}, dtype={:}):" - "\n{:}".format(axis, keepdims, dtype, res)) + "\n{:}".format(axis, keepdims, dtype, res) + ) if axis is None: self.assertTrue(is_scalar(res)) @@ -562,9 +576,9 @@ def _check_prod(array, expected): res_expected = expected[res_idx] if not isinstance(res_expected, CArray): if dtype is None: - if array.dtype.kind in ('i', 'u', 'b'): + if array.dtype.kind in ("i", "u", "b"): dtype_none = int - elif array.dtype.kind in ('f',): + elif array.dtype.kind in ("f",): dtype_none = float else: dtype_none = array.dtype.type @@ -576,19 +590,17 @@ def _check_prod(array, expected): self.assertEqual(res, res_expected) else: - self.assertEqual(res.isdense, - res_expected.isdense) - self.assertEqual(res.issparse, - res_expected.issparse) + self.assertEqual(res.isdense, res_expected.isdense) + self.assertEqual(res.issparse, res_expected.issparse) if keepdims is False: res_expected = res_expected.ravel() self.assertEqual(res.shape, res_expected.shape) if dtype is None: - if array.dtype.kind in ('i', 'u', 'b'): + if array.dtype.kind in ("i", "u", "b"): dtype_none = int - elif array.dtype.kind in ('f',): + elif array.dtype.kind in ("f",): dtype_none = float else: dtype_none = array.dtype.type @@ -602,48 +614,70 @@ def _check_prod(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_prod(self.array_sparse, - (0, CArray([[6, 0, 0, 0]], tosparse=True), - CArray([[0], [0], [0]], tosparse=True))) - _check_prod(self.array_dense, - (0, CArray([[6, 0, 0, 0]]), CArray([[0], [0], [0]]))) + _check_prod( + self.array_sparse, + ( + 0, + CArray([[6, 0, 0, 0]], tosparse=True), + CArray([[0], [0], [0]], tosparse=True), + ), + ) + _check_prod( + self.array_dense, (0, CArray([[6, 0, 0, 0]]), CArray([[0], [0], [0]])) + ) - _check_prod(self.array_dense_bool, - (0, CArray([[0, 0, 0, 0]]), CArray([[0], [0], [1]]))) - _check_prod(self.array_sparse_bool, - (0, CArray([[0, 0, 0, 0]], tosparse=True), - CArray([[0], [0], [1]], tosparse=True))) + _check_prod( + self.array_dense_bool, (0, CArray([[0, 0, 0, 0]]), CArray([[0], [0], [1]])) + ) + _check_prod( + self.array_sparse_bool, + ( + 0, + CArray([[0, 0, 0, 0]], tosparse=True), + CArray([[0], [0], [1]], tosparse=True), + ), + ) _check_prod(self.row_flat_dense, (0, CArray([4, 0, 6]), CArray([0]))) _check_prod(self.row_dense, (0, CArray([[4, 0, 6]]), CArray([[0]]))) - _check_prod(self.row_sparse, (0, CArray([[4, 0, 6]], tosparse=True), - CArray([[0]], tosparse=True))) + _check_prod( + self.row_sparse, + (0, CArray([[4, 0, 6]], tosparse=True), CArray([[0]], tosparse=True)), + ) - _check_prod(self.column_dense, - (0, CArray([[0]]), CArray([[4], [0], [6]]))) - _check_prod(self.column_sparse, (0, CArray([[0]], tosparse=True), - CArray([[4], [0], [6]], - tosparse=True))) + _check_prod(self.column_dense, (0, CArray([[0]]), CArray([[4], [0], [6]]))) + _check_prod( + self.column_sparse, + (0, CArray([[0]], tosparse=True), CArray([[4], [0], [6]], tosparse=True)), + ) _check_prod(self.single_flat_dense, (4, CArray([4]), CArray([4]))) _check_prod(self.single_dense, (4, CArray([[4]]), CArray([[4]]))) - _check_prod(self.single_sparse, (4, CArray([[4]], tosparse=True), - CArray([[4]], tosparse=True))) + _check_prod( + self.single_sparse, + (4, CArray([[4]], tosparse=True), CArray([[4]], tosparse=True)), + ) self.single_sparse._data._data = self.single_sparse._data.todok() - _check_prod(self.single_sparse, (4, CArray([[4]], tosparse=True), - CArray([[4]], tosparse=True))) + _check_prod( + self.single_sparse, + (4, CArray([[4]], tosparse=True), CArray([[4]], tosparse=True)), + ) self.single_sparse._data._data = self.single_sparse._data.tocsr() _check_prod(self.single_bool_flat_dense, (1, CArray([1]), CArray([1]))) _check_prod(self.single_bool_dense, (1, CArray([[1]]), CArray([[1]]))) - _check_prod(self.single_bool_sparse, (1, CArray([[1]], tosparse=True), - CArray([[1]], tosparse=True))) + _check_prod( + self.single_bool_sparse, + (1, CArray([[1]], tosparse=True), CArray([[1]], tosparse=True)), + ) _check_prod(self.empty_flat_dense, (1.0, CArray([1.0]), CArray([1.0]))) _check_prod(self.empty_dense, (1.0, CArray([[1.0]]), CArray([[1.0]]))) - _check_prod(self.empty_sparse, (1.0, CArray([[1.0]], tosparse=True), - CArray([[1.0]], tosparse=True))) + _check_prod( + self.empty_sparse, + (1.0, CArray([[1.0]], tosparse=True), CArray([[1.0]], tosparse=True)), + ) def test_all(self): """Test for CArray.all() method.""" @@ -658,8 +692,9 @@ def _all(matrix): # all() on array that contain also zeros gives False? self.logger.info("matrix: \n" + str(matrix)) all_res = matrix.all(axis=axis) - self.logger.info("matrix.all(axis={:}) result is:\n" - "{:}".format(axis, all_res)) + self.logger.info( + "matrix.all(axis={:}) result is:\n" "{:}".format(axis, all_res) + ) if axis is None: self.assertIsInstance(all_res, bool) self.assertFalse(all_res) @@ -673,11 +708,11 @@ def _all_nozero(matrix_nozero): matrix_nozero.all(axis=axis) else: # all() on an array with no zeros gives True? - self.logger.info( - "matrix_nozero: \n" + str(matrix_nozero)) + self.logger.info("matrix_nozero: \n" + str(matrix_nozero)) all_res = matrix_nozero.all(axis=axis) - self.logger.info("matrix_nozero.all(axis={:}):\n" - "{:}".format(axis, all_res)) + self.logger.info( + "matrix_nozero.all(axis={:}):\n" "{:}".format(axis, all_res) + ) if axis is None: self.assertIsInstance(all_res, bool) self.assertTrue(all_res) @@ -694,8 +729,9 @@ def _all_bool(matrix_bool): # all() on boolean array self.logger.info("matrix_bool: \n" + str(matrix_bool)) all_res = matrix_bool.all(axis=axis) - self.logger.info("matrix_bool.all(axis={:}):\n" - "{:}".format(axis, all_res)) + self.logger.info( + "matrix_bool.all(axis={:}):\n" "{:}".format(axis, all_res) + ) if axis is None: self.assertIsInstance(all_res, bool) self.assertFalse(all_res) @@ -709,11 +745,11 @@ def _all_bool_true(matrix_bool_true): matrix_bool_true.all(axis=axis) else: # all() on a boolean array with all True - self.logger.info( - "matrix_bool_true: \n" + str(matrix_bool_true)) + self.logger.info("matrix_bool_true: \n" + str(matrix_bool_true)) all_res = matrix_bool_true.all(axis=axis) - self.logger.info("matrix_bool_true.all(axis={:}):\n" - "{:}".format(axis, all_res)) + self.logger.info( + "matrix_bool_true.all(axis={:}):\n" "{:}".format(axis, all_res) + ) if axis is None: self.assertIsInstance(all_res, bool) self.assertTrue(all_res) @@ -728,11 +764,9 @@ def _all_bool_true(matrix_bool_true): # Should work independently of sparse format # Use a nonzero array to avoid short-circuit - self.array_sparse_nozero._data._data = \ - self.array_sparse_nozero._data.todok() + self.array_sparse_nozero._data._data = self.array_sparse_nozero._data.todok() _all_nozero(self.array_sparse_nozero) - self.array_sparse_nozero._data._data = \ - self.array_sparse_nozero._data.tocsr() + self.array_sparse_nozero._data._data = self.array_sparse_nozero._data.tocsr() _all(self.array_dense) _all_nozero(self.array_dense_nozero) @@ -752,8 +786,9 @@ def _any(matrix): # any() on an array that contain also zeros gives True? self.logger.info("matrix: \n" + str(matrix)) any_res = matrix.any(axis=axis) - self.logger.info("matrix.any(axis={:}):\n" - "{:}".format(axis, any_res)) + self.logger.info( + "matrix.any(axis={:}):\n" "{:}".format(axis, any_res) + ) if axis is None: self.assertIsInstance(any_res, bool) self.assertTrue(any_res) @@ -767,11 +802,11 @@ def _any_allzero(matrix_allzero): matrix_allzero.any(axis=axis) else: # any() on an array with all zeros gives False? - self.logger.info( - "matrix_allzero: \n" + str(matrix_allzero)) + self.logger.info("matrix_allzero: \n" + str(matrix_allzero)) any_res = matrix_allzero.any(axis=axis) - self.logger.info("matrix_allzero.any(axis={:}):\n" - "{:}".format(axis, any_res)) + self.logger.info( + "matrix_allzero.any(axis={:}):\n" "{:}".format(axis, any_res) + ) if axis is None: self.assertIsInstance(any_res, bool) self.assertFalse(any_res) @@ -788,8 +823,9 @@ def _any_bool(matrix_bool): # any() on boolean array self.logger.info("matrix_bool: \n" + str(matrix_bool)) any_res = matrix_bool.any(axis=axis) - self.logger.info("matrix_bool.any(axis={:}):\n" - "{:}".format(axis, any_res)) + self.logger.info( + "matrix_bool.any(axis={:}):\n" "{:}".format(axis, any_res) + ) if axis is None: self.assertIsInstance(any_res, bool) self.assertTrue(any_res) @@ -803,11 +839,11 @@ def _any_bool_false(matrix_bool_false): matrix_bool_false.any(axis=axis) else: # any() on a boolean array with all False - self.logger.info( - "matrix_bool_false: \n" + str(matrix_bool_false)) + self.logger.info("matrix_bool_false: \n" + str(matrix_bool_false)) any_res = matrix_bool_false.any(axis=axis) - self.logger.info("matrix_bool_false.any(axis={:}):\n" - "{:}".format(axis, any_res)) + self.logger.info( + "matrix_bool_false.any(axis={:}):\n" "{:}".format(axis, any_res) + ) if axis is None: self.assertIsInstance(any_res, bool) self.assertFalse(any_res) @@ -822,11 +858,9 @@ def _any_bool_false(matrix_bool_false): # Should work independently of sparse format # Use a allzero array to avoid short-circuit - self.array_sparse_allzero._data._data = \ - self.array_sparse_allzero._data.todok() + self.array_sparse_allzero._data._data = self.array_sparse_allzero._data.todok() _any_allzero(self.array_sparse_allzero) - self.array_sparse_allzero._data._data = \ - self.array_sparse_allzero._data.tocsr() + self.array_sparse_allzero._data._data = self.array_sparse_allzero._data.tocsr() _any(self.array_dense) _any_allzero(self.array_dense_allzero) @@ -835,8 +869,7 @@ def _any_bool_false(matrix_bool_false): def test_min_max_mean(self): """Test for CArray.min(), CArray.max(), CArray.mean() method.""" - self.logger.info( - "Test for CArray.min(), CArray.max(), CArray.mean() method.") + self.logger.info("Test for CArray.min(), CArray.max(), CArray.mean() method.") def _check_minmaxmean(func, array, expected): self.logger.info("Array:\n{:}".format(array)) @@ -844,18 +877,24 @@ def _check_minmaxmean(func, array, expected): for keepdims in (True, False): for res_idx, axis in enumerate([None, 0, 1]): - if func == 'min': + if func == "min": res = array.min(axis=axis, keepdims=keepdims) - self.logger.info("array.min(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) - elif func == 'max': + self.logger.info( + "array.min(axis={:}, keepdims={:}):" + "\n{:}".format(axis, keepdims, res) + ) + elif func == "max": res = array.max(axis=axis, keepdims=keepdims) - self.logger.info("array.max(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) - elif func == 'mean': + self.logger.info( + "array.max(axis={:}, keepdims={:}):" + "\n{:}".format(axis, keepdims, res) + ) + elif func == "mean": res = array.mean(axis=axis, keepdims=keepdims) - self.logger.info("array.mean(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) + self.logger.info( + "array.mean(axis={:}, keepdims={:}):" + "\n{:}".format(axis, keepdims, res) + ) else: raise ValueError("func {:} unknown".format(func)) @@ -882,123 +921,142 @@ def _check_minmaxmean(func, array, expected): self.logger.info("Testing CArray.min()") - _check_minmaxmean('min', self.array_sparse, - (0, CArray([[1, 0, 0, 0]]), - CArray([[0], [0], [0]]))) - _check_minmaxmean('min', self.array_dense, - (0, CArray([[1, 0, 0, 0]]), CArray([[0], [0], [0]]))) + _check_minmaxmean( + "min", + self.array_sparse, + (0, CArray([[1, 0, 0, 0]]), CArray([[0], [0], [0]])), + ) + _check_minmaxmean( + "min", + self.array_dense, + (0, CArray([[1, 0, 0, 0]]), CArray([[0], [0], [0]])), + ) # Should work independently of sparse format self.array_sparse._data._data = self.array_sparse._data.todok() - _check_minmaxmean('min', self.array_sparse, - (0, CArray([[1, 0, 0, 0]]), - CArray([[0], [0], [0]]))) + _check_minmaxmean( + "min", + self.array_sparse, + (0, CArray([[1, 0, 0, 0]]), CArray([[0], [0], [0]])), + ) self.array_sparse._data._data = self.array_sparse._data.tocsr() - _check_minmaxmean('min', self.row_flat_dense, - (0, CArray([4, 0, 6]), 0)) - _check_minmaxmean('min', self.row_sparse, - (0, CArray([[4, 0, 6]]), - CArray([[0]]))) - _check_minmaxmean('min', self.row_dense, - (0, CArray([[4, 0, 6]]), CArray([[0]]))) - - _check_minmaxmean('min', self.column_sparse, - (0, CArray([[0]]), - CArray([[4], [0], [6]]))) - _check_minmaxmean('min', self.column_dense, - (0, CArray([[0]]), CArray([[4], [0], [6]]))) - - _check_minmaxmean('min', self.single_flat_dense, - (4, CArray([4]), CArray([4]))) - _check_minmaxmean('min', self.single_dense, - (4, CArray([[4]]), CArray([[4]]))) - _check_minmaxmean('min', self.single_sparse, - (4, CArray([[4]]), - CArray([[4]]))) + _check_minmaxmean("min", self.row_flat_dense, (0, CArray([4, 0, 6]), 0)) + _check_minmaxmean( + "min", self.row_sparse, (0, CArray([[4, 0, 6]]), CArray([[0]])) + ) + _check_minmaxmean( + "min", self.row_dense, (0, CArray([[4, 0, 6]]), CArray([[0]])) + ) + + _check_minmaxmean( + "min", self.column_sparse, (0, CArray([[0]]), CArray([[4], [0], [6]])) + ) + _check_minmaxmean( + "min", self.column_dense, (0, CArray([[0]]), CArray([[4], [0], [6]])) + ) + + _check_minmaxmean("min", self.single_flat_dense, (4, CArray([4]), CArray([4]))) + _check_minmaxmean("min", self.single_dense, (4, CArray([[4]]), CArray([[4]]))) + _check_minmaxmean("min", self.single_sparse, (4, CArray([[4]]), CArray([[4]]))) self.logger.info("Testing CArray.max()") - _check_minmaxmean('max', self.array_sparse, - (6, CArray([[3, 6, 0, 5]]), - CArray([[5], [4], [6]]))) - _check_minmaxmean('max', self.array_dense, - (6, CArray([[3, 6, 0, 5]]), CArray([[5], [4], [6]]))) + _check_minmaxmean( + "max", + self.array_sparse, + (6, CArray([[3, 6, 0, 5]]), CArray([[5], [4], [6]])), + ) + _check_minmaxmean( + "max", + self.array_dense, + (6, CArray([[3, 6, 0, 5]]), CArray([[5], [4], [6]])), + ) # Should work independently of sparse format self.array_sparse._data._data = self.array_sparse._data.todok() - _check_minmaxmean('max', self.array_sparse, - (6, CArray([[3, 6, 0, 5]]), - CArray([[5], [4], [6]]))) + _check_minmaxmean( + "max", + self.array_sparse, + (6, CArray([[3, 6, 0, 5]]), CArray([[5], [4], [6]])), + ) self.array_sparse._data._data = self.array_sparse._data.tocsr() - _check_minmaxmean('max', self.row_flat_dense, - (6, CArray([4, 0, 6]), CArray([6]))) - _check_minmaxmean('max', self.row_sparse, - (6, CArray([[4, 0, 6]]), - CArray([[6]]))) - _check_minmaxmean('max', self.row_dense, - (6, CArray([[4, 0, 6]]), CArray([[6]]))) - - _check_minmaxmean('max', self.column_sparse, - (6, CArray([[6]]), - CArray([[4], [0], [6]]))) - _check_minmaxmean('max', self.column_dense, - (6, CArray([[6]]), CArray([[4], [0], [6]]))) - - _check_minmaxmean('max', self.single_flat_dense, - (4, CArray([4]), CArray([4]))) - _check_minmaxmean('max', self.single_dense, - (4, CArray([[4]]), CArray([[4]]))) - _check_minmaxmean('max', self.single_sparse, - (4, CArray([[4]]), - CArray([[4]]))) + _check_minmaxmean( + "max", self.row_flat_dense, (6, CArray([4, 0, 6]), CArray([6])) + ) + _check_minmaxmean( + "max", self.row_sparse, (6, CArray([[4, 0, 6]]), CArray([[6]])) + ) + _check_minmaxmean( + "max", self.row_dense, (6, CArray([[4, 0, 6]]), CArray([[6]])) + ) + + _check_minmaxmean( + "max", self.column_sparse, (6, CArray([[6]]), CArray([[4], [0], [6]])) + ) + _check_minmaxmean( + "max", self.column_dense, (6, CArray([[6]]), CArray([[4], [0], [6]])) + ) + + _check_minmaxmean("max", self.single_flat_dense, (4, CArray([4]), CArray([4]))) + _check_minmaxmean("max", self.single_dense, (4, CArray([[4]]), CArray([[4]]))) + _check_minmaxmean("max", self.single_sparse, (4, CArray([[4]]), CArray([[4]]))) self.logger.info("Testing CArray.mean()") - _check_minmaxmean('mean', self.array_sparse, - (1.75, CArray([[2, 3.33, 0, 1.67]]), - CArray([[1.5], [1.5], [2.25]]))) - _check_minmaxmean('mean', self.array_dense, - (1.75, CArray([[2, 3.33, 0, 1.67]]), - CArray([[1.5], [1.5], [2.25]]))) + _check_minmaxmean( + "mean", + self.array_sparse, + (1.75, CArray([[2, 3.33, 0, 1.67]]), CArray([[1.5], [1.5], [2.25]])), + ) + _check_minmaxmean( + "mean", + self.array_dense, + (1.75, CArray([[2, 3.33, 0, 1.67]]), CArray([[1.5], [1.5], [2.25]])), + ) # Should work independently of sparse format self.array_sparse._data._data = self.array_sparse._data.todok() - _check_minmaxmean('mean', self.array_sparse, - (1.75, CArray([[2, 3.33, 0, 1.67]]), - CArray([[1.5], [1.5], [2.25]]))) + _check_minmaxmean( + "mean", + self.array_sparse, + (1.75, CArray([[2, 3.33, 0, 1.67]]), CArray([[1.5], [1.5], [2.25]])), + ) self.array_sparse._data._data = self.array_sparse._data.tocsr() - _check_minmaxmean('mean', self.row_flat_dense, - (3.33, CArray([4, 0, 6]), CArray([3.33]))) - _check_minmaxmean('mean', self.row_sparse, - (3.33, CArray([[4, 0, 6]]), CArray([[3.33]]))) - _check_minmaxmean('mean', self.row_dense, - (3.33, CArray([[4, 0, 6]]), CArray([[3.33]]))) - - _check_minmaxmean('mean', self.column_sparse, - (3.33, CArray([[3.33]]), CArray([[4], [0], [6]]))) - _check_minmaxmean('mean', self.column_dense, - (3.33, CArray([[3.33]]), CArray([[4], [0], [6]]))) - - _check_minmaxmean('mean', self.single_flat_dense, - (4, CArray([4]), CArray([4]))) - _check_minmaxmean('mean', self.single_dense, - (4, CArray([[4]]), CArray([[4]]))) - _check_minmaxmean('mean', self.single_sparse, - (4, CArray([[4]]), CArray([[4]]))) + _check_minmaxmean( + "mean", self.row_flat_dense, (3.33, CArray([4, 0, 6]), CArray([3.33])) + ) + _check_minmaxmean( + "mean", self.row_sparse, (3.33, CArray([[4, 0, 6]]), CArray([[3.33]])) + ) + _check_minmaxmean( + "mean", self.row_dense, (3.33, CArray([[4, 0, 6]]), CArray([[3.33]])) + ) + + _check_minmaxmean( + "mean", + self.column_sparse, + (3.33, CArray([[3.33]]), CArray([[4], [0], [6]])), + ) + _check_minmaxmean( + "mean", self.column_dense, (3.33, CArray([[3.33]]), CArray([[4], [0], [6]])) + ) + + _check_minmaxmean("mean", self.single_flat_dense, (4, CArray([4]), CArray([4]))) + _check_minmaxmean("mean", self.single_dense, (4, CArray([[4]]), CArray([[4]]))) + _check_minmaxmean("mean", self.single_sparse, (4, CArray([[4]]), CArray([[4]]))) def test_nanmin_nanmax(self): """Test for CArray.nanmin(), CArray.nanmax() method.""" - self.logger.info( - "Test for CArray.nanmin(), CArray.nanmax() method.") + self.logger.info("Test for CArray.nanmin(), CArray.nanmax() method.") # We are going to test few cases when the results actually contain nans self.logger.filterwarnings( action="ignore", message="All-NaN slice encountered", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_nanminnanmax(func, array, expected): @@ -1011,16 +1069,18 @@ def _check_nanminnanmax(func, array, expected): for keepdims in (True, False): for res_idx, axis in enumerate([None, 0, 1]): - if func == 'nanmin': + if func == "nanmin": res = array.nanmin(axis=axis, keepdims=keepdims) self.logger.info( "array.nanmin(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) - elif func == 'nanmax': + "\n{:}".format(axis, keepdims, res) + ) + elif func == "nanmax": res = array.nanmax(axis=axis, keepdims=keepdims) self.logger.info( "array.nanmax(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) + "\n{:}".format(axis, keepdims, res) + ) else: raise ValueError("func {:} unknown".format(func)) @@ -1034,8 +1094,7 @@ def _check_nanminnanmax(func, array, expected): res = CArray(res).round(2)[0] else: self.assertEqual(res.isdense, res_expected.isdense) - self.assertEqual(res.issparse, - res_expected.issparse) + self.assertEqual(res.issparse, res_expected.issparse) res = res.round(2) if keepdims is False: res_expected = res_expected.ravel() @@ -1048,43 +1107,57 @@ def _check_nanminnanmax(func, array, expected): self.logger.info("Testing CArray.nanmin()") - _check_nanminnanmax('nanmin', self.array_dense, - (0, CArray([[2, 0, 0, 0]]), - CArray([[0], [0], [0]]))) + _check_nanminnanmax( + "nanmin", + self.array_dense, + (0, CArray([[2, 0, 0, 0]]), CArray([[0], [0], [0]])), + ) - _check_nanminnanmax('nanmin', self.row_flat_dense, - (0, CArray([nan, 0, 6]), CArray([0]))) + _check_nanminnanmax( + "nanmin", self.row_flat_dense, (0, CArray([nan, 0, 6]), CArray([0])) + ) - _check_nanminnanmax('nanmin', self.row_dense, - (0, CArray([[nan, 0, 6]]), CArray([[0]]))) + _check_nanminnanmax( + "nanmin", self.row_dense, (0, CArray([[nan, 0, 6]]), CArray([[0]])) + ) - _check_nanminnanmax('nanmin', self.column_dense, - (0, CArray([[0]]), CArray([[nan], [0], [6]]))) + _check_nanminnanmax( + "nanmin", self.column_dense, (0, CArray([[0]]), CArray([[nan], [0], [6]])) + ) - _check_nanminnanmax('nanmin', self.single_flat_dense, - (nan, CArray([nan]), CArray([nan]))) - _check_nanminnanmax('nanmin', self.single_dense, - (nan, CArray([[nan]]), CArray([[nan]]))) + _check_nanminnanmax( + "nanmin", self.single_flat_dense, (nan, CArray([nan]), CArray([nan])) + ) + _check_nanminnanmax( + "nanmin", self.single_dense, (nan, CArray([[nan]]), CArray([[nan]])) + ) self.logger.info("Testing CArray.nanmax()") - _check_nanminnanmax('nanmax', self.array_dense, - (6, CArray([[3, 6, 0, 5]]), - CArray([[5], [4], [6]]))) + _check_nanminnanmax( + "nanmax", + self.array_dense, + (6, CArray([[3, 6, 0, 5]]), CArray([[5], [4], [6]])), + ) - _check_nanminnanmax('nanmax', self.row_flat_dense, - (6, CArray([nan, 0, 6]), CArray([6]))) + _check_nanminnanmax( + "nanmax", self.row_flat_dense, (6, CArray([nan, 0, 6]), CArray([6])) + ) - _check_nanminnanmax('nanmax', self.row_dense, - (6, CArray([[nan, 0, 6]]), CArray([[6]]))) + _check_nanminnanmax( + "nanmax", self.row_dense, (6, CArray([[nan, 0, 6]]), CArray([[6]])) + ) - _check_nanminnanmax('nanmax', self.column_dense, - (6, CArray([[6]]), CArray([[nan], [0], [6]]))) + _check_nanminnanmax( + "nanmax", self.column_dense, (6, CArray([[6]]), CArray([[nan], [0], [6]])) + ) - _check_nanminnanmax('nanmax', self.single_flat_dense, - (nan, CArray([nan]), CArray([nan]))) - _check_nanminnanmax('nanmax', self.single_dense, - (nan, CArray([[nan]]), CArray([[nan]]))) + _check_nanminnanmax( + "nanmax", self.single_flat_dense, (nan, CArray([nan]), CArray([nan])) + ) + _check_nanminnanmax( + "nanmax", self.single_dense, (nan, CArray([[nan]]), CArray([[nan]])) + ) with self.assertRaises(NotImplementedError): self.array_sparse.nanmin() @@ -1112,10 +1185,8 @@ def _argmin(array): self.assertEqual(1, argmin_res.shape[0]) # We create a find_2d-like mask to check result min_res = array.min(axis=0) - argmin_res = [ - argmin_res.ravel().tolist(), list(range(array.shape[1]))] - self.assert_array_equal( - array[argmin_res].atleast_2d(), min_res) + argmin_res = [argmin_res.ravel().tolist(), list(range(array.shape[1]))] + self.assert_array_equal(array[argmin_res].atleast_2d(), min_res) self.logger.info("a: \n{:}".format(array)) argmin_res = array.argmin(axis=1) @@ -1127,10 +1198,8 @@ def _argmin(array): # We create a find_2d-like mask to check result min_res = array.min(axis=1) min_res = min_res.T # will return a column but we compare as a row - argmin_res = [ - list(range(array.shape[0])), argmin_res.ravel().tolist()] - self.assert_array_equal( - array[argmin_res].atleast_2d(), min_res) + argmin_res = [list(range(array.shape[0])), argmin_res.ravel().tolist()] + self.assert_array_equal(array[argmin_res].atleast_2d(), min_res) _argmin(self.array_sparse) _argmin(self.row_sparse) @@ -1174,10 +1243,8 @@ def _argmax(array): self.assertEqual(1, argmax_res.shape[0]) # We create a find_2d-like mask to check result max_res = array.max(axis=0) - argmax_res = [ - argmax_res.ravel().tolist(), list(range(array.shape[1]))] - self.assert_array_equal( - array[argmax_res].atleast_2d(), max_res) + argmax_res = [argmax_res.ravel().tolist(), list(range(array.shape[1]))] + self.assert_array_equal(array[argmax_res].atleast_2d(), max_res) self.logger.info("a: \n{:}".format(array)) argmax_res = array.argmax(axis=1) @@ -1189,10 +1256,8 @@ def _argmax(array): # We create a find_2d-like mask to check result max_res = array.max(axis=1) max_res = max_res.T # max return a column but we compare as a row - argmax_res = [ - list(range(array.shape[0])), argmax_res.ravel().tolist()] - self.assert_array_equal( - array[argmax_res].atleast_2d(), max_res) + argmax_res = [list(range(array.shape[0])), argmax_res.ravel().tolist()] + self.assert_array_equal(array[argmax_res].atleast_2d(), max_res) _argmax(self.array_sparse) _argmax(self.row_sparse) @@ -1228,8 +1293,7 @@ def _check_nanargmin(array): self.logger.info("a: \n{:}".format(array)) argmin_res = array.nanargmin(axis=None) - self.logger.info( - "a.nanargmin(axis=None): \n{:}".format(argmin_res)) + self.logger.info("a.nanargmin(axis=None): \n{:}".format(argmin_res)) self.assertIsInstance(argmin_res, int) min_res = array.nanmin(axis=None) # use numpy.testing to proper compare arrays with nans @@ -1242,14 +1306,12 @@ def _check_nanargmin(array): array.nanargmin(axis=0) else: argmin_res = array.nanargmin(axis=0) - self.logger.info( - "a.nanargmin(axis=0): \n{:}".format(argmin_res)) + self.logger.info("a.nanargmin(axis=0): \n{:}".format(argmin_res)) self.assertIsInstance(argmin_res, CArray) min_res = array.nanmin(axis=0) # One res for each column with keepdims min_res = min_res.ravel() - argmin_res = [ - argmin_res.ravel().tolist(), list(range(array.shape[1]))] + argmin_res = [argmin_res.ravel().tolist(), list(range(array.shape[1]))] # use numpy.testing to proper compare arrays with nans self.assert_array_equal(array[argmin_res], min_res) @@ -1261,13 +1323,11 @@ def _check_nanargmin(array): else: argmin_res = array.nanargmin(axis=1) self.assertIsInstance(argmin_res, CArray) - self.logger.info( - "a.nanargmin(axis=1): \n{:}".format(argmin_res)) + self.logger.info("a.nanargmin(axis=1): \n{:}".format(argmin_res)) min_res = array.nanmin(axis=1) # One res for each row with keepdims min_res = min_res.ravel() - argmin_res = [ - list(range(array.shape[0])), argmin_res.ravel().tolist()] + argmin_res = [list(range(array.shape[0])), argmin_res.ravel().tolist()] # use numpy.testing to proper compare arrays with nans self.assert_array_equal(array[argmin_res], min_res) @@ -1295,8 +1355,7 @@ def _check_nanargmax(array): self.logger.info("a: \n{:}".format(array)) argmax_res = array.nanargmax(axis=None) - self.logger.info( - "a.nanargmax(axis=None): \n{:}".format(argmax_res)) + self.logger.info("a.nanargmax(axis=None): \n{:}".format(argmax_res)) self.assertIsInstance(argmax_res, int) max_res = array.nanmax(axis=None) self.assert_array_equal(array.ravel()[argmax_res], max_res) @@ -1308,14 +1367,12 @@ def _check_nanargmax(array): array.nanargmax(axis=0) else: argmax_res = array.nanargmax(axis=0) - self.logger.info( - "a.nanargmax(axis=0): \n{:}".format(argmax_res)) + self.logger.info("a.nanargmax(axis=0): \n{:}".format(argmax_res)) self.assertIsInstance(argmax_res, CArray) max_res = array.nanmax(axis=0) # One res for each column with keepdims max_res = max_res.ravel() - argmax_res = [ - argmax_res.ravel().tolist(), list(range(array.shape[1]))] + argmax_res = [argmax_res.ravel().tolist(), list(range(array.shape[1]))] self.assert_array_equal(array[argmax_res], max_res) self.logger.info("a: \n{:}".format(array)) @@ -1325,14 +1382,12 @@ def _check_nanargmax(array): array.nanargmax(axis=1) else: argmax_res = array.nanargmax(axis=1) - self.logger.info( - "a.nanargmax(axis=1): \n{:}".format(argmax_res)) + self.logger.info("a.nanargmax(axis=1): \n{:}".format(argmax_res)) self.assertIsInstance(argmax_res, CArray) max_res = array.nanmax(axis=1) # One res for each row with keepdims max_res = max_res.ravel() - argmax_res = [ - list(range(array.shape[0])), argmax_res.ravel().tolist()] + argmax_res = [list(range(array.shape[0])), argmax_res.ravel().tolist()] self.assert_array_equal(array[argmax_res], max_res) _check_nanargmax(self.array_dense) @@ -1359,7 +1414,8 @@ def _check_median(array, expected): res = array.median(axis=axis, keepdims=keepdims) self.logger.info( "array.median(axis={:}, keepdims={:}):" - "\n{:}".format(axis, keepdims, res)) + "\n{:}".format(axis, keepdims, res) + ) if axis is None: self.assertTrue(is_scalar(res)) @@ -1372,8 +1428,7 @@ def _check_median(array, expected): self.assertEqual(res, res_expected) else: self.assertEqual(res.isdense, res_expected.isdense) - self.assertEqual(res.issparse, - res_expected.issparse) + self.assertEqual(res.issparse, res_expected.issparse) res = res.round(2) if keepdims is False: res_expected = res_expected.ravel() @@ -1383,17 +1438,18 @@ def _check_median(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_median(self.array_dense, (0.5, CArray([[2, 4.0, 0, 0.]]), - CArray([[0.5], [1.], [1.5]]))) + _check_median( + self.array_dense, + (0.5, CArray([[2, 4.0, 0, 0.0]]), CArray([[0.5], [1.0], [1.5]])), + ) - _check_median(self.row_flat_dense, - (4.0, CArray([4, 0, 6]), CArray([4.0]))) + _check_median(self.row_flat_dense, (4.0, CArray([4, 0, 6]), CArray([4.0]))) - _check_median(self.row_dense, - (4.0, CArray([[4, 0, 6]]), CArray([[4.0]]))) + _check_median(self.row_dense, (4.0, CArray([[4, 0, 6]]), CArray([[4.0]]))) - _check_median(self.column_dense, - (4.0, CArray([[4.0]]), CArray([[4], [0], [6]]))) + _check_median( + self.column_dense, (4.0, CArray([[4.0]]), CArray([[4], [0], [6]])) + ) _check_median(self.single_flat_dense, (4, CArray([4]), CArray([4]))) _check_median(self.single_dense, (4, CArray([[4]]), CArray([[4]]))) @@ -1415,8 +1471,7 @@ def _check_sha1(array): # Transpose the array and check if sha1 changes if shape changes array_mod = array.T - self.logger.info( - "Checking hash after transpose:\n{:}".format(array_mod)) + self.logger.info("Checking hash after transpose:\n{:}".format(array_mod)) sha1_mod = array_mod.sha1() self.logger.info("array_mod.sha1():\n{:}".format(sha1_mod)) if array_mod.shape != array.shape: @@ -1427,8 +1482,10 @@ def _check_sha1(array): # Change dtype and check if sha1 changes if data changes newtype = int if array.dtype != int else float array_mod = array.astype(newtype) - self.logger.info("Checking hash after changing dtype to " - "{:}:\n{:}".format(newtype, array_mod)) + self.logger.info( + "Checking hash after changing dtype to " + "{:}:\n{:}".format(newtype, array_mod) + ) sha1_mod = array_mod.sha1() self.logger.info("array_mod.sha1():\n{:}".format(sha1_mod)) if array_mod.size > 0: @@ -1456,12 +1513,13 @@ def _check_sha1(array): _check_sha1(self.single_bool_sparse), _check_sha1(self.empty_flat_dense), _check_sha1(self.empty_dense), - _check_sha1(self.empty_sparse) + _check_sha1(self.empty_sparse), ] # We now check that all the collected hashes are different # as each test case was different import itertools + for a, b in itertools.combinations(sha1_list, 2): self.assertNotEqual(a, b) @@ -1487,53 +1545,41 @@ def _check_is_inf_nan(fun, val, array, pos=None): if pos is not None: self.assertTrue(all(res[pos])) self.assertEqual( - len(pos[0]) if is_list_of_lists(pos) else len(pos), - res.nnz) + len(pos[0]) if is_list_of_lists(pos) else len(pos), res.nnz + ) for test_fun, sub_val in ( - (CArray.is_inf, inf), (CArray.is_inf, -inf), - (CArray.is_posinf, inf), (CArray.is_neginf, -inf), - (CArray.is_nan, nan)): - self.logger.info( - "Test for CArray.{:}() method.".format(test_fun.__name__)) - - _check_is_inf_nan( - test_fun, sub_val, self.array_sparse, [[0, 1], [1, 2]]), - _check_is_inf_nan( - test_fun, sub_val, self.array_dense, [[0, 1], [1, 2]]), - _check_is_inf_nan( - test_fun, sub_val, self.array_dense_bool, [[0, 1], [1, 2]]), - _check_is_inf_nan( - test_fun, sub_val, self.array_sparse_bool, [[0, 1], [1, 2]]), - _check_is_inf_nan( - test_fun, sub_val, self.row_flat_dense, [1, 2]), + (CArray.is_inf, inf), + (CArray.is_inf, -inf), + (CArray.is_posinf, inf), + (CArray.is_neginf, -inf), + (CArray.is_nan, nan), + ): + self.logger.info("Test for CArray.{:}() method.".format(test_fun.__name__)) + + _check_is_inf_nan(test_fun, sub_val, self.array_sparse, [[0, 1], [1, 2]]), + _check_is_inf_nan(test_fun, sub_val, self.array_dense, [[0, 1], [1, 2]]), _check_is_inf_nan( - test_fun, sub_val, self.row_dense, [1, 2]), + test_fun, sub_val, self.array_dense_bool, [[0, 1], [1, 2]] + ), _check_is_inf_nan( - test_fun, sub_val, self.row_sparse, [1, 2]), - _check_is_inf_nan( - test_fun, sub_val, self.column_dense, [[1, 2], [0, 0]]), - _check_is_inf_nan( - test_fun, sub_val, self.column_sparse, [[1, 2], [0, 0]]), - _check_is_inf_nan( - test_fun, sub_val, self.single_flat_dense, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.single_dense, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.single_sparse, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.single_bool_flat_dense, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.single_bool_dense, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.single_bool_sparse, [0]), - _check_is_inf_nan( - test_fun, sub_val, self.empty_flat_dense), - _check_is_inf_nan( - test_fun, sub_val, self.empty_dense), - _check_is_inf_nan( - test_fun, sub_val, self.empty_sparse) - - -if __name__ == '__main__': + test_fun, sub_val, self.array_sparse_bool, [[0, 1], [1, 2]] + ), + _check_is_inf_nan(test_fun, sub_val, self.row_flat_dense, [1, 2]), + _check_is_inf_nan(test_fun, sub_val, self.row_dense, [1, 2]), + _check_is_inf_nan(test_fun, sub_val, self.row_sparse, [1, 2]), + _check_is_inf_nan(test_fun, sub_val, self.column_dense, [[1, 2], [0, 0]]), + _check_is_inf_nan(test_fun, sub_val, self.column_sparse, [[1, 2], [0, 0]]), + _check_is_inf_nan(test_fun, sub_val, self.single_flat_dense, [0]), + _check_is_inf_nan(test_fun, sub_val, self.single_dense, [0]), + _check_is_inf_nan(test_fun, sub_val, self.single_sparse, [0]), + _check_is_inf_nan(test_fun, sub_val, self.single_bool_flat_dense, [0]), + _check_is_inf_nan(test_fun, sub_val, self.single_bool_dense, [0]), + _check_is_inf_nan(test_fun, sub_val, self.single_bool_sparse, [0]), + _check_is_inf_nan(test_fun, sub_val, self.empty_flat_dense), + _check_is_inf_nan(test_fun, sub_val, self.empty_dense), + _check_is_inf_nan(test_fun, sub_val, self.empty_sparse) + + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_mathelementwise.py b/src/secml/array/tests/test_c_array_utils_mathelementwise.py index b4bcd6af..42226931 100644 --- a/src/secml/array/tests/test_c_array_utils_mathelementwise.py +++ b/src/secml/array/tests/test_c_array_utils_mathelementwise.py @@ -15,7 +15,7 @@ def test_sqrt(self): self.logger.filterwarnings( action="ignore", message="invalid value encountered in sqrt", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_sqrt(array, expected): @@ -34,27 +34,41 @@ def _check_sqrt(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_sqrt(self.array_dense, - CArray([[1., 0., 0., 2.2361], - [1.4142, 2., 0., 0.], - [1.7320, 2.4495, 0., 0.]])) - _check_sqrt(self.array_sparse, - CArray([[1., 0., 0., 2.2361], - [1.4142, 2., 0., 0.], - [1.7320, 2.4495, 0., 0.]], tosparse=True)) - _check_sqrt(self.row_flat_dense, CArray([2., 0., 2.4495])) - _check_sqrt(CArray([4., 0., -3.]), CArray([2., 0., nan])) - _check_sqrt(self.row_dense, CArray([[2., 0., 2.4495]])) - _check_sqrt(self.row_sparse, CArray([[2., 0., 2.4495]], tosparse=True)) - _check_sqrt(CArray([[4., 0., -3.]]), CArray([[2., 0., nan]])) - _check_sqrt(CArray([4., 0., -3.], tosparse=True), - CArray([[2., 0., nan]], tosparse=True)) - _check_sqrt(self.column_dense, CArray([[2.], [0.], [2.4495]])) - _check_sqrt(self.column_sparse, - CArray([[2.], [0.], [2.4495]], tosparse=True)) - _check_sqrt(self.single_flat_dense, CArray([2.])) - _check_sqrt(self.single_dense, CArray([[2.]])) - _check_sqrt(self.single_sparse, CArray([[2.]], tosparse=True)) + _check_sqrt( + self.array_dense, + CArray( + [ + [1.0, 0.0, 0.0, 2.2361], + [1.4142, 2.0, 0.0, 0.0], + [1.7320, 2.4495, 0.0, 0.0], + ] + ), + ) + _check_sqrt( + self.array_sparse, + CArray( + [ + [1.0, 0.0, 0.0, 2.2361], + [1.4142, 2.0, 0.0, 0.0], + [1.7320, 2.4495, 0.0, 0.0], + ], + tosparse=True, + ), + ) + _check_sqrt(self.row_flat_dense, CArray([2.0, 0.0, 2.4495])) + _check_sqrt(CArray([4.0, 0.0, -3.0]), CArray([2.0, 0.0, nan])) + _check_sqrt(self.row_dense, CArray([[2.0, 0.0, 2.4495]])) + _check_sqrt(self.row_sparse, CArray([[2.0, 0.0, 2.4495]], tosparse=True)) + _check_sqrt(CArray([[4.0, 0.0, -3.0]]), CArray([[2.0, 0.0, nan]])) + _check_sqrt( + CArray([4.0, 0.0, -3.0], tosparse=True), + CArray([[2.0, 0.0, nan]], tosparse=True), + ) + _check_sqrt(self.column_dense, CArray([[2.0], [0.0], [2.4495]])) + _check_sqrt(self.column_sparse, CArray([[2.0], [0.0], [2.4495]], tosparse=True)) + _check_sqrt(self.single_flat_dense, CArray([2.0])) + _check_sqrt(self.single_dense, CArray([[2.0]])) + _check_sqrt(self.single_sparse, CArray([[2.0]], tosparse=True)) def test_sin(self): """Test for CArray.sin() method.""" @@ -77,10 +91,16 @@ def _check_sin(array, expected): # row_flat_dense = CArray([4, 0, 6]) # We consider the values already in radians - _check_sin(self.array_dense, - CArray([[0.8415, 0, 0, -0.9589], - [0.9093, -0.7568, 0, 0], - [0.1411, -0.2794, 0, 0]])) + _check_sin( + self.array_dense, + CArray( + [ + [0.8415, 0, 0, -0.9589], + [0.9093, -0.7568, 0, 0], + [0.1411, -0.2794, 0, 0], + ] + ), + ) _check_sin(self.row_flat_dense, CArray([-0.7568, 0, -0.2794])) _check_sin(self.row_dense, CArray([[-0.7568, 0, -0.2794]])) _check_sin(self.column_dense, CArray([[-0.7568], [0], [-0.2794]])) @@ -108,13 +128,19 @@ def _check_cos(array, expected): # row_flat_dense = CArray([4, 0, 6]) # We consider the values already in radians - _check_cos(self.array_dense, - CArray([[0.5403, 1., 1., 0.2837], - [-0.4161, -0.6536, 1., 1.], - [-0.9900, 0.9602, 1., 1.]])) - _check_cos(self.row_flat_dense, CArray([-0.6536, 1., 0.9602])) - _check_cos(self.row_dense, CArray([[-0.6536, 1., 0.9602]])) - _check_cos(self.column_dense, CArray([[-0.6536], [1.], [0.9602]])) + _check_cos( + self.array_dense, + CArray( + [ + [0.5403, 1.0, 1.0, 0.2837], + [-0.4161, -0.6536, 1.0, 1.0], + [-0.9900, 0.9602, 1.0, 1.0], + ] + ), + ) + _check_cos(self.row_flat_dense, CArray([-0.6536, 1.0, 0.9602])) + _check_cos(self.row_dense, CArray([[-0.6536, 1.0, 0.9602]])) + _check_cos(self.column_dense, CArray([[-0.6536], [1.0], [0.9602]])) _check_cos(self.single_flat_dense, CArray([-0.6536])) _check_cos(self.single_dense, CArray([[-0.6536]])) @@ -138,13 +164,19 @@ def _check_exp(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_exp(self.array_dense, - CArray([[2.7183, 1., 1., 148.4132], - [7.3891, 54.5982, 1., 1.], - [20.0855, 403.4288, 1., 1.]])) - _check_exp(self.row_flat_dense, CArray([54.5982, 1., 403.4288])) - _check_exp(self.row_dense, CArray([[54.5982, 1., 403.4288]])) - _check_exp(self.column_dense, CArray([[54.5982], [1.], [403.4288]])) + _check_exp( + self.array_dense, + CArray( + [ + [2.7183, 1.0, 1.0, 148.4132], + [7.3891, 54.5982, 1.0, 1.0], + [20.0855, 403.4288, 1.0, 1.0], + ] + ), + ) + _check_exp(self.row_flat_dense, CArray([54.5982, 1.0, 403.4288])) + _check_exp(self.row_dense, CArray([[54.5982, 1.0, 403.4288]])) + _check_exp(self.column_dense, CArray([[54.5982], [1.0], [403.4288]])) _check_exp(self.single_flat_dense, CArray([54.5982])) _check_exp(self.single_dense, CArray([[54.5982]])) @@ -156,7 +188,7 @@ def test_log(self): self.logger.filterwarnings( action="ignore", message="divide by zero encountered in log", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_log(array, expected): @@ -175,10 +207,16 @@ def _check_log(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_log(self.array_dense, - CArray([[0., -inf, -inf, 1.6094], - [0.6931, 1.3863, -inf, -inf], - [1.0986, 1.7918, -inf, -inf]])) + _check_log( + self.array_dense, + CArray( + [ + [0.0, -inf, -inf, 1.6094], + [0.6931, 1.3863, -inf, -inf], + [1.0986, 1.7918, -inf, -inf], + ] + ), + ) _check_log(self.row_flat_dense, CArray([1.3863, -inf, 1.7918])) _check_log(self.row_dense, CArray([[1.3863, -inf, 1.7918]])) _check_log(self.column_dense, CArray([[1.3863], [-inf], [1.7918]])) @@ -193,7 +231,7 @@ def test_log10(self): self.logger.filterwarnings( action="ignore", message="divide by zero encountered in log10", - category=RuntimeWarning + category=RuntimeWarning, ) def _check_log10(array, expected): @@ -212,17 +250,22 @@ def _check_log10(array, expected): # array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]] # row_flat_dense = CArray([4, 0, 6]) - _check_log10(self.array_dense, - CArray([[0., -inf, -inf, 0.6990], - [0.3010, 0.6021, -inf, -inf], - [0.4771, 0.7782, -inf, -inf]])) + _check_log10( + self.array_dense, + CArray( + [ + [0.0, -inf, -inf, 0.6990], + [0.3010, 0.6021, -inf, -inf], + [0.4771, 0.7782, -inf, -inf], + ] + ), + ) _check_log10(self.row_flat_dense, CArray([0.6021, -inf, 0.7782])) _check_log10(self.row_dense, CArray([[0.6021, -inf, 0.7782]])) - _check_log10(self.column_dense, - CArray([[0.6021], [-inf], [0.7782]])) + _check_log10(self.column_dense, CArray([[0.6021], [-inf], [0.7782]])) _check_log10(self.single_flat_dense, CArray([0.6021])) _check_log10(self.single_dense, CArray([[0.6021]])) - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_mixed.py b/src/secml/array/tests/test_c_array_utils_mixed.py index 7b7dabe4..d7d70041 100644 --- a/src/secml/array/tests/test_c_array_utils_mixed.py +++ b/src/secml/array/tests/test_c_array_utils_mixed.py @@ -51,30 +51,34 @@ def _check_sign(array, expected): # SPARSE data = self.array_sparse data[2, :] *= -1 - _check_sign(data, CArray([[1, 0, 0, 1], [1, 1, 0, 0], [-1, -1, 0, 0]], - tosparse=True)) - _check_sign(CArray([[4, 0, -6]], tosparse=True), - CArray([[1, 0, -1]], tosparse=True)) - _check_sign(CArray([[4, 0, -6]], tosparse=True).T, - CArray([[1], [0], [-1]], tosparse=True)) + _check_sign( + data, CArray([[1, 0, 0, 1], [1, 1, 0, 0], [-1, -1, 0, 0]], tosparse=True) + ) + _check_sign( + CArray([[4, 0, -6]], tosparse=True), CArray([[1, 0, -1]], tosparse=True) + ) + _check_sign( + CArray([[4, 0, -6]], tosparse=True).T, + CArray([[1], [0], [-1]], tosparse=True), + ) _check_sign(CArray([4], tosparse=True), CArray([1], tosparse=True)) _check_sign(CArray([0], tosparse=True), CArray([0], tosparse=True)) _check_sign(CArray([-4], tosparse=True), CArray([-1], tosparse=True)) # BOOL - _check_sign(self.array_dense_bool, - CArray([[1, 0, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]])) - _check_sign(self.array_sparse_bool, - CArray([[1, 0, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], - tosparse=True)) + _check_sign( + self.array_dense_bool, CArray([[1, 0, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]]) + ) + _check_sign( + self.array_sparse_bool, + CArray([[1, 0, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], tosparse=True), + ) _check_sign(CArray([True]), CArray([1])) _check_sign(CArray([False]), CArray([0])) _check_sign(CArray([[True]]), CArray([[1]])) _check_sign(CArray([[False]]), CArray([[0]])) - _check_sign(CArray([[True]], tosparse=True), - CArray([[1]], tosparse=True)) - _check_sign(CArray([[False]], tosparse=True), - CArray([[0]], tosparse=True)) + _check_sign(CArray([[True]], tosparse=True), CArray([[1]], tosparse=True)) + _check_sign(CArray([[False]], tosparse=True), CArray([[0]], tosparse=True)) def test_diag(self): """Test for CArray.diag() method.""" @@ -121,8 +125,7 @@ def extract_diag(array, k, out): def create_diag(array, k, out): diag = array.diag(k=k) - self.logger.info( - "Array created using k={:} is:\n{:}".format(k, diag)) + self.logger.info("Array created using k={:} is:\n{:}".format(k, diag)) self.assertEqual(array.isdense, diag.isdense) self.assertEqual(array.issparse, diag.issparse) self.assertTrue((diag == out).all()) @@ -132,8 +135,7 @@ def create_diag(array, k, out): out_diag = CArray([[4, 0, 0], [0, 0, 0], [0, 0, 6]]) create_diag(self.row_flat_dense, k=0, out=out_diag) - out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 6], [0, 0, 0, 0]]) + out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], [0, 0, 0, 6], [0, 0, 0, 0]]) create_diag(self.row_flat_dense, k=1, out=out_diag) self.logger.info("Array is:\n{:}".format(self.row_dense)) @@ -141,8 +143,7 @@ def create_diag(array, k, out): out_diag = CArray([[4, 0, 0], [0, 0, 0], [0, 0, 6]]) create_diag(self.row_dense, k=0, out=out_diag) - out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 6], [0, 0, 0, 0]]) + out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], [0, 0, 0, 6], [0, 0, 0, 0]]) create_diag(self.row_dense, k=1, out=out_diag) self.logger.info("Array is:\n{:}".format(self.row_sparse)) @@ -150,8 +151,7 @@ def create_diag(array, k, out): out_diag = CArray([[4, 0, 0], [0, 0, 0], [0, 0, 6]]) create_diag(self.row_sparse, k=0, out=out_diag) - out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 6], [0, 0, 0, 0]]) + out_diag = CArray([[0, 4, 0, 0], [0, 0, 0, 0], [0, 0, 0, 6], [0, 0, 0, 0]]) create_diag(self.row_sparse, k=1, out=out_diag) self.logger.info("Testing diagonal array creation from single val...") @@ -180,7 +180,7 @@ def create_diag(array, k, out): self.empty_sparse.diag() def test_dot(self): - """"Test for CArray.dot() method.""" + """ "Test for CArray.dot() method.""" self.logger.info("Test for CArray.dot() method.") s_vs_s = self.array_sparse.dot(self.array_sparse.T) s_vs_d = self.array_sparse.dot(self.array_dense.T) @@ -194,14 +194,15 @@ def test_dot(self): self.assertIsInstance(d_vs_s._data, CDense) # Check if we have the same output in all cases - self.assertTrue( - self._test_multiple_eq([s_vs_s, s_vs_d, d_vs_d, d_vs_s])) + self.assertTrue(self._test_multiple_eq([s_vs_s, s_vs_d, d_vs_d, d_vs_s])) # Test inner product between vector-like arrays def _check_dot_vector_like(array1, array2, expected): dot_res = array1.dot(array2) - self.logger.info("We made a dot between {:} and {:}, " - "result: {:}.".format(array1, array2, dot_res)) + self.logger.info( + "We made a dot between {:} and {:}, " + "result: {:}.".format(array1, array2, dot_res) + ) self.assertEqual(dot_res, expected) _check_dot_vector_like(self.row_flat_dense, self.column_dense, 52) @@ -212,17 +213,22 @@ def _check_dot_vector_like(array1, array2, expected): _check_dot_vector_like(self.row_sparse, self.column_sparse, 52) dense_flat_outer = self.column_dense.dot(self.row_flat_dense) - self.logger.info("We made a dot between {:} and {:}, " - "result: {:}.".format(self.column_dense, - self.row_flat_dense, - dense_flat_outer)) - self.assertEqual(len(dense_flat_outer.shape), 2, - "Dot result column.dot(row) is not a matrix!") + self.logger.info( + "We made a dot between {:} and {:}, " + "result: {:}.".format( + self.column_dense, self.row_flat_dense, dense_flat_outer + ) + ) + self.assertEqual( + len(dense_flat_outer.shape), + 2, + "Dot result column.dot(row) is not a matrix!", + ) # Test between flats dot_res_flats = CArray([10, 20]).dot(CArray([1, 0])) self.assertEqual(dot_res_flats, 10) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_search.py b/src/secml/array/tests/test_c_array_utils_search.py index 6f3c346d..ddcce222 100644 --- a/src/secml/array/tests/test_c_array_utils_search.py +++ b/src/secml/array/tests/test_c_array_utils_search.py @@ -99,5 +99,5 @@ def _check_binary_search(a): self.array_sparse.binary_search(3) -if __name__ == '__main__': +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_array_utils_shapealteration.py b/src/secml/array/tests/test_c_array_utils_shapealteration.py index 1652cf30..760f055f 100644 --- a/src/secml/array/tests/test_c_array_utils_shapealteration.py +++ b/src/secml/array/tests/test_c_array_utils_shapealteration.py @@ -14,26 +14,32 @@ def test_transpose(self): target_dense = self.array_dense.get_data().transpose() self.assertFalse( - (target_sparse != self.array_sparse.transpose().get_data()).todense().any()) + (target_sparse != self.array_sparse.transpose().get_data()).todense().any() + ) self.assertFalse( - (target_dense != self.array_dense.transpose().get_data()).any()) + (target_dense != self.array_dense.transpose().get_data()).any() + ) target_sparse = self.array_sparse.get_data().T target_dense = self.array_dense.get_data().T self.assertFalse( - (target_sparse != self.array_sparse.transpose().get_data()).todense().any()) + (target_sparse != self.array_sparse.transpose().get_data()).todense().any() + ) self.assertFalse( - (target_dense != self.array_dense.transpose().get_data()).any()) + (target_dense != self.array_dense.transpose().get_data()).any() + ) dense_flat = CArray([1, 2, 3]) self.logger.info("We have a flat vector {:}".format(dense_flat)) dense_flat_transposed = dense_flat.T - self.logger.info("We transposed the vector: {:}. Shape {:}" - "".format(dense_flat_transposed, - dense_flat_transposed.shape)) - self.assertEqual(len(dense_flat_transposed.shape), 2, - "Array still flat after transposing!") + self.logger.info( + "We transposed the vector: {:}. Shape {:}" + "".format(dense_flat_transposed, dense_flat_transposed.shape) + ) + self.assertEqual( + len(dense_flat_transposed.shape), 2, "Array still flat after transposing!" + ) def test_ravel(self): """Test for CArray.ravel() method.""" @@ -51,7 +57,7 @@ def _check_ravel(array): if array.isdense: self.assertEqual(array_ravel.ndim, 1) - self.assertEqual(array_ravel.shape, (array.size, )) + self.assertEqual(array_ravel.shape, (array.size,)) if array.issparse: self.assertEqual(array_ravel.shape[0], 1) self.assertEqual(array_ravel.shape, (1, array.size)) @@ -108,8 +114,9 @@ def _check_flatten(array): self.assertFalse((array_original != array).any()) else: - self.assertIsInstance(array_flatten, - (bool, float, np.bool_, int, np.integer)) + self.assertIsInstance( + array_flatten, (bool, float, np.bool_, int, np.integer) + ) _check_flatten(self.array_dense) _check_flatten(self.array_sparse) @@ -144,12 +151,11 @@ def _check_reshape(array, shape): self.logger.info("Array:\n{:}".format(array)) res = array.reshape(newshape=shape) - self.logger.info( - "array.reshape(newshape={:}):\n{:}".format(shape, res)) + self.logger.info("array.reshape(newshape={:}):\n{:}".format(shape, res)) # Transforming input shape to appropriate tuple if not isinstance(shape, tuple): - shape = (shape, ) + shape = (shape,) if array.issparse: if len(shape) == 1: shape = (1, shape[0]) @@ -158,8 +164,7 @@ def _check_reshape(array, shape): self.assertEqual(res.shape, shape) # We now go to check if original array elements are preserved - self.assertFalse( - (CArray(array.ravel()) != CArray(res.ravel())).any()) + self.assertFalse((CArray(array.ravel()) != CArray(res.ravel())).any()) _check_reshape(self.array_dense, (2, 6)) _check_reshape(self.array_dense, 12) @@ -242,7 +247,8 @@ def _check_resize(array, shape): res = array.resize(newshape=shape, constant=constant) self.logger.info( "array.resize(newshape={:}, constant={:}):" - "\n{:}".format(shape, constant, res)) + "\n{:}".format(shape, constant, res) + ) if not isinstance(shape, tuple): self.assertEqual(res.ndim, 1) @@ -252,10 +258,10 @@ def _check_resize(array, shape): self.assertEqual(res.dtype, array.dtype) # We now go to check if original array elements are preserved - array_size = array.shape[0] * \ - (array.shape[1] if len(array.shape) > 1 else 1) - res_size = res.shape[0] * \ - (res.shape[1] if len(res.shape) > 1 else 1) + array_size = array.shape[0] * ( + array.shape[1] if len(array.shape) > 1 else 1 + ) + res_size = res.shape[0] * (res.shape[1] if len(res.shape) > 1 else 1) if res_size == 0: self.assertFalse((res != CArray([])).any()) @@ -272,8 +278,7 @@ def _check_resize(array, shape): self.assertFalse((array_ravel != res_ravel).any()) if res_added is not None: - self.assertFalse( - (res_added != array.dtype.type(constant)).any()) + self.assertFalse((res_added != array.dtype.type(constant)).any()) _check_resize(self.array_dense, (2, 6)) _check_resize(self.array_dense, (2, 4)) @@ -314,7 +319,7 @@ def _check_resize(array, shape): _check_resize(self.single_bool_flat_dense, (2, 4)) _check_resize(self.single_bool_flat_dense, 0) _check_resize(self.single_bool_flat_dense, 5) - _check_resize(self.single_bool_dense,(1, 1)) + _check_resize(self.single_bool_dense, (1, 1)) _check_resize(self.single_bool_dense, (2, 4)) _check_resize(self.single_bool_dense, 0) _check_resize(self.single_bool_dense, 5) @@ -330,7 +335,7 @@ def _check_resize(array, shape): with self.assertRaises(NotImplementedError): self.array_sparse.resize((2, 6)) - -if __name__ == '__main__': + +if __name__ == "__main__": CArrayTestCases.main() diff --git a/src/secml/array/tests/test_c_ndarray.py b/src/secml/array/tests/test_c_ndarray.py index dcbf555f..e8e255f7 100644 --- a/src/secml/array/tests/test_c_ndarray.py +++ b/src/secml/array/tests/test_c_ndarray.py @@ -13,7 +13,7 @@ def test_save_load(self): self.logger.info("UNITTEST - CDense - save/load matrix") - test_file = fm.join(fm.abspath(__file__), 'test.txt') + test_file = fm.join(fm.abspath(__file__), "test.txt") # Cleaning test file try: @@ -28,8 +28,7 @@ def test_save_load(self): a.save(test_file) with self.timer(): - b = CDense().load( - test_file, startrow=100, cols=CDense(np.arange(0, 100))) + b = CDense().load(test_file, startrow=100, cols=CDense(np.arange(0, 100))) self.assertFalse((a[100:, 0:100] != b).any()) @@ -41,19 +40,19 @@ def test_save_load(self): a.save(test_file, overwrite=True) with self.timer(): - b = CDense().load( - test_file, cols=list(range(100, 1000)), dtype=int).ravel() + b = CDense().load(test_file, cols=list(range(100, 1000)), dtype=int).ravel() self.assertFalse((a[0, 100] != b).any()) - if np.__version__ < '1.18': + if np.__version__ < "1.18": with self.assertRaises(IndexError) as e: CDense().load(test_file, startrow=10) self.logger.info("Expected error: {:}".format(e.exception)) else: with self.logger.catch_warnings(): self.logger.filterwarnings( - "ignore", message="genfromtxt: Empty input file") + "ignore", message="genfromtxt: Empty input file" + ) a = CDense().load(test_file, startrow=10) self.assertEqual(a.size, 0) @@ -73,13 +72,14 @@ def test_save_load(self): # Let's handle the resource warning about unclosed file with self.logger.catch_warnings(): self.logger.filterwarnings("ignore", message="unclosed file") - if np.__version__ < '1.18': + if np.__version__ < "1.18": with self.assertRaises(IndexError) as e: CDense().load(test_file, startrow=10) self.logger.info("Expected error: {:}".format(e.exception)) else: self.logger.filterwarnings( - "ignore", message="genfromtxt: Empty input file") + "ignore", message="genfromtxt: Empty input file" + ) a = CDense().load(test_file, startrow=10) self.assertEqual(a.size, 0) @@ -88,7 +88,7 @@ def test_save_load(self): a = -CDense().zeros(1000) a.save(test_file, overwrite=True) - with open(test_file, mode='at+') as fhandle: + with open(test_file, mode="at+") as fhandle: with self.timer(): a.save(fhandle, overwrite=True) @@ -98,7 +98,7 @@ def test_save_load(self): self.assertFalse((a != b).any()) - a = CDense(['a', 'b']) + a = CDense(["a", "b"]) with self.timer(): a.save(test_file, overwrite=True) @@ -114,6 +114,6 @@ def test_save_load(self): if e.errno != 2: raise e - -if __name__ == '__main__': + +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/array/tests/test_csr_sparse.py b/src/secml/array/tests/test_csr_sparse.py index 534183f4..55fa9d41 100644 --- a/src/secml/array/tests/test_csr_sparse.py +++ b/src/secml/array/tests/test_csr_sparse.py @@ -11,9 +11,7 @@ class TestCSparse(CUnitTest): def setUp(self): """Basic set up.""" - self.dense = CDense([[1, 0, 0, 0, 5], - [2, 4, 0, 0, 0], - [3, 4, 5, 0, 0]]) + self.dense = CDense([[1, 0, 0, 0, 5], [2, 4, 0, 0, 0], [3, 4, 5, 0, 0]]) self.dense_vector = CDense([1, 0, 0, 0, 3]) self.sparse_matrix = CSparse(self.dense) self.sparse_vector = CSparse(self.dense_vector) @@ -22,7 +20,7 @@ def test_save_load(self): """Test save/load of sparse arrays""" self.logger.info("UNITTEST - CSparse - save/load") - test_file = fm.join(fm.abspath(__file__), 'test.txt') + test_file = fm.join(fm.abspath(__file__), "test.txt") # Cleaning test file try: @@ -31,30 +29,33 @@ def test_save_load(self): if e.errno != 2: raise e - self.logger.info( - "UNITTEST - CSparse - Testing save/load for sparse matrix") + self.logger.info("UNITTEST - CSparse - Testing save/load for sparse matrix") self.sparse_matrix.save(test_file) self.logger.info( - "Saving again with overwrite=False... IOError should be raised.") + "Saving again with overwrite=False... IOError should be raised." + ) with self.assertRaises(IOError) as e: self.sparse_matrix.save(test_file) self.logger.info(e.exception) loaded_sparse_matrix = CSparse.load(test_file, dtype=int) - self.assertFalse((loaded_sparse_matrix != self.sparse_matrix).any(), - "Saved and loaded arrays (matrices) are not equal!") + self.assertFalse( + (loaded_sparse_matrix != self.sparse_matrix).any(), + "Saved and loaded arrays (matrices) are not equal!", + ) - self.logger.info( - "UNITTEST - CSparse - Testing save/load for sparse vector") + self.logger.info("UNITTEST - CSparse - Testing save/load for sparse vector") self.sparse_vector.save(test_file, overwrite=True) loaded_sparse_vector = CSparse.load(test_file, dtype=int) - self.assertFalse((loaded_sparse_vector != self.sparse_vector).any(), - "Saved and loaded arrays (vectors) are not equal!") + self.assertFalse( + (loaded_sparse_vector != self.sparse_vector).any(), + "Saved and loaded arrays (vectors) are not equal!", + ) # Cleaning test file try: @@ -69,5 +70,5 @@ def test_mixed(self): print(type(self.sparse_matrix[np.ravel(0)[0], np.ravel(0)[0]])) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/core/attr_utils.py b/src/secml/core/attr_utils.py index 5ab3e44c..30726843 100644 --- a/src/secml/core/attr_utils.py +++ b/src/secml/core/attr_utils.py @@ -5,23 +5,40 @@ .. moduleauthor:: Marco Melis """ + from secml import _NoValue from secml.core.type_utils import is_str -__all__ = ['as_public', - 'as_protected', 'has_protected', 'get_protected', - 'as_private', 'has_private', 'get_private', - 'has_property', 'get_property', 'has_getter', 'has_setter', - 'add_readonly', 'add_readwrite', - 'is_public', 'is_protected', 'is_readonly', 'is_readwrite', - 'is_readable', 'is_writable', 'extract_attr'] +__all__ = [ + "as_public", + "as_protected", + "has_protected", + "get_protected", + "as_private", + "has_private", + "get_private", + "has_property", + "get_property", + "has_getter", + "has_setter", + "add_readonly", + "add_readwrite", + "is_public", + "is_protected", + "is_readonly", + "is_readwrite", + "is_readable", + "is_writable", + "extract_attr", +] def _check_is_attr_name(attr): """Raise TypeError if input is not an attribute name (string).""" if not is_str(attr): - raise TypeError("attribute must be passed as a string, " - "not {:}.".format(type(attr))) + raise TypeError( + "attribute must be passed as a string, " "not {:}.".format(type(attr)) + ) def as_public(attr): @@ -41,7 +58,8 @@ def as_public(attr): """ _check_is_attr_name(attr) import re - return re.sub('^_rw_|^_r_|^_', '', attr) + + return re.sub("^_rw_|^_r_|^_", "", attr) def as_protected(attr): @@ -60,9 +78,9 @@ def as_protected(attr): """ _check_is_attr_name(attr) - if not attr.startswith('_'): # Public attribute - return '_' + attr - if attr.startswith('__'): # Private attribute + if not attr.startswith("_"): # Public attribute + return "_" + attr + if attr.startswith("__"): # Private attribute return attr[1:] # Remove the first underscore return attr # Already a protected attribute @@ -112,8 +130,8 @@ def as_private(obj_class, attr): """ _check_is_attr_name(attr) - attr = '__' + attr if attr.startswith('__') is False else attr - return '_' + obj_class.__name__ + attr + attr = "__" + attr if attr.startswith("__") is False else attr + return "_" + obj_class.__name__ + attr def has_private(obj_class, attr): @@ -161,9 +179,12 @@ def has_property(obj, attr): """ _check_is_attr_name(attr) - return True if hasattr(obj.__class__, as_public(attr)) and \ - isinstance(getattr( - obj.__class__, as_public(attr)), property) else False + return ( + True + if hasattr(obj.__class__, as_public(attr)) + and isinstance(getattr(obj.__class__, as_public(attr)), property) + else False + ) def get_property(obj, attr): @@ -181,8 +202,10 @@ def get_property(obj, attr): """ _check_is_attr_name(attr) if not has_property(obj, attr): - raise AttributeError("'{:}' has no property associated with attribute " - "'{:}'.".format(obj.__class__.__name__, attr)) + raise AttributeError( + "'{:}' has no property associated with attribute " + "'{:}'.".format(obj.__class__.__name__, attr) + ) return getattr(obj.__class__, as_public(attr)) @@ -198,8 +221,11 @@ def has_getter(obj, attr): """ _check_is_attr_name(attr) - return True if has_property(obj, attr) and \ - get_property(obj, attr).fget is not None else False + return ( + True + if has_property(obj, attr) and get_property(obj, attr).fget is not None + else False + ) def has_setter(obj, attr): @@ -214,8 +240,11 @@ def has_setter(obj, attr): """ _check_is_attr_name(attr) - return True if has_property(obj, attr) and \ - get_property(obj, attr).fset is not None else False + return ( + True + if has_property(obj, attr) and get_property(obj, attr).fset is not None + else False + ) def add_readonly(obj, attr, value=None): @@ -286,8 +315,7 @@ def is_public(obj, attr): """ _check_is_attr_name(attr) # Exclude properties to only return actual public attributes - return True if not attr.startswith('_') and \ - not has_property(obj, attr) else False + return True if not attr.startswith("_") and not has_property(obj, attr) else False def is_readonly(obj, attr): @@ -304,9 +332,13 @@ def is_readonly(obj, attr): """ _check_is_attr_name(attr) - return True if not is_public(obj, attr) and \ - has_getter(obj, attr) and \ - not has_setter(obj, attr) else False + return ( + True + if not is_public(obj, attr) + and has_getter(obj, attr) + and not has_setter(obj, attr) + else False + ) def is_readwrite(obj, attr): @@ -343,8 +375,7 @@ def is_protected(obj, attr): """ _check_is_attr_name(attr) # There cannot be a setter without a getter! - return True if not is_public(obj, attr) and \ - not has_getter(obj, attr) else False + return True if not is_public(obj, attr) and not has_getter(obj, attr) else False def is_readable(obj, attr): @@ -364,9 +395,11 @@ def is_readable(obj, attr): """ _check_is_attr_name(attr) - return True if is_public(obj, attr) or \ - is_readwrite(obj, attr) or \ - is_readonly(obj, attr) else False + return ( + True + if is_public(obj, attr) or is_readwrite(obj, attr) or is_readonly(obj, attr) + else False + ) def is_writable(obj, attr): @@ -418,16 +451,16 @@ def extract_attr(obj, mode): def parse_modes(mode_str): """Parse modes string and return a list with the required checks.""" - mode_list = mode_str.split('+') + mode_list = mode_str.split("+") req_check = [] for m in mode_list: - if m == 'pub': + if m == "pub": req_check.append(is_public) - elif m == 'rw': + elif m == "rw": req_check.append(is_readwrite) - elif m == 'r': + elif m == "r": req_check.append(is_readonly) - elif m == 'pro': + elif m == "pro": req_check.append(is_protected) else: raise ValueError("mode `{:}` not supported.".format(m)) diff --git a/src/secml/core/c_creator.py b/src/secml/core/c_creator.py index 43956a87..bae3ecd5 100755 --- a/src/secml/core/c_creator.py +++ b/src/secml/core/c_creator.py @@ -5,13 +5,21 @@ .. moduleauthor:: Marco Melis """ + from importlib import import_module from inspect import isclass, getmembers from functools import wraps from secml.settings import SECML_STORE_LOGS, SECML_LOGS_PATH -from secml.core.attr_utils import is_writable, is_readable, \ - extract_attr, as_public, has_protected, as_protected, get_private +from secml.core.attr_utils import ( + is_writable, + is_readable, + extract_attr, + as_public, + has_protected, + as_protected, + get_private, +) from secml.core.type_utils import is_str import secml.utils.pickle_utils as pck from secml.utils.list_utils import find_duplicates @@ -31,27 +39,34 @@ class will not be instantiable using `.create()`. Can be None to explicitly NOT support `.create()` and `.load()`. """ + __class_type = None # Must be re-defined to support `.create()` - __super__ = None # Name of the superclass (if `.create()` or `.load()` should be available) + __super__ = ( + None # Name of the superclass (if `.create()` or `.load()` should be available) + ) # Ancestor logger, level 'WARNING' by default _logger = CLog( add_stream=True, - file_handler=SECML_LOGS_PATH if SECML_STORE_LOGS is True else None) + file_handler=SECML_LOGS_PATH if SECML_STORE_LOGS is True else None, + ) @property def class_type(self): """Defines class type.""" try: # Convert the private attribute to public property - return get_private(self.__class__, 'class_type') + return get_private(self.__class__, "class_type") except AttributeError: - raise AttributeError("'class_type' not defined for '{:}'" - "".format(self.__class__.__name__)) + raise AttributeError( + "'class_type' not defined for '{:}'" "".format(self.__class__.__name__) + ) @property def logger(self): """Logger for current object.""" - return self._logger.get_child(self.__class__.__name__ + '.' + str(hex(id(self)))) + return self._logger.get_child( + self.__class__.__name__ + "." + str(hex(id(self))) + ) @property def verbose(self): @@ -69,7 +84,7 @@ def verbose(self): @verbose.setter def verbose(self, level): """Sets verbosity level of logger.""" - verbosity_lvls = {0: 'WARNING', 1: 'INFO', 2: 'DEBUG'} + verbosity_lvls = {0: "WARNING", 1: "INFO", 2: "DEBUG"} if level not in verbosity_lvls: raise ValueError("Verbosity level {:} not supported.".format(level)) self.logger.set_level(verbosity_lvls[level]) @@ -92,6 +107,7 @@ def timed(msg=None): If None, "Entering timed block `method_name`..." will printed. """ + def wrapper(fun): @wraps(fun) # To make wrapped_fun work as fun_timed def wrapped_fun(self, *args, **kwargs): @@ -100,8 +116,11 @@ def wrapped_fun(self, *args, **kwargs): @wraps(fun) # To make fun_timed work as fun def fun_timed(*fun_args, **fun_wargs): return fun(*fun_args, **fun_wargs) + return fun_timed(self, *args, **kwargs) + return wrapped_fun + return wrapper @classmethod @@ -142,8 +161,9 @@ def create(cls, class_item=None, *args, **kwargs): # We accept strings and class instances only if isclass(class_item): # Returns false for instances - raise TypeError("creator only accepts a class type " - "as string or a class instance.") + raise TypeError( + "creator only accepts a class type " "as string or a class instance." + ) # CCreator cannot be created! if class_item.__class__ == CCreator: @@ -152,11 +172,14 @@ def create(cls, class_item=None, *args, **kwargs): # If a class instance is passed, it's returned as is if not is_str(class_item): if not isinstance(class_item, cls): - raise TypeError("input instance should be a {:} " - "subclass.".format(cls.__name__)) + raise TypeError( + "input instance should be a {:} " "subclass.".format(cls.__name__) + ) if len(args) + len(kwargs) != 0: - raise TypeError("optional arguments are not allowed " - "when a class instance is passed.") + raise TypeError( + "optional arguments are not allowed " + "when a class instance is passed." + ) return class_item # Get all the subclasses of the superclass @@ -170,12 +193,13 @@ def create(cls, class_item=None, *args, **kwargs): # Everything seems fine now, look for desired class type for class_data in subclasses: - if get_private(class_data[1], 'class_type', None) == class_item: + if get_private(class_data[1], "class_type", None) == class_item: return class_data[1](*args, **kwargs) - raise NameError("no class of type `{:}` is a subclass of '{:}' " - "from module '{:}'".format( - class_item, cls.__name__, cls.__module__)) + raise NameError( + "no class of type `{:}` is a subclass of '{:}' " + "from module '{:}'".format(class_item, cls.__name__, cls.__module__) + ) @classmethod def get_subclasses(cls): @@ -189,6 +213,7 @@ def get_subclasses(cls): each class is a "subclass" of itself. """ + def get_subclasses(sup_cls): subcls_list = [] for subclass in sup_cls.__subclasses__(): @@ -260,11 +285,13 @@ def get_class_from_type(cls, class_type): # Look for desired class type for class_data in subclasses: - if get_private(class_data[1], 'class_type', None) == class_type: + if get_private(class_data[1], "class_type", None) == class_type: return class_data[1] - raise NameError("no class of type `{:}` found within the package " - "of class '{:}'".format(class_type, cls.__module__)) + raise NameError( + "no class of type `{:}` found within the package " + "of class '{:}'".format(class_type, cls.__module__) + ) def get_params(self): """Returns the dictionary of class hyperparameters. @@ -275,14 +302,16 @@ def get_params(self): # We extract the PUBLIC (pub) and the READ/WRITE (rw) attributes # from the class dictionary, than we build a new dictionary using # as keys the attributes names without the accessibility prefix - params = SubLevelsDict((as_public(k), getattr(self, as_public(k))) - for k in extract_attr(self, 'pub+rw')) + params = SubLevelsDict( + (as_public(k), getattr(self, as_public(k))) + for k in extract_attr(self, "pub+rw") + ) # Now look for any parameter inside the accessible attributes - for k in extract_attr(self, 'r'): + for k in extract_attr(self, "r"): # Extract the contained object (if any) k_attr = getattr(self, as_public(k)) - if hasattr(k_attr, 'get_params') and len(k_attr.get_params()) > 0: + if hasattr(k_attr, "get_params") and len(k_attr.get_params()) > 0: # as k_attr has one or more parameters, it's a parameter itself params[as_public(k)] = k_attr @@ -346,12 +375,14 @@ def set(self, param_name, param_value, copy=False): extracted, a deepcopy of the parameter value is done first. """ + def copy_attr(attr_tocopy): from copy import deepcopy + return deepcopy(attr_tocopy) # Support for recursive setting, e.g. -> kernel.gamma - param_name = param_name.split('.') + param_name = param_name.split(".") attr0 = param_name[0] if hasattr(self, attr0): @@ -361,17 +392,20 @@ def copy_attr(attr_tocopy): # PUBLIC and READ/WRITE accessibility is checked if not is_writable(self, attr0): raise AttributeError( - "can't set `{:}`, must be writable.".format(attr0)) - setattr(self, attr0, copy_attr(param_value) - if copy is True else param_value) + "can't set `{:}`, must be writable.".format(attr0) + ) + setattr( + self, attr0, copy_attr(param_value) if copy is True else param_value + ) return else: # Start recursion on sublevels # Level 0 attribute must be accessible (readable) # PUBLIC, READ/WRITE and READ ONLY accessibility is checked if not is_readable(self, attr0): raise AttributeError( - "can't set `{:}`, must be accessible.".format(attr0)) - sub_param_name = '.'.join(param_name[1:]) + "can't set `{:}`, must be accessible.".format(attr0) + ) + sub_param_name = ".".join(param_name[1:]) # Calling `.set` method of the next sublevel getattr(self, attr0).set(sub_param_name, param_value, copy) return @@ -389,15 +423,20 @@ def copy_attr(attr_tocopy): # PUBLIC and READ/WRITE accessibility is checked if not is_writable(attr, attr0): raise AttributeError( - "can't set `{:}`, must be writable.".format(attr0)) - setattr(attr, attr0, copy_attr(param_value) - if copy is True else param_value) + "can't set `{:}`, must be writable.".format(attr0) + ) + setattr( + attr, + attr0, + copy_attr(param_value) if copy is True else param_value, + ) return # Attribute not found, raise AttributeError raise AttributeError( "'{:}', or any of its attributes, has attribute '{:}'" - "".format(self.__class__.__name__, attr0)) + "".format(self.__class__.__name__, attr0) + ) def get_state(self, **kwargs): """Returns the object state dictionary. @@ -416,8 +455,10 @@ def get_state(self, **kwargs): # We extract the PUBLIC (pub), READ/WRITE (rw) and READ ONLY (r) # attributes from the class dictionary, than we build a new dictionary # using as keys the attributes names without the accessibility prefix - state = dict((as_public(k), getattr(self, as_public(k))) - for k in extract_attr(self, 'pub+rw+r')) + state = dict( + (as_public(k), getattr(self, as_public(k))) + for k in extract_attr(self, "pub+rw+r") + ) # Get the state of the deeper objects # Use list(state) as state size will change during iteration @@ -426,7 +467,7 @@ def get_state(self, **kwargs): state_deep = state[attr].get_state(**kwargs) # Replace `attr` with its attributes's state for attr_deep in state_deep: - attr_full_key = attr + '.' + attr_deep + attr_full_key = attr + "." + attr_deep state[attr_full_key] = state_deep[attr_deep] del state[attr] @@ -451,8 +492,10 @@ def set_state(self, state_dict, copy=False): extracted, a deepcopy of the attribute is done first. """ + def copy_attr(attr_tocopy): from copy import deepcopy + return deepcopy(attr_tocopy) for param_name in state_dict: @@ -461,13 +504,14 @@ def copy_attr(attr_tocopy): param_value = state_dict[param_name] # Support for recursive setting, e.g. -> kernel.gamma - param_name = param_name.split('.', 1) + param_name = param_name.split(".", 1) # Attributes to set in this function must be readable # PUBLIC, READ/WRITE and READ ONLY accessibility is checked if not is_readable(self, param_name[0]): raise AttributeError( - "can't set `{:}`, must be readable.".format(param_name[0])) + "can't set `{:}`, must be readable.".format(param_name[0]) + ) attr0 = param_name[0] if hasattr(self, attr0): @@ -481,19 +525,22 @@ def copy_attr(attr_tocopy): # If exists, set the protected attribute if has_protected(self, attr0): attr0 = as_protected(attr0) - setattr(self, attr0, copy_attr(param_value) - if copy is True else param_value) + setattr( + self, + attr0, + copy_attr(param_value) if copy is True else param_value, + ) continue # Attribute set, go to next one else: # Start recursion on sublevels # Call `.set_state` for the next level of current attribute - getattr(self, attr0).set_state( - {param_name[1]: param_value}, copy) + getattr(self, attr0).set_state({param_name[1]: param_value}, copy) continue # Attribute set, go to next one # Attribute not found, raise AttributeError raise AttributeError( "'{:}', or any of its attributes, has attribute '{:}'" - "".format(self.__class__.__name__, attr0)) + "".format(self.__class__.__name__, attr0) + ) def copy(self): """Returns a shallow copy of current class. @@ -504,11 +551,13 @@ def copy(self): """ from copy import copy + return copy(self) def __copy__(self, *args, **kwargs): """Called when copy.copy(object) is called.""" from copy import copy + new_obj = self.__new__(self.__class__) for attr in self.__dict__: new_obj.__dict__[attr] = copy(self.__dict__[attr]) @@ -523,6 +572,7 @@ def deepcopy(self): """ from copy import deepcopy + return deepcopy(self) def __deepcopy__(self, memo, *args, **kwargs): @@ -532,6 +582,7 @@ def __deepcopy__(self, memo, *args, **kwargs): """ from copy import deepcopy + new_obj = self.__new__(self.__class__) for attr in self.__dict__: new_obj.__dict__[attr] = deepcopy(self.__dict__[attr], memo) @@ -575,12 +626,16 @@ def load(cls, path): """ loaded_obj = pck.load(path) - if loaded_obj.__class__ == cls or cls == CCreator or \ - (has_super(loaded_obj) and cls.__name__ == loaded_obj.__super__): + if ( + loaded_obj.__class__ == cls + or cls == CCreator + or (has_super(loaded_obj) and cls.__name__ == loaded_obj.__super__) + ): return loaded_obj else: err_str = "'{0}' can be loaded from: '{0}'".format( - loaded_obj.__class__.__name__) + loaded_obj.__class__.__name__ + ) if has_super(loaded_obj): err_str += ", '{:}'".format(loaded_obj.__super__) raise TypeError(err_str + " or 'CCreator'.") @@ -626,12 +681,12 @@ def load_state(self, path): def __repr__(self): """Defines print behaviour.""" out_repr = self.__class__.__name__ + "{" - for k in extract_attr(self, 'pub+rw+r'): + for k in extract_attr(self, "pub+rw+r"): pub_attr_name = as_public(k) out_repr += "'{:}': ".format(pub_attr_name) out_repr += repr(getattr(self, pub_attr_name)) out_repr += ", " - return out_repr.rstrip(', ') + "}" + return out_repr.rstrip(", ") + "}" def has_super(cls): @@ -646,7 +701,7 @@ def has_super(cls): Any class or class isntance. """ - return hasattr(cls, '__super__') and cls.__super__ is not None + return hasattr(cls, "__super__") and cls.__super__ is not None def import_package_classes(cls): @@ -663,8 +718,8 @@ def import_package_classes(cls): package_name = cls.__module__ # Leaving out the last part of __module__ string as is `cls` filename # But only if module is not the main (a single file) - if package_name != '__main__': - package_name = package_name.rpartition('.')[0] + if package_name != "__main__": + package_name = package_name.rpartition(".")[0] # Import the entire package package = import_module(package_name) # Get the classes only from the package @@ -685,11 +740,10 @@ def import_class_types(classes): # Get all class types from the input list of classes (to check duplicates) # Leaving out the classes not defining a class_type class_types = map( - lambda class_file: get_private(class_file[1], 'class_type', None), - classes) + lambda class_file: get_private(class_file[1], "class_type", None), classes + ) # skipping non string class_types -> classes not supporting creator - return [class_type for class_type in - class_types if isinstance(class_type, str)] + return [class_type for class_type in class_types if isinstance(class_type, str)] def _check_class_types_duplicates(class_types, classes): @@ -697,8 +751,11 @@ def _check_class_types_duplicates(class_types, classes): duplicates = find_duplicates(class_types) if len(duplicates) != 0: # Return the list of classes with duplicate type duplicates_classes = [ - (class_tuple[0], get_private(class_tuple[1], 'class_type')) - for class_tuple in classes if - get_private(class_tuple[1], 'class_type', None) in duplicates] - raise ValueError("following classes have the same class type. Fix " - "before continue. {:}".format(duplicates_classes)) + (class_tuple[0], get_private(class_tuple[1], "class_type")) + for class_tuple in classes + if get_private(class_tuple[1], "class_type", None) in duplicates + ] + raise ValueError( + "following classes have the same class type. Fix " + "before continue. {:}".format(duplicates_classes) + ) diff --git a/src/secml/core/constants.py b/src/secml/core/constants.py index 92e72765..dee2eaaa 100644 --- a/src/secml/core/constants.py +++ b/src/secml/core/constants.py @@ -5,10 +5,11 @@ .. moduleauthor:: Marco Melis """ + import numpy as np import math -__all__ = ['inf', 'nan', 'eps', 'e', 'pi'] +__all__ = ["inf", "nan", "eps", "e", "pi"] """Positive infinity.""" diff --git a/src/secml/core/decorators.py b/src/secml/core/decorators.py index 53c5d9f9..fbfe41d2 100644 --- a/src/secml/core/decorators.py +++ b/src/secml/core/decorators.py @@ -36,7 +36,8 @@ class deprecated: - https://wiki.python.org/moin/PythonDecoratorLibrary """ - def __init__(self, version, extra=''): + + def __init__(self, version, extra=""): self.extra = extra self.version = version @@ -57,12 +58,12 @@ def __call__(self, obj): def _decorate_class(self, cls): """Decorate class clf.""" msg = "class `{:}` is deprecated since version {:}".format( - cls.__name__, self.version) + cls.__name__, self.version + ) if self.extra: msg += "; %s" % self.extra - warnings.filterwarnings( - 'once', message=msg, category=DeprecationWarning) + warnings.filterwarnings("once", message=msg, category=DeprecationWarning) # FIXME: we should probably reset __new__ for full generality init = cls.__init__ @@ -70,9 +71,10 @@ def _decorate_class(self, cls): def wrapped(*args, **kwargs): warnings.warn(msg, category=DeprecationWarning) return init(*args, **kwargs) + cls.__init__ = wrapped - wrapped.__name__ = '__init__' + wrapped.__name__ = "__init__" wrapped.deprecated_original = init cls.__doc__ = self._update_doc(cls.__doc__) @@ -82,12 +84,12 @@ def wrapped(*args, **kwargs): def _decorate_fun(self, fun): """Decorate function fun.""" msg = "function `{:}` is deprecated since version {:}".format( - fun.__name__, self.version) + fun.__name__, self.version + ) if self.extra: msg += "; %s" % self.extra - warnings.filterwarnings( - 'once', message=msg, category=DeprecationWarning) + warnings.filterwarnings("once", message=msg, category=DeprecationWarning) @functools.wraps(fun) def wrapped(*args, **kwargs): diff --git a/src/secml/core/exceptions.py b/src/secml/core/exceptions.py index 2806da11..c2e26290 100644 --- a/src/secml/core/exceptions.py +++ b/src/secml/core/exceptions.py @@ -6,7 +6,7 @@ """ -__all__ = ['NotFittedError'] +__all__ = ["NotFittedError"] class NotFittedError(ValueError, AttributeError): @@ -27,4 +27,5 @@ class NotFittedError(ValueError, AttributeError): NotFittedError('this `CClassifierSVM` is not trained. Call `.fit()` first.',) """ + pass diff --git a/src/secml/core/tests/test_attr_utils.py b/src/secml/core/tests/test_attr_utils.py index f0aa6651..8f142859 100644 --- a/src/secml/core/tests/test_attr_utils.py +++ b/src/secml/core/tests/test_attr_utils.py @@ -35,22 +35,23 @@ def test_extract_attr(self): def check_attrs(code, expected): self.assertTrue( - set(attr for attr in extract_attr(self.test, code)) == expected) - - check_attrs('pub', {'a'}) - check_attrs('r', {'_b'}) - check_attrs('rw', {'_c'}) - check_attrs('pub+r', {'a', '_b'}) - check_attrs('pub+rw', {'a', '_c'}) - check_attrs('pub+pro', {'a', '_d'}) - check_attrs('r+rw', {'_b', '_c'}) - check_attrs('r+pro', {'_b', '_d'}) - check_attrs('rw+pro', {'_c', '_d'}) - check_attrs('pub+r+rw', {'a', '_b', '_c'}) - check_attrs('pub+r+pro', {'a', '_b', '_d'}) - check_attrs('pub+rw+pro', {'a', '_c', '_d'}) - check_attrs('pub+r+rw+pro', {'a', '_b', '_c', '_d'}) - - -if __name__ == '__main__': + set(attr for attr in extract_attr(self.test, code)) == expected + ) + + check_attrs("pub", {"a"}) + check_attrs("r", {"_b"}) + check_attrs("rw", {"_c"}) + check_attrs("pub+r", {"a", "_b"}) + check_attrs("pub+rw", {"a", "_c"}) + check_attrs("pub+pro", {"a", "_d"}) + check_attrs("r+rw", {"_b", "_c"}) + check_attrs("r+pro", {"_b", "_d"}) + check_attrs("rw+pro", {"_c", "_d"}) + check_attrs("pub+r+rw", {"a", "_b", "_c"}) + check_attrs("pub+r+pro", {"a", "_b", "_d"}) + check_attrs("pub+rw+pro", {"a", "_c", "_d"}) + check_attrs("pub+r+rw+pro", {"a", "_b", "_c", "_d"}) + + +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/core/tests/test_c_creator.py b/src/secml/core/tests/test_c_creator.py index 74850cad..7911c779 100644 --- a/src/secml/core/tests/test_c_creator.py +++ b/src/secml/core/tests/test_c_creator.py @@ -48,21 +48,21 @@ def check_attrs(pdict, expected): # Standard class params = self.test.get_params() self.logger.info("Foo.get_params(): {:}".format(params)) - check_attrs(params, {'a', 'c'}) + check_attrs(params, {"a", "c"}) # To the read-only param we assign a class with a public attribute # get_params should return the read-only parameter too self.test._b = Doo() params = self.test.get_params() self.logger.info("Foo.get_params() with b=Doo(): {:}".format(params)) - check_attrs(params, {'a', 'b', 'c'}) + check_attrs(params, {"a", "b", "c"}) # To the read-only param we assign a class with a protected attribute # get_params should NOT return the read-only parameter self.test._b = Coo() params = self.test.get_params() self.logger.info("Foo.get_params() with b=Coo(): {:}".format(params)) - check_attrs(params, {'a', 'c'}) + check_attrs(params, {"a", "c"}) # In the following we replace the protected attribute # get_params should NOT return it in any case @@ -70,46 +70,46 @@ def check_attrs(pdict, expected): self.test._d = Doo() params = self.test.get_params() self.logger.info("Foo.get_params() with d=Doo(): {:}".format(params)) - check_attrs(params, {'a', 'c'}) + check_attrs(params, {"a", "c"}) self.test._d = Coo() params = self.test.get_params() self.logger.info("Foo.get_params() with d=Coo(): {:}".format(params)) - check_attrs(params, {'a', 'c'}) + check_attrs(params, {"a", "c"}) def test_set(self): """Unittest for `CCreator.set()`.""" # Standard class - self.test.set('a', 10) + self.test.set("a", 10) self.logger.info("Foo.a: {:}".format(self.test.a)) self.assertEqual(self.test.a, 10) - self.test.set('c', 30) + self.test.set("c", 30) self.assertEqual(self.test.c, 30) self.logger.info("Foo.a: {:}".format(self.test.c)) with self.assertRaises(AttributeError): - self.test.set('b', 20) + self.test.set("b", 20) with self.assertRaises(AttributeError): - self.test.set('d', 40) + self.test.set("d", 40) # To the read-only param we assign a class with a public attribute # get_params should return the read-only parameter too self.test._b = Doo() - self.test.set('b.a', 10) + self.test.set("b.a", 10) self.assertEqual(self.test.b.a, 10) self.logger.info("Foo.b: {:}".format(self.test.b)) with self.assertRaises(AttributeError): - self.test.set('b', 20) + self.test.set("b", 20) # To the read-only param we assign a class with a protected attribute # get_params should NOT return the read-only parameter self.test._b = Coo() with self.assertRaises(AttributeError): - self.test.set('b.a', 10) + self.test.set("b.a", 10) with self.assertRaises(AttributeError): - self.test.set('b', 20) + self.test.set("b", 20) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/core/type_utils.py b/src/secml/core/type_utils.py index 387db4bf..9b7618af 100644 --- a/src/secml/core/type_utils.py +++ b/src/secml/core/type_utils.py @@ -5,14 +5,34 @@ .. moduleauthor:: Marco Melis """ + import numpy as np from scipy.sparse import issparse -__all__ = ['is_bool', 'is_int', 'is_intlike', 'is_float', 'is_floatlike', - 'is_scalar', 'is_scalarlike', 'is_inf', 'is_posinf', 'is_neginf', - 'is_nan', 'is_list', 'is_list_of_lists', - 'is_ndarray', 'is_scsarray', 'is_slice', 'is_str', 'is_bytes', - 'is_tuple', 'is_set', 'is_dict', 'to_builtin'] +__all__ = [ + "is_bool", + "is_int", + "is_intlike", + "is_float", + "is_floatlike", + "is_scalar", + "is_scalarlike", + "is_inf", + "is_posinf", + "is_neginf", + "is_nan", + "is_list", + "is_list_of_lists", + "is_ndarray", + "is_scsarray", + "is_slice", + "is_str", + "is_bytes", + "is_tuple", + "is_set", + "is_dict", + "to_builtin", +] def is_bool(x): @@ -48,8 +68,9 @@ def is_intlike(x): if is_int(x): return True # built-in or numpy integers - elif (is_list(x) and len(x) == 1 and is_int(x[0])) or \ - (is_ndarray(x) and x.size == 1 and x.dtype.kind in ('i', 'u')): + elif (is_list(x) and len(x) == 1 and is_int(x[0])) or ( + is_ndarray(x) and x.size == 1 and x.dtype.kind in ("i", "u") + ): return True else: return False @@ -79,8 +100,9 @@ def is_floatlike(x): """ if is_float(x): return True # built-in or numpy floats - elif (is_list(x) and len(x) == 1 and is_float(x[0])) or \ - (is_ndarray(x) and x.size == 1 and x.dtype.kind in ('f')): + elif (is_list(x) and len(x) == 1 and is_float(x[0])) or ( + is_ndarray(x) and x.size == 1 and x.dtype.kind in ("f") + ): return True else: return False diff --git a/src/secml/data/c_dataset.py b/src/secml/data/c_dataset.py index d0e0aa6f..790701a5 100755 --- a/src/secml/data/c_dataset.py +++ b/src/secml/data/c_dataset.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.core import CCreator from secml.array import CArray from secml.data import CDatasetHeader @@ -72,8 +73,9 @@ class CDataset(CCreator): 34 """ - __super__ = 'CDataset' - __class_type = 'standard' + + __super__ = "CDataset" + __class_type = "standard" def __init__(self, x, y, header=None): @@ -95,7 +97,7 @@ def __setstate__(self, state): self.__dict__.update(state) # Initialize header placeholder if not available # Necessary to unpickle old dataset (stored with secml < v0.6) - if not hasattr(self, '_header'): + if not hasattr(self, "_header"): self._header = None @property @@ -150,15 +152,15 @@ def header(self, value): """Dataset header.""" if value is not None: if not isinstance(value, CDatasetHeader): - raise TypeError( - "'header' must be an instance of 'CDatasetHeader'") + raise TypeError("'header' must be an instance of 'CDatasetHeader'") # Check if header is compatible (same num_samples) - if value.num_samples is not None and \ - self.num_samples != value.num_samples: + if value.num_samples is not None and self.num_samples != value.num_samples: raise ValueError( "incompatible header size {:}. {:} expected.".format( - self.num_samples, value.num_samples)) + self.num_samples, value.num_samples + ) + ) self._header = value @@ -205,20 +207,23 @@ def _check_samples_labels(self, x=None, y=None): if x.shape[0] != y.size: raise ValueError( "number of labels ({:}) must be equal to the number " - "of samples ({:}).".format(y.size, x.shape[0])) + "of samples ({:}).".format(y.size, x.shape[0]) + ) def __getitem__(self, idx): """Given an index, get the corresponding X and Y elements.""" if not isinstance(idx, tuple) or len(idx) != self.X.ndim: raise IndexError( - "{:} sequences are required for indexing.".format(self.X.ndim)) + "{:} sequences are required for indexing.".format(self.X.ndim) + ) y = self.Y.__getitem__([idx[0] if isinstance(idx, tuple) else idx][0]) header = None if self.header is not None: header = self.header.__getitem__( - [idx[0] if isinstance(idx, tuple) else idx][0]) + [idx[0] if isinstance(idx, tuple) else idx][0] + ) return self.__class__(self.X.__getitem__(idx), y, header=header) @@ -228,7 +233,8 @@ def __setitem__(self, idx, data): raise TypeError("dataset can be set only using another dataset.") if not isinstance(idx, tuple) or len(idx) != self.X.ndim: raise IndexError( - "{:} sequences are required for indexing.".format(self.X.ndim)) + "{:} sequences are required for indexing.".format(self.X.ndim) + ) self.X.__setitem__(idx, data.X) # We now set the labels corresponding to set patterns self.Y.__setitem__([idx[0] if isinstance(idx, tuple) else idx][0], data.Y) @@ -301,12 +307,14 @@ def append(self, dataset): "cannot append a dataset with header and " "{:} samples as the other has no header. " "Define a consistent header for both dataset " - "and try again.".format(new_header.num_samples)) + "and try again.".format(new_header.num_samples) + ) else: # Both input ds and self have header, merge them new_header = self.header.append(dataset.header) return self.__class__( - self.X.append(dataset.X, axis=0), new_labels, header=new_header) + self.X.append(dataset.X, axis=0), new_labels, header=new_header + ) def tosparse(self): """Convert dataset's patterns to sparse format. @@ -437,4 +445,3 @@ def get_bounds(self, offset=0.0): for f_idx in range(self.num_features): boundary.append((x_min[0, f_idx].item(), x_max[0, f_idx].item())) return boundary - diff --git a/src/secml/data/c_dataset_header.py b/src/secml/data/c_dataset_header.py index 4a25ad2c..d3a958eb 100755 --- a/src/secml/data/c_dataset_header.py +++ b/src/secml/data/c_dataset_header.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.core import CCreator from secml.core.attr_utils import is_writable from secml.core.type_utils import is_list @@ -49,8 +50,9 @@ class CDatasetHeader(CCreator): 32 """ - __super__ = 'CDatasetHeader' - __class_type = 'standard' + + __super__ = "CDatasetHeader" + __class_type = "standard" def __init__(self, **kwargs): @@ -105,8 +107,10 @@ def _validate_params(self): delattr(self, attr_k) # Remove faulty attribute raise ValueError( "`{:}` is an array of size {:}. " - "{:} expected.".format(attr_k, attr_v.size, - self.num_samples)) + "{:} expected.".format( + attr_k, attr_v.size, self.num_samples + ) + ) # Populate the protected _num_samples attribute self._num_samples = attr_v.size @@ -143,8 +147,7 @@ def __getitem__(self, idx): def __str__(self): if len(self.get_params()) == 0: return self.__class__.__name__ + "{}" - return self.__class__.__name__ + \ - "{'" + "', '".join(self.get_params()) + "'}" + return self.__class__.__name__ + "{'" + "', '".join(self.get_params()) + "'}" def append(self, header): """Append input header to current header. @@ -193,13 +196,13 @@ def append(self, header): for attr in header.get_params(): if hasattr(self, attr): # Attribute already in current header if isinstance(getattr(self, attr), CArray): - subset[attr] = getattr(self, attr)\ - .append(getattr(header, attr)) + subset[attr] = getattr(self, attr).append(getattr(header, attr)) elif getattr(self, attr) != getattr(header, attr): # For not-arrays, we check equality raise ValueError( "value of '{:}' in input header should be equal " - "to '{:}'".format(attr, getattr(self, attr))) + "to '{:}'".format(attr, getattr(self, attr)) + ) else: # New attribute in input header subset[attr] = getattr(header, attr) diff --git a/src/secml/data/c_dataset_pytorch.py b/src/secml/data/c_dataset_pytorch.py index 028121a7..a1a9e1c2 100644 --- a/src/secml/data/c_dataset_pytorch.py +++ b/src/secml/data/c_dataset_pytorch.py @@ -5,6 +5,7 @@ .. moduleauthor:: Maura Pintor """ + import torch from torch.utils.data import Dataset @@ -52,7 +53,6 @@ def X(self): def Y(self): return self._labels - def __len__(self): """Returns dataset size.""" return self._samples.shape[0] diff --git a/src/secml/data/data_utils.py b/src/secml/data/data_utils.py index b7f75230..964fadb7 100644 --- a/src/secml/data/data_utils.py +++ b/src/secml/data/data_utils.py @@ -5,12 +5,13 @@ .. moduleauthor:: Marco Melis """ + from sklearn.preprocessing import label_binarize as sk_binarizer import numpy as np from secml.array import CArray -__all__ = ['label_binarize_onehot'] +__all__ = ["label_binarize_onehot"] def label_binarize_onehot(y): @@ -39,5 +40,4 @@ def label_binarize_onehot(y): if not np.issubdtype(y.dtype, np.integer): raise ValueError("only integer labels are supported") classes = CArray.arange(y.max() + 1) - return CArray(sk_binarizer( - y.tondarray(), classes=classes.tondarray())) + return CArray(sk_binarizer(y.tondarray(), classes=classes.tondarray())) diff --git a/src/secml/data/loader/c_dataloader.py b/src/secml/data/loader/c_dataloader.py index be03483c..b16c2309 100644 --- a/src/secml/data/loader/c_dataloader.py +++ b/src/secml/data/loader/c_dataloader.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from secml.array import CArray @@ -14,7 +15,8 @@ class CDataLoader(CCreator, metaclass=ABCMeta): """Interface for Dataset loaders.""" - __super__ = 'CDataLoader' + + __super__ = "CDataLoader" @abstractmethod def load(self, *args, **kwargs): @@ -25,7 +27,8 @@ def load(self, *args, **kwargs): """ raise NotImplementedError( "Please implement a `load` method for class {:}" - "".format(self.__class__.__name__)) + "".format(self.__class__.__name__) + ) # TODO: GENERALIZE THIS FUNCTION AND PUT IT INTO CARRAY @staticmethod @@ -67,7 +70,4 @@ def _remove_all_zero_features(patterns): idx_feat_presents = CArray(nnz_elem_idx[1]).unique() # return ds without features that are all zero and non zero old idx - return patterns[:, idx_feat_presents], \ - all_orig_feat_idx[idx_feat_presents] - - + return patterns[:, idx_feat_presents], all_orig_feat_idx[idx_feat_presents] diff --git a/src/secml/data/loader/c_dataloader_cifar.py b/src/secml/data/loader/c_dataloader_cifar.py index 86a068dd..055a617d 100644 --- a/src/secml/data/loader/c_dataloader_cifar.py +++ b/src/secml/data/loader/c_dataloader_cifar.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import tarfile from multiprocessing import Lock import pickle @@ -20,14 +21,14 @@ from secml.settings import SECML_DS_DIR -CIFAR10_URL_PYTHON = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' -CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' -CIFAR100_URL_PYTHON = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' -CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' +CIFAR10_URL_PYTHON = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" +CIFAR10_MD5 = "c58f30108f718f92721af3b95e74349a" +CIFAR100_URL_PYTHON = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" +CIFAR100_MD5 = "eb9058c3a382ffc7106e4002c42a8d85" -CIFAR_PATH = fm.join(SECML_DS_DIR, 'cifar') -CIFAR10_PATH = fm.join(CIFAR_PATH, 'cifar-10-batches-py') -CIFAR100_PATH = fm.join(CIFAR_PATH, 'cifar-100-python') +CIFAR_PATH = fm.join(SECML_DS_DIR, "cifar") +CIFAR10_PATH = fm.join(CIFAR_PATH, "cifar-10-batches-py") +CIFAR100_PATH = fm.join(CIFAR_PATH, "cifar-100-python") class CDataLoaderCIFAR(CDataLoader, metaclass=ABCMeta): @@ -36,20 +37,23 @@ class CDataLoaderCIFAR(CDataLoader, metaclass=ABCMeta): Available at: https://www.cs.toronto.edu/~kriz/cifar.html """ + __lock = Lock() # Lock to prevent multiple parallel download/extraction def __init__(self): # Extract the name of the data file from the url - self.data_file = self.data_url.split('/')[-1] + self.data_file = self.data_url.split("/")[-1] # Path to the downloaded dataset file data_file_path = fm.join(CIFAR_PATH, self.data_file) with CDataLoaderCIFAR.__lock: # Download (if needed) data and extract it - if not fm.file_exist(data_file_path) or \ - md5(data_file_path) != self.data_md5: + if ( + not fm.file_exist(data_file_path) + or md5(data_file_path) != self.data_md5 + ): self._get_data(self.data_url, CIFAR_PATH) elif not fm.folder_exist(self.data_path): # Downloaded datafile seems valid, extract only @@ -126,8 +130,15 @@ def load(self, val_size=0): """ raise NotImplementedError - def _load(self, train_files, test_files, meta_file, - labels_key, class_names_key, val_size=0): + def _load( + self, + train_files, + test_files, + meta_file, + labels_key, + class_names_key, + val_size=0, + ): """Load all images of the dataset. Adapted from: http://dataset-loading.readthedocs.io/en/latest/_modules/dataset_loading/cifar.html @@ -159,20 +170,20 @@ def _load(self, train_files, test_files, meta_file, """ self.logger.info( - "Loading {:} dataset from {:}...".format(self.class_type, - self.data_path)) + "Loading {:} dataset from {:}...".format(self.class_type, self.data_path) + ) def load_files(batches_list): # Function that loads the data into memory data = None labels = None for batch in batches_list: - with open(batch, 'rb') as bf: - mydict = pickle.load(bf, encoding='bytes') + with open(batch, "rb") as bf: + mydict = pickle.load(bf, encoding="bytes") # The labels have different names in the two datasets - new_data = np.array(mydict[b'data'], dtype='uint8') - newlabels = np.array(mydict[labels_key], dtype='int32') + new_data = np.array(mydict[b"data"], dtype="uint8") + newlabels = np.array(mydict[labels_key], dtype="int32") if data is not None: data = np.vstack([data, new_data]) labels = np.hstack([labels, newlabels]) @@ -184,18 +195,22 @@ def load_files(batches_list): # Load training and test sets train_data, train_labels = load_files( - [fm.join(self.data_path, f) for f in train_files]) + [fm.join(self.data_path, f) for f in train_files] + ) test_data, test_labels = load_files( - [fm.join(self.data_path, f) for f in test_files]) + [fm.join(self.data_path, f) for f in test_files] + ) val_data = None val_labels = None # Populate the validation set if needed if val_size > 0: train_data, val_data = np.split( - train_data, [train_data.shape[0] - val_size]) + train_data, [train_data.shape[0] - val_size] + ) train_labels, val_labels = np.split( - train_labels, [train_labels.shape[0] - val_size]) + train_labels, [train_labels.shape[0] - val_size] + ) # Load the class names from the meta file class_names = self._load_class_names(meta_file, class_names_key) @@ -211,7 +226,7 @@ def load_files(batches_list): if val_size > 0: val = CDataset(val_data, val_labels, header=header) # Also return the validation dataset - out_datasets += (val, ) + out_datasets += (val,) return out_datasets @@ -234,11 +249,11 @@ def _load_class_names(self, meta_file, class_names_key): meta_file_url = fm.join(self.data_path, meta_file) # Load the class-names from the pickled file. - with open(meta_file_url, 'rb') as mf: - raw = pickle.load(mf, encoding='bytes')[class_names_key] + with open(meta_file_url, "rb") as mf: + raw = pickle.load(mf, encoding="bytes")[class_names_key] # Convert from binary strings. - names = {i: x.decode('utf-8') for i, x in enumerate(raw)} + names = {i: x.decode("utf-8") for i, x in enumerate(raw)} return names @@ -256,14 +271,14 @@ def _get_data(self, file_url, dl_folder, extract_only=False): """ # Generate the full path to the downloaded file - f = fm.join(dl_folder, self.data_url.split('/')[-1]) + f = fm.join(dl_folder, self.data_url.split("/")[-1]) if extract_only is False: f_dl = dl_file(file_url, dl_folder, md5_digest=self.data_md5) if f != f_dl: raise ValueError("Unexpected filename {:}".format(f_dl)) - tarfile.open(name=f, mode='r:gz').extractall(dl_folder) + tarfile.open(name=f, mode="r:gz").extractall(dl_folder) class CDataLoaderCIFAR10(CDataLoaderCIFAR): @@ -280,7 +295,8 @@ class CDataLoaderCIFAR10(CDataLoaderCIFAR): class_type : 'CIFAR-10' """ - __class_type = 'CIFAR-10' + + __class_type = "CIFAR-10" @property def data_url(self): @@ -323,14 +339,16 @@ def load(self, val_size=0): # The CIFAR-10 dataset has 5 different batches for train data # and one single batch for test data # The metafile is called `batches.meta` and the labels `labels` - train_files = ['data_batch_' + str(i) for i in range(1, 6)] - test_files = ['test_batch'] - meta_file = 'batches.meta' - labels_key = b'labels' - class_names_key = b'label_names' - - return self._load(train_files, test_files, meta_file, - labels_key, class_names_key, val_size) + train_files = ["data_batch_" + str(i) for i in range(1, 6)] + test_files = ["test_batch"] + meta_file = "batches.meta" + labels_key = b"labels" + class_names_key = b"label_names" + + return self._load( + train_files, test_files, meta_file, labels_key, class_names_key, val_size + ) + load.__doc__ += CDataLoaderCIFAR.load.__doc__ @@ -352,7 +370,8 @@ class CDataLoaderCIFAR100(CDataLoaderCIFAR): class_type : 'CIFAR-100' """ - __class_type = 'CIFAR-100' + + __class_type = "CIFAR-100" @property def data_url(self): @@ -394,12 +413,14 @@ def load(self, val_size=0): """Load all images of the dataset.""" # The CIFAR-100 dataset has a single file for train/test # The metafile is called `meta` and the labels `fine_labels` - train_files = ['train'] - test_files = ['test'] - meta_file = 'meta' - labels_key = b'fine_labels' - class_names_key = b'fine_label_names' - - return self._load(train_files, test_files, meta_file, - labels_key, class_names_key, val_size) + train_files = ["train"] + test_files = ["test"] + meta_file = "meta" + labels_key = b"fine_labels" + class_names_key = b"fine_label_names" + + return self._load( + train_files, test_files, meta_file, labels_key, class_names_key, val_size + ) + load.__doc__ = CDataLoaderCIFAR.load.__doc__ diff --git a/src/secml/data/loader/c_dataloader_icubworld.py b/src/secml/data/loader/c_dataloader_icubworld.py index f833254c..4c4aac99 100644 --- a/src/secml/data/loader/c_dataloader_icubworld.py +++ b/src/secml/data/loader/c_dataloader_icubworld.py @@ -6,6 +6,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from multiprocessing import Lock import zipfile import os @@ -24,14 +25,15 @@ from secml.utils.download_utils import dl_file, md5 # Folder where all iCubWorld dataset will be stored -ICUBWORLD_PATH = fm.join(settings.SECML_DS_DIR, 'iCubWorld') +ICUBWORLD_PATH = fm.join(settings.SECML_DS_DIR, "iCubWorld") # iCubWorld28 -ICUBWORLD28_URL = \ - 'https://data.mendeley.com/datasets/3n2vh9rdxd/1/files/' \ - '9e3a79ef-18d9-4c37-b76c-0c34ead60544/iCubWorld28_128x128.zip?dl=1' -ICUBWORLD28_MD5 = 'd4fcdd02bdb0054688a213611a7a8ae7' -ICUBWORLD28_PATH = fm.join(ICUBWORLD_PATH, 'iCubWorld28') +ICUBWORLD28_URL = ( + "https://data.mendeley.com/datasets/3n2vh9rdxd/1/files/" + "9e3a79ef-18d9-4c37-b76c-0c34ead60544/iCubWorld28_128x128.zip?dl=1" +) +ICUBWORLD28_MD5 = "d4fcdd02bdb0054688a213611a7a8ae7" +ICUBWORLD28_PATH = fm.join(ICUBWORLD_PATH, "iCubWorld28") # TODO: iCubWorld 1.0 @@ -72,22 +74,31 @@ class CDataLoaderICubWorld28(CDataLoaderICubWorld): class_type : 'icubworld28' """ - __class_type = 'icubworld28' + + __class_type = "icubworld28" __lock = Lock() # Lock to prevent multiple parallel download/extraction def __init__(self): - self._train_path = fm.join(ICUBWORLD28_PATH, 'train') - self._test_path = fm.join(ICUBWORLD28_PATH, 'test') + self._train_path = fm.join(ICUBWORLD28_PATH, "train") + self._test_path = fm.join(ICUBWORLD28_PATH, "test") with CDataLoaderICubWorld28.__lock: # Download (if needed) data and extract it - if not fm.folder_exist(self._train_path) \ - or not fm.folder_exist(self._test_path): + if not fm.folder_exist(self._train_path) or not fm.folder_exist( + self._test_path + ): self._get_data(ICUBWORLD28_URL, ICUBWORLD28_PATH) - def load(self, ds_type, day='day4', icub7=False, - resize_shape=(128, 128), crop_shape=None, normalize=True): + def load( + self, + ds_type, + day="day4", + icub7=False, + resize_shape=(128, 128), + crop_shape=None, + normalize=True, + ): """Load the dataset. The pre-cropped version of the images is loaded, with size 128 x 128. @@ -122,9 +133,9 @@ def load(self, ds_type, day='day4', icub7=False, Output dataset. """ - if ds_type == 'train': + if ds_type == "train": data_path = self._train_path - elif ds_type == 'test': + elif ds_type == "test": data_path = self._test_path else: raise ValueError("use ds_type = {'train', 'test'}.") @@ -135,7 +146,9 @@ def load(self, ds_type, day='day4', icub7=False, self.logger.info( "Loading iCubWorld{:} {:} {:} dataset from {:}".format( - '7' if icub7 else '28', day, ds_type, day_path)) + "7" if icub7 else "28", day, ds_type, day_path + ) + ) icub7 = 3 if icub7 is True else icub7 # Use the 3rd sub-obj by default @@ -164,7 +177,7 @@ def load(self, ds_type, day='day4', icub7=False, if crop_shape is not None: img = crop_img(img, crop_shape) - img = CArray(img.getdata(), dtype='uint8').ravel() + img = CArray(img.getdata(), dtype="uint8").ravel() x = x.append(img, axis=0) if x is not None else img y_orig.append(sub_obj) # Label is given by sub-obj name @@ -194,7 +207,7 @@ def _get_data(self, file_url, dl_folder): Path to the folder where to store the downloaded file. """ - f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1') + f_dl = fm.join(dl_folder, "iCubWorld28_128x128.zip?dl=1") if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5: # Generate the full path to the downloaded file f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5) @@ -202,23 +215,24 @@ def _get_data(self, file_url, dl_folder): self.logger.info("Extracting files...") # Extract the content of downloaded file - zipfile.ZipFile(f_dl, 'r').extractall(dl_folder) + zipfile.ZipFile(f_dl, "r").extractall(dl_folder) # Remove downloaded file fm.remove_file(f_dl) # iCubWorld28 zip file contains a macosx private folder, clean it up - if fm.folder_exist(fm.join(ICUBWORLD28_PATH, '__MACOSX')): - fm.remove_folder(fm.join(ICUBWORLD28_PATH, '__MACOSX'), force=True) + if fm.folder_exist(fm.join(ICUBWORLD28_PATH, "__MACOSX")): + fm.remove_folder(fm.join(ICUBWORLD28_PATH, "__MACOSX"), force=True) # iCubWorld28 zip file contains a macosx private files, clean it up for dirpath, dirnames, filenames in os.walk(ICUBWORLD28_PATH): for file in filenames: - if fnmatch(file, '.DS_Store'): + if fnmatch(file, ".DS_Store"): fm.remove_file(fm.join(dirpath, file)) # Now move all data to an upper folder if needed - if not fm.folder_exist(self._train_path) \ - or not fm.folder_exist(self._test_path): + if not fm.folder_exist(self._train_path) or not fm.folder_exist( + self._test_path + ): sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0]) for e in fm.listdir(sub_d): e_full = fm.join(sub_d, e) # Full path to current element @@ -231,8 +245,9 @@ def _get_data(self, file_url, dl_folder): pass # Check that the main dataset file is now in the correct folder - if not fm.folder_exist(self._train_path) \ - or not fm.folder_exist(self._test_path): + if not fm.folder_exist(self._train_path) or not fm.folder_exist( + self._test_path + ): raise RuntimeError("dataset main file not available!") # The subdirectory can now be removed diff --git a/src/secml/data/loader/c_dataloader_imgclients.py b/src/secml/data/loader/c_dataloader_imgclients.py index 909fd277..758cb1df 100644 --- a/src/secml/data/loader/c_dataloader_imgclients.py +++ b/src/secml/data/loader/c_dataloader_imgclients.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.loader import CDataLoader from secml.data import CDataset, CDatasetHeader from secml.array import CArray @@ -22,7 +23,8 @@ class CDataLoaderImgClients(CDataLoader): class_type : 'img-clients' """ - __class_type = 'img-clients' + + __class_type = "img-clients" def __init__(self): # Does nothing... @@ -52,11 +54,11 @@ def load(self, ds_path, img_format, label_dtype=None, load_data=True): """ # Labels file MUST be available - if not fm.file_exist(fm.join(ds_path, 'clients.txt')): + if not fm.file_exist(fm.join(ds_path, "clients.txt")): raise OSError("cannot load clients file.") # Ensuring 'img_format' always has an extension-like pattern - img_ext = '.' + img_format.strip('.').lower() + img_ext = "." + img_format.strip(".").lower() # Dimensions of each image img_w = CArray([], dtype=int) @@ -65,32 +67,34 @@ def load(self, ds_path, img_format, label_dtype=None, load_data=True): # Load files! patterns, img_w, img_h, img_c = self._load_files( - ds_path, img_w, img_h, img_c, img_ext, load_data=load_data) + ds_path, img_w, img_h, img_c, img_ext, load_data=load_data + ) - labels = CArray.load( - fm.join(ds_path, 'clients.txt'), dtype=label_dtype).ravel() + labels = CArray.load(fm.join(ds_path, "clients.txt"), dtype=label_dtype).ravel() if patterns.shape[0] != labels.size: - raise ValueError("patterns ({:}) and labels ({:}) do not have " - "the same number of elements.".format( - patterns.shape[0], labels.size)) + raise ValueError( + "patterns ({:}) and labels ({:}) do not have " + "the same number of elements.".format(patterns.shape[0], labels.size) + ) # Load the file with extra dataset attributes (optional) - attributes_path = fm.join(ds_path, 'attributes.txt') - attributes = load_dict(attributes_path) if \ - fm.file_exist(attributes_path) else dict() + attributes_path = fm.join(ds_path, "attributes.txt") + attributes = ( + load_dict(attributes_path) if fm.file_exist(attributes_path) else dict() + ) - self.logger.info("Loaded {:} images from {:}...".format( - patterns.shape[0], ds_path)) + self.logger.info( + "Loaded {:} images from {:}...".format(patterns.shape[0], ds_path) + ) - header = CDatasetHeader(id=fm.split(ds_path)[1], - img_w=img_w, img_h=img_h, img_c=img_c, - **attributes) + header = CDatasetHeader( + id=fm.split(ds_path)[1], img_w=img_w, img_h=img_h, img_c=img_c, **attributes + ) return CDataset(patterns, labels, header=header) - def _load_files(self, ds_path, img_w, img_h, img_c, - img_ext, load_data=True): + def _load_files(self, ds_path, img_w, img_h, img_c, img_ext, load_data=True): """Loads any file with given extension inside input folder.""" # Files will be loaded in alphabetical order files_list = sorted(fm.listdir(ds_path)) @@ -120,10 +124,14 @@ def _load_files(self, ds_path, img_w, img_h, img_c, array_img = CArray([[file_path]]) # Creating the 2D array patterns x features - patterns = patterns.append( - array_img, axis=0) if patterns is not None else array_img - - self.logger.debug("{:} has been loaded..." - "".format(fm.join(ds_path, file_name))) + patterns = ( + patterns.append(array_img, axis=0) + if patterns is not None + else array_img + ) + + self.logger.debug( + "{:} has been loaded..." "".format(fm.join(ds_path, file_name)) + ) return patterns, img_w, img_h, img_c diff --git a/src/secml/data/loader/c_dataloader_imgfolders.py b/src/secml/data/loader/c_dataloader_imgfolders.py index d5986d2a..3b13b73a 100644 --- a/src/secml/data/loader/c_dataloader_imgfolders.py +++ b/src/secml/data/loader/c_dataloader_imgfolders.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.loader import CDataLoader from secml.data import CDataset, CDatasetHeader from secml.array import CArray @@ -23,14 +24,16 @@ class CDataLoaderImgFolders(CDataLoader): class_type : 'img-folders' """ - __class_type = 'img-folders' + + __class_type = "img-folders" def __init__(self): # Does nothing... pass - def load(self, ds_path, img_format, - label_re=None, label_dtype=None, load_data=True): + def load( + self, ds_path, img_format, label_re=None, label_dtype=None, load_data=True + ): """Load all images of specified format inside given path. The following custom CDataset attributes are available: @@ -59,7 +62,7 @@ def load(self, ds_path, img_format, """ # Ensuring 'img_format' always has an extension-like pattern - img_ext = '.' + img_format.strip('.').lower() + img_ext = "." + img_format.strip(".").lower() # Dimensions of each image img_w = CArray([], dtype=int) @@ -69,45 +72,60 @@ def load(self, ds_path, img_format, # Each directory inside the provided path will be explored recursively # and, if leaf, contained images will be loaded patterns, labels, img_w, img_h, img_c = self._explore_dir( - ds_path, img_w, img_h, img_c, img_ext, - label_re=label_re, load_data=load_data) + ds_path, + img_w, + img_h, + img_c, + img_ext, + label_re=label_re, + load_data=load_data, + ) if label_dtype is not None: # Converting labels if requested labels = labels.astype(label_dtype) if patterns.shape[0] != labels.size: - raise ValueError("patterns ({:}) and labels ({:}) do not have " - "the same number of elements.".format( - patterns.shape[0], labels.size)) + raise ValueError( + "patterns ({:}) and labels ({:}) do not have " + "the same number of elements.".format(patterns.shape[0], labels.size) + ) # Load the file with extra dataset attributes (optional) - attributes_path = fm.join(ds_path, 'attributes.txt') - attributes = load_dict(attributes_path) if \ - fm.file_exist(attributes_path) else dict() + attributes_path = fm.join(ds_path, "attributes.txt") + attributes = ( + load_dict(attributes_path) if fm.file_exist(attributes_path) else dict() + ) - self.logger.info("Loaded {:} images from {:}...".format( - patterns.shape[0], ds_path)) + self.logger.info( + "Loaded {:} images from {:}...".format(patterns.shape[0], ds_path) + ) - header = CDatasetHeader(id=fm.split(ds_path)[1], - img_w=img_w, img_h=img_h, img_c=img_c, - **attributes) + header = CDatasetHeader( + id=fm.split(ds_path)[1], img_w=img_w, img_h=img_h, img_c=img_c, **attributes + ) return CDataset(patterns, labels, header=header) - def _explore_dir(self, dir_path, img_w, img_h, img_c, img_ext, - label_re=None, load_data=True): + def _explore_dir( + self, dir_path, img_w, img_h, img_c, img_ext, label_re=None, load_data=True + ): """Explore input directory and load files if leaf.""" # Folders/files will be loaded in alphabetical order items_list = sorted(fm.listdir(dir_path)) # A leaf folder is a folder with only files in it - leaf = not any(fm.folder_exist( - fm.join(dir_path, item)) for item in items_list) + leaf = not any(fm.folder_exist(fm.join(dir_path, item)) for item in items_list) if leaf is True: # Leaf directory, time to load files! return self._load_files( - dir_path, img_w, img_h, img_c, img_ext, - label_re=label_re, load_data=load_data) + dir_path, + img_w, + img_h, + img_c, + img_ext, + label_re=label_re, + load_data=load_data, + ) # Placeholder for patterns/labels CArray patterns = None @@ -122,18 +140,27 @@ def _explore_dir(self, dir_path, img_w, img_h, img_c, img_ext, # Explore next subfolder patterns_new, labels_new, img_w, img_h, img_c = self._explore_dir( - subdir_path, img_w, img_h, img_c, img_ext, - label_re=label_re, load_data=load_data) - - patterns = patterns.append(patterns_new, axis=0) \ - if patterns is not None else patterns_new - labels = labels.append(labels_new) \ - if labels is not None else labels_new + subdir_path, + img_w, + img_h, + img_c, + img_ext, + label_re=label_re, + load_data=load_data, + ) + + patterns = ( + patterns.append(patterns_new, axis=0) + if patterns is not None + else patterns_new + ) + labels = labels.append(labels_new) if labels is not None else labels_new return patterns, labels, img_w, img_h, img_c - def _load_files(self, dir_path, img_w, img_h, img_c, img_ext, - label_re=None, load_data=True): + def _load_files( + self, dir_path, img_w, img_h, img_c, img_ext, label_re=None, load_data=True + ): """Loads any file with given extension inside input folder.""" # Folders/files will be loaded in alphabetical order files_list = sorted(fm.listdir(dir_path)) @@ -164,18 +191,24 @@ def _load_files(self, dir_path, img_w, img_h, img_c, img_ext, array_img = CArray([[file_path]]) # Creating the 2D array patterns x features - patterns = patterns.append( - array_img, axis=0) if patterns is not None else array_img + patterns = ( + patterns.append(array_img, axis=0) + if patterns is not None + else array_img + ) # Consider only the directory name to set the label dir_name = fm.split(dir_path)[1] # label is the image's containing folder name or the re result - c_id = dir_name if label_re is None \ + c_id = ( + dir_name + if label_re is None else re.search(label_re, dir_name).group(0) - labels = labels.append(c_id) if labels is not None \ - else CArray(c_id) + ) + labels = labels.append(c_id) if labels is not None else CArray(c_id) - self.logger.debug("{:} has been loaded..." - "".format(fm.join(dir_path, file_name))) + self.logger.debug( + "{:} has been loaded..." "".format(fm.join(dir_path, file_name)) + ) return patterns, labels, img_w, img_h, img_c diff --git a/src/secml/data/loader/c_dataloader_lfw.py b/src/secml/data/loader/c_dataloader_lfw.py index 7ebc84ce..0769a8f4 100644 --- a/src/secml/data/loader/c_dataloader_lfw.py +++ b/src/secml/data/loader/c_dataloader_lfw.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from multiprocessing import Lock from secml.data.loader import CDataLoader @@ -40,7 +41,8 @@ class CDataLoaderLFW(CDataLoader): class_type : 'lfw' """ - __class_type = 'lfw' + + __class_type = "lfw" __lock = Lock() # Lock to prevent multiple parallel download/extraction def __init__(self): @@ -70,9 +72,14 @@ def load(self, min_faces_per_person=None, funneled=True, color=False): """ with CDataLoaderLFW.__lock: lfw_people = fetch_lfw_people( - data_home=SECML_DS_DIR, funneled=funneled, resize=1, - min_faces_per_person=min_faces_per_person, color=color, - slice_=None, download_if_missing=True) + data_home=SECML_DS_DIR, + funneled=funneled, + resize=1, + min_faces_per_person=min_faces_per_person, + color=color, + slice_=None, + download_if_missing=True, + ) x = CArray(lfw_people.data) y = CArray(lfw_people.target) @@ -96,6 +103,6 @@ def clean_tmp(): Does not delete the downloaded database archive. """ - jl_tmp_folder = fm.join(SECML_DS_DIR, 'lfw_home', 'joblib') + jl_tmp_folder = fm.join(SECML_DS_DIR, "lfw_home", "joblib") if fm.folder_exist(jl_tmp_folder): fm.remove_folder(jl_tmp_folder, force=True) diff --git a/src/secml/data/loader/c_dataloader_mnist.py b/src/secml/data/loader/c_dataloader_mnist.py index a67d893e..2a650ffc 100644 --- a/src/secml/data/loader/c_dataloader_mnist.py +++ b/src/secml/data/loader/c_dataloader_mnist.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import gzip import struct from array import array @@ -19,19 +20,19 @@ from secml.settings import SECML_DS_DIR -MODEL_ZOO_REPO_URL = 'https://gitlab.com/secml/secml-zoo' -MNIST_REPO_PATH = 'datasets/MNIST/' +MODEL_ZOO_REPO_URL = "https://gitlab.com/secml/secml-zoo" +MNIST_REPO_PATH = "datasets/MNIST/" -TRAIN_DATA_FILE = 'train-images-idx3-ubyte.gz' -TRAIN_DATA_MD5 = '6bbc9ace898e44ae57da46a324031adb' -TRAIN_LABELS_FILE = 'train-labels-idx1-ubyte.gz' -TRAIN_LABELS_MD5 = 'a25bea736e30d166cdddb491f175f624' -TEST_DATA_FILE = 't10k-images-idx3-ubyte.gz' -TEST_DATA_MD5 = '2646ac647ad5339dbf082846283269ea' -TEST_LABELS_FILE = 't10k-labels-idx1-ubyte.gz' -TEST_LABELS_MD5 = '27ae3e4e09519cfbb04c329615203637' +TRAIN_DATA_FILE = "train-images-idx3-ubyte.gz" +TRAIN_DATA_MD5 = "6bbc9ace898e44ae57da46a324031adb" +TRAIN_LABELS_FILE = "train-labels-idx1-ubyte.gz" +TRAIN_LABELS_MD5 = "a25bea736e30d166cdddb491f175f624" +TEST_DATA_FILE = "t10k-images-idx3-ubyte.gz" +TEST_DATA_MD5 = "2646ac647ad5339dbf082846283269ea" +TEST_LABELS_FILE = "t10k-labels-idx1-ubyte.gz" +TEST_LABELS_MD5 = "27ae3e4e09519cfbb04c329615203637" -MNIST_PATH = fm.join(SECML_DS_DIR, 'mnist') +MNIST_PATH = fm.join(SECML_DS_DIR, "mnist") class CDataLoaderMNIST(CDataLoader): @@ -48,35 +49,51 @@ class CDataLoaderMNIST(CDataLoader): class_type : 'mnist' """ - __class_type = 'mnist' + + __class_type = "mnist" __lock = Lock() # Lock to prevent multiple parallel download/extraction def __init__(self): # Build paths of MNIST dataset - self.train_data_path = fm.join(MNIST_PATH, 'train-images-idx3-ubyte') - self.train_labels_path = fm.join(MNIST_PATH, 'train-labels-idx1-ubyte') - self.test_data_path = fm.join(MNIST_PATH, 't10k-images-idx3-ubyte') - self.test_labels_path = fm.join(MNIST_PATH, 't10k-labels-idx1-ubyte') + self.train_data_path = fm.join(MNIST_PATH, "train-images-idx3-ubyte") + self.train_labels_path = fm.join(MNIST_PATH, "train-labels-idx1-ubyte") + self.test_data_path = fm.join(MNIST_PATH, "t10k-images-idx3-ubyte") + self.test_labels_path = fm.join(MNIST_PATH, "t10k-labels-idx1-ubyte") with CDataLoaderMNIST.__lock: # For each file check if already downloaded and extracted - if not fm.file_exist(self.train_data_path) or \ - md5(self.train_data_path) != TRAIN_DATA_MD5: - self._get_data(TRAIN_DATA_FILE, MNIST_PATH, - self.train_data_path, TRAIN_DATA_MD5) - if not fm.file_exist(self.train_labels_path) or \ - md5(self.train_labels_path) != TRAIN_LABELS_MD5: - self._get_data(TRAIN_LABELS_FILE, MNIST_PATH, - self.train_labels_path, TRAIN_LABELS_MD5) - if not fm.file_exist(self.test_data_path) or \ - md5(self.test_data_path) != TEST_DATA_MD5: - self._get_data(TEST_DATA_FILE, MNIST_PATH, - self.test_data_path, TEST_DATA_MD5) - if not fm.file_exist(self.test_labels_path) or \ - md5(self.test_labels_path) != TEST_LABELS_MD5: - self._get_data(TEST_LABELS_FILE, MNIST_PATH, - self.test_labels_path, TEST_LABELS_MD5) + if ( + not fm.file_exist(self.train_data_path) + or md5(self.train_data_path) != TRAIN_DATA_MD5 + ): + self._get_data( + TRAIN_DATA_FILE, MNIST_PATH, self.train_data_path, TRAIN_DATA_MD5 + ) + if ( + not fm.file_exist(self.train_labels_path) + or md5(self.train_labels_path) != TRAIN_LABELS_MD5 + ): + self._get_data( + TRAIN_LABELS_FILE, + MNIST_PATH, + self.train_labels_path, + TRAIN_LABELS_MD5, + ) + if ( + not fm.file_exist(self.test_data_path) + or md5(self.test_data_path) != TEST_DATA_MD5 + ): + self._get_data( + TEST_DATA_FILE, MNIST_PATH, self.test_data_path, TEST_DATA_MD5 + ) + if ( + not fm.file_exist(self.test_labels_path) + or md5(self.test_labels_path) != TEST_LABELS_MD5 + ): + self._get_data( + TEST_LABELS_FILE, MNIST_PATH, self.test_labels_path, TEST_LABELS_MD5 + ) def load(self, ds, digits=tuple(range(0, 10)), num_samples=None): """Load all images of specified format inside given path. @@ -110,24 +127,25 @@ def load(self, ds, digits=tuple(range(0, 10)), num_samples=None): else: raise ValueError("ds must be 'training' or 'testing'") - self.logger.info( - "Loading MNIST {:} dataset from {:}...".format(ds, MNIST_PATH)) + self.logger.info("Loading MNIST {:} dataset from {:}...".format(ds, MNIST_PATH)) # Opening the labels data - flbl = open(lbl_path, 'rb') + flbl = open(lbl_path, "rb") magic_nr, size = struct.unpack(">II", flbl.read(8)) if magic_nr != 2049: - raise ValueError('Magic number mismatch, expected 2049,' - 'got {}'.format(magic_nr)) + raise ValueError( + "Magic number mismatch, expected 2049," "got {}".format(magic_nr) + ) lbl = array("b", flbl.read()) flbl.close() # Opening the images data - fimg = open(data_path, 'rb') + fimg = open(data_path, "rb") magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16)) if magic_nr != 2051: - raise ValueError('Magic number mismatch, expected 2051,' - 'got {}'.format(magic_nr)) + raise ValueError( + "Magic number mismatch, expected 2051," "got {}".format(magic_nr) + ) img = array("B", fimg.read()) fimg.close() @@ -139,9 +157,9 @@ def load(self, ds, digits=tuple(range(0, 10)), num_samples=None): div = len(digits) n_samples_class = [ int(num_samples / div) + (1 if x < num_samples % div else 0) - for x in range(div)] - n_samples_class = { - e: n_samples_class[e_i] for e_i, e in enumerate(digits)} + for x in range(div) + ] + n_samples_class = {e: n_samples_class[e_i] for e_i, e in enumerate(digits)} else: # No constraint on the number of samples n_samples_class = {e: size for e in digits} @@ -165,14 +183,16 @@ def load(self, ds, digits=tuple(range(0, 10)), num_samples=None): min_val = min(count_samples_class.values()) raise ValueError( "not enough samples in dataset for one ore more of the " - "desired classes ({:} available)".format(min_val)) + "desired classes ({:} available)".format(min_val) + ) images = CArray.zeros((len(ind), rows * cols), dtype=np.uint8) labels = CArray.zeros(len(ind), dtype=int) digs_array = CArray(digits) # To use find method for i in range(len(ind)): - images[i, :] = CArray(img[ - ind[i] * rows * cols: (ind[i] + 1) * rows * cols]) + images[i, :] = CArray( + img[ind[i] * rows * cols : (ind[i] + 1) * rows * cols] + ) labels[i] = CArray(digs_array.find(digs_array == lbl[ind[i]])) header = CDatasetHeader(img_w=28, img_h=28, y_original=digits) @@ -195,15 +215,16 @@ def _get_data(self, file_name, dl_folder, output_path, md5sum): """ # Download file and unpack - fh = dl_file_gitlab( - MODEL_ZOO_REPO_URL, MNIST_REPO_PATH + file_name, dl_folder) - with gzip.open(fh, 'rb') as infile: - with open(output_path, 'wb') as outfile: + fh = dl_file_gitlab(MODEL_ZOO_REPO_URL, MNIST_REPO_PATH + file_name, dl_folder) + with gzip.open(fh, "rb") as infile: + with open(output_path, "wb") as outfile: for line in infile: outfile.write(line) # Remove download zipped file fm.remove_file(fh) # Check the hash of the downloaded file (unpacked) if md5(output_path) != md5sum: - raise RuntimeError('Something wrong happened while ' - 'downloading the dataset. Please try again.') + raise RuntimeError( + "Something wrong happened while " + "downloading the dataset. Please try again." + ) diff --git a/src/secml/data/loader/c_dataloader_pytorch.py b/src/secml/data/loader/c_dataloader_pytorch.py index 3d7c9a58..df5b0c13 100644 --- a/src/secml/data/loader/c_dataloader_pytorch.py +++ b/src/secml/data/loader/c_dataloader_pytorch.py @@ -5,6 +5,7 @@ .. moduleauthor:: Maura Pintor """ + from torch.utils.data import DataLoader from secml.data.c_dataset_pytorch import CDatasetPyTorch @@ -13,21 +14,28 @@ class CDataLoaderPyTorch: # TODO: ADD DOCSTRING - def __init__(self, data, labels=None, batch_size=4, shuffle=False, - transform=None, num_workers=0): + def __init__( + self, + data, + labels=None, + batch_size=4, + shuffle=False, + transform=None, + num_workers=0, + ): - self._dataset = CDatasetPyTorch(data, - labels=labels, - transform=transform) + self._dataset = CDatasetPyTorch(data, labels=labels, transform=transform) self._batch_size = batch_size self._shuffle = shuffle self._num_workers = num_workers def get_loader(self): - data_loader = DataLoader(self._dataset, - batch_size=self._batch_size, - shuffle=self._shuffle, - num_workers=self._num_workers) + data_loader = DataLoader( + self._dataset, + batch_size=self._batch_size, + shuffle=self._shuffle, + num_workers=self._num_workers, + ) return data_loader diff --git a/src/secml/data/loader/c_dataloader_sklearn.py b/src/secml/data/loader/c_dataloader_sklearn.py index afc774e0..0cab87ae 100755 --- a/src/secml/data/loader/c_dataloader_sklearn.py +++ b/src/secml/data/loader/c_dataloader_sklearn.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from multiprocessing import Lock from abc import ABCMeta, abstractmethod @@ -14,11 +15,20 @@ from secml.data import CDataset from secml.array import CArray -__all__ = ['CDLRandom', 'CDLRandomRegression', - 'CDLRandomBlobs', 'CDLRandomBlobsRegression', - 'CDLRandomCircles', 'CDLRandomCircleRegression', - 'CDLRandomMoons', 'CDLRandomBinary', - 'CDLIris', 'CDLDigits', 'CDLBoston', 'CDLDiabetes'] +__all__ = [ + "CDLRandom", + "CDLRandomRegression", + "CDLRandomBlobs", + "CDLRandomBlobsRegression", + "CDLRandomCircles", + "CDLRandomCircleRegression", + "CDLRandomMoons", + "CDLRandomBinary", + "CDLIris", + "CDLDigits", + "CDLBoston", + "CDLDiabetes", +] class CDLRandom(CDataLoader): @@ -100,13 +110,26 @@ class weight is automatically inferred. class_type : 'classification' """ - __class_type = 'classification' - def __init__(self, n_samples=100, n_features=20, n_informative=2, - n_redundant=2, n_repeated=0, n_classes=2, - n_clusters_per_class=2, weights=None, - flip_y=0.01, class_sep=1.0, hypercube=True, - shift=0.0, scale=1.0, random_state=None): + __class_type = "classification" + + def __init__( + self, + n_samples=100, + n_features=20, + n_informative=2, + n_redundant=2, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + weights=None, + flip_y=0.01, + class_sep=1.0, + hypercube=True, + shift=0.0, + scale=1.0, + random_state=None, + ): self.n_samples = n_samples self.n_features = n_features @@ -133,6 +156,7 @@ def load(self): """ from sklearn.datasets import make_classification + patterns, labels = make_classification( n_samples=self.n_samples, n_features=self.n_features, @@ -147,7 +171,8 @@ def load(self): hypercube=self.hypercube, shift=self.shift, scale=self.scale, - random_state=self.random_state) + random_state=self.random_state, + ) return CDataset(patterns, labels) @@ -161,7 +186,7 @@ class CDLRandomRegression(CDataLoader): random linear regression model with `n_informative` nonzero regressors to the previously generated input and some gaussian centered noise with some adjustable scale. - + Parameters ---------- n_samples : int, optional (default=100) @@ -205,11 +230,21 @@ class CDLRandomRegression(CDataLoader): class_type : 'regression' """ - __class_type = 'regression' - def __init__(self, n_samples=100, n_features=100, n_informative=10, - n_targets=1, bias=0.0, effective_rank=None, - tail_strength=0.5, noise=0.0, random_state=None): + __class_type = "regression" + + def __init__( + self, + n_samples=100, + n_features=100, + n_informative=10, + n_targets=1, + bias=0.0, + effective_rank=None, + tail_strength=0.5, + noise=0.0, + random_state=None, + ): self.n_samples = n_samples self.n_features = n_features @@ -231,15 +266,18 @@ def load(self): """ from sklearn.datasets import make_regression - patterns, labels = make_regression(n_samples=self.n_samples, - n_features=self.n_features, - n_informative=self.n_informative, - n_targets=self.n_targets, - bias=self.bias, - effective_rank=self.effective_rank, - tail_strength=self.tail_strength, - noise=self.noise, - random_state=self.random_state) + + patterns, labels = make_regression( + n_samples=self.n_samples, + n_features=self.n_features, + n_informative=self.n_informative, + n_targets=self.n_targets, + bias=self.bias, + effective_rank=self.effective_rank, + tail_strength=self.tail_strength, + noise=self.noise, + random_state=self.random_state, + ) return CDataset(patterns, labels) @@ -272,10 +310,18 @@ class CDLRandomBlobs(CDataLoader): class_type : 'blobs' """ - __class_type = 'blobs' - def __init__(self, n_samples=100, n_features=2, centers=3, - cluster_std=1.0, center_box=(-10.0, 10.0), random_state=None): + __class_type = "blobs" + + def __init__( + self, + n_samples=100, + n_features=2, + centers=3, + cluster_std=1.0, + center_box=(-10.0, 10.0), + random_state=None, + ): self.n_samples = n_samples self.n_features = n_features @@ -294,13 +340,15 @@ def load(self): """ from sklearn.datasets import make_blobs + patterns, labels = make_blobs( n_samples=self.n_samples, n_features=self.n_features, centers=self.centers, cluster_std=self.cluster_std, center_box=self.center_box, - random_state=self.random_state) + random_state=self.random_state, + ) return CDataset(patterns, labels) @@ -331,11 +379,18 @@ class CDLRandomBlobsRegression(CDataLoader): class_type : 'blobs-regression' """ - __class_type = 'blobs-regression' - def __init__(self, n_samples=100, cluster_std=(1.0, 1.0), - bias=1.0, w=(2.0, -1.0), centers=([0, 0], [-1, -1]), - random_state=None): + __class_type = "blobs-regression" + + def __init__( + self, + n_samples=100, + cluster_std=(1.0, 1.0), + bias=1.0, + w=(2.0, -1.0), + centers=([0, 0], [-1, -1]), + random_state=None, + ): self.n_samples = n_samples self.bias = bias @@ -345,15 +400,16 @@ def __init__(self, n_samples=100, cluster_std=(1.0, 1.0), self.random_state = random_state def _dts_function(self, X): - """ TODO: Put a comment for this function. """ + """TODO: Put a comment for this function.""" from secml.ml.stats import CDistributionGaussian + d = X.shape[1] # number of features Y = self.bias for gauss_idx in range(len(self.centers)): - Y += self.w[gauss_idx] * \ - CDistributionGaussian(mean=self.centers[gauss_idx], - cov=self.cluster_std[gauss_idx] * - CArray.eye(d, d)).pdf(X) + Y += self.w[gauss_idx] * CDistributionGaussian( + mean=self.centers[gauss_idx], + cov=self.cluster_std[gauss_idx] * CArray.eye(d, d), + ).pdf(X) return Y def load(self): @@ -366,9 +422,14 @@ def load(self): """ from sklearn.datasets import make_blobs + patterns = make_blobs( - n_samples=self.n_samples, n_features=2, centers=self.centers, - cluster_std=self.cluster_std, random_state=self.random_state)[0] + n_samples=self.n_samples, + n_features=2, + centers=self.centers, + cluster_std=self.cluster_std, + random_state=self.random_state, + )[0] return CDataset(patterns, self._dts_function(CArray(patterns))) @@ -393,10 +454,10 @@ class CDLRandomCircles(CDataLoader): class_type : 'circles' """ - __class_type = 'circles' - def __init__(self, n_samples=100, noise=None, - factor=0.8, random_state=None): + __class_type = "circles" + + def __init__(self, n_samples=100, noise=None, factor=0.8, random_state=None): self.n_samples = n_samples self.noise = noise @@ -413,11 +474,13 @@ def load(self): """ from sklearn.datasets import make_circles + patterns, labels = make_circles( n_samples=self.n_samples, noise=self.noise, factor=self.factor, - random_state=self.random_state) + random_state=self.random_state, + ) return CDataset(patterns, labels) @@ -442,10 +505,10 @@ class CDLRandomCircleRegression(CDataLoader): class_type : 'circles-regression' """ - __class_type = 'circles-regression' - def __init__(self, n_samples=100, noise=None, - factor=0.8, random_state=None): + __class_type = "circles-regression" + + def __init__(self, n_samples=100, noise=None, factor=0.8, random_state=None): self.n_samples = n_samples self.noise = noise @@ -466,11 +529,13 @@ def load(self): """ from sklearn.datasets import make_circles + patterns = make_circles( n_samples=self.n_samples, noise=self.noise, factor=self.factor, - random_state=self.random_state)[0] + random_state=self.random_state, + )[0] return CDataset(patterns, self._dts_function(patterns)) @@ -493,7 +558,8 @@ class CDLRandomMoons(CDataLoader): class_type : 'moons' """ - __class_type = 'moons' + + __class_type = "moons" def __init__(self, n_samples=100, noise=None, random_state=None): @@ -511,10 +577,10 @@ def load(self): """ from sklearn.datasets import make_moons + patterns, labels = make_moons( - n_samples=self.n_samples, - noise=self.noise, - random_state=self.random_state) + n_samples=self.n_samples, noise=self.noise, random_state=self.random_state + ) return CDataset(patterns, labels) @@ -533,7 +599,8 @@ class CDLRandomBinary(CDataLoader): class_type : 'binary' """ - __class_type = 'binary' + + __class_type = "binary" def __init__(self, n_samples=100, n_features=2): @@ -573,6 +640,7 @@ class CDLRandomToy(CDataLoader, metaclass=ABCMeta): assigned 0 at the label with lower value, 1 to the other. """ + __lock = Lock() # Lock to prevent multiple parallel download/extraction def __init__(self, class_list=None, zero_one=False): @@ -599,14 +667,16 @@ def _select_classes(self, class_list, patterns, labels): sel_labels = labels[this_class_pat_idx] else: sel_patterns = sel_patterns.append( - patterns[this_class_pat_idx, :], axis=0) - sel_labels = sel_labels.append( - labels[this_class_pat_idx]) + patterns[this_class_pat_idx, :], axis=0 + ) + sel_labels = sel_labels.append(labels[this_class_pat_idx]) if self.zero_one is True: if len(class_list) > 2: - raise ValueError("you are try to convert to 0 1 label for a " - "dataset with more than 2 classes") + raise ValueError( + "you are try to convert to 0 1 label for a " + "dataset with more than 2 classes" + ) else: class_list.sort() sel_labels[sel_labels == class_list[0]] = 0 @@ -624,17 +694,21 @@ def load(self): """ with CDLRandomToy.__lock: - if self.toy == 'iris': + if self.toy == "iris": from sklearn.datasets import load_iris + toy_data = load_iris() - elif self.toy == 'digits': + elif self.toy == "digits": from sklearn.datasets import load_digits + toy_data = load_digits() - elif self.toy == 'boston': + elif self.toy == "boston": from sklearn.datasets import load_boston + toy_data = load_boston() - elif self.toy == 'diabetes': + elif self.toy == "diabetes": from sklearn.datasets import load_diabetes + toy_data = load_diabetes() else: raise ValueError("toy dataset {:} if not available.".format(self.toy)) @@ -643,9 +717,9 @@ def load(self): if self.class_list is None: return CDataset(CArray(toy_data.data), CArray(toy_data.target)) else: - return self._select_classes(self.class_list, - CArray(toy_data.data), - CArray(toy_data.target)) + return self._select_classes( + self.class_list, CArray(toy_data.data), CArray(toy_data.target) + ) class CDLIris(CDLRandomToy): @@ -676,8 +750,9 @@ class CDLIris(CDLRandomToy): class_type : 'iris' """ - __class_type = 'iris' - toy = 'iris' + + __class_type = "iris" + toy = "iris" class CDLDigits(CDLRandomToy): @@ -708,8 +783,9 @@ class CDLDigits(CDLRandomToy): class_type : 'digits' """ - __class_type = 'digits' - toy = 'digits' + + __class_type = "digits" + toy = "digits" class CDLBoston(CDLRandomToy): @@ -738,8 +814,9 @@ class CDLBoston(CDLRandomToy): class_type : 'boston' """ - __class_type = 'boston' - toy = 'boston' + + __class_type = "boston" + toy = "boston" class CDLDiabetes(CDLRandomToy): @@ -768,5 +845,6 @@ class CDLDiabetes(CDLRandomToy): class_type : 'diabetes' """ - __class_type = 'diabetes' - toy = 'diabetes' + + __class_type = "diabetes" + toy = "diabetes" diff --git a/src/secml/data/loader/c_dataloader_svmlight.py b/src/secml/data/loader/c_dataloader_svmlight.py index c699fc60..fe912a62 100644 --- a/src/secml/data/loader/c_dataloader_svmlight.py +++ b/src/secml/data/loader/c_dataloader_svmlight.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from sklearn.datasets import load_svmlight_file, dump_svmlight_file from secml.data.loader import CDataLoader @@ -21,15 +22,24 @@ class CDataLoaderSvmLight(CDataLoader): class_type : 'svmlight' """ - __class_type = 'svmlight' + + __class_type = "svmlight" def __init__(self): # Does nothing... pass - - def load(self, file_path, dtype_samples=float, dtype_labels=float, - n_features=None, zero_based=True, remove_all_zero=False, - multilabel=False, load_infos=False): + + def load( + self, + file_path, + dtype_samples=float, + dtype_labels=float, + n_features=None, + zero_based=True, + remove_all_zero=False, + multilabel=False, + load_infos=False, + ): """Loads a dataset from the svmlight / libsvm format and returns a sparse dataset. @@ -98,11 +108,13 @@ def load(self, file_path, dtype_samples=float, dtype_labels=float, """ # Never use zero_based='auto' in order to avoid # any ambiguity with the features indices... - patterns, labels = load_svmlight_file(file_path, - n_features=n_features, - dtype=float, - multilabel=multilabel, - zero_based=zero_based) + patterns, labels = load_svmlight_file( + file_path, + n_features=n_features, + dtype=float, + multilabel=multilabel, + zero_based=zero_based, + ) patterns = CArray(patterns, tosparse=True, dtype=dtype_samples) labels = CArray(labels, dtype=dtype_labels) @@ -110,21 +122,24 @@ def load(self, file_path, dtype_samples=float, dtype_labels=float, header = CDatasetHeader() # Will be populated with extra attributes if remove_all_zero is True: - patterns, idx_mapping = \ - CDataLoaderSvmLight._remove_all_zero_features(patterns) + patterns, idx_mapping = CDataLoaderSvmLight._remove_all_zero_features( + patterns + ) # Store reverse mapping as extra ds attribute header.idx_mapping = idx_mapping if load_infos is True: infos = [] - with open(file_path, 'rt') as f: + with open(file_path, "rt") as f: for l_idx, l in enumerate(f): - i = l.split(' # ') + i = l.split(" # ") if len(i) > 2: # Line should have only one split point - raise ValueError("Something wrong happened when " - "extracting infos for line {:}" - "".format(l_idx)) - infos.append(i[1].rstrip() if len(i) == 2 else '') + raise ValueError( + "Something wrong happened when " + "extracting infos for line {:}" + "".format(l_idx) + ) + infos.append(i[1].rstrip() if len(i) == 2 else "") header.infos = CArray(infos) if len(header.get_params()) == 0: @@ -136,16 +151,16 @@ def load(self, file_path, dtype_samples=float, dtype_labels=float, def dump(d, f, zero_based=True, comment=None): """Dumps a dataset in the svmlight / libsvm file format. - This format is a text-based format, with one sample per line. + This format is a text-based format, with one sample per line. It does not store zero valued features hence is suitable for sparse dataset. - + The first element of each line can be used to store a target variable to predict. Parameters ---------- - d : CDataset - Contain dataset with patterns and labels that we want store. - f : String + d : CDataset + Contain dataset with patterns and labels that we want store. + f : String Path to file were we want store dataset into format svmlight or libsvm. zero_based : bool, optional Whether column indices should be written zero-based (True, default) or one-based (False). @@ -165,5 +180,6 @@ def dump(d, f, zero_based=True, comment=None): >>> CDataLoaderSvmLight.dump(CDataset(patterns,labels), "myfile.libsvm") """ - dump_svmlight_file(d.X.get_data(), d.Y.get_data(), f, - zero_based=zero_based, comment=comment) + dump_svmlight_file( + d.X.get_data(), d.Y.get_data(), f, zero_based=zero_based, comment=comment + ) diff --git a/src/secml/data/loader/loader_utils.py b/src/secml/data/loader/loader_utils.py index 283f6d63..803310b8 100644 --- a/src/secml/data/loader/loader_utils.py +++ b/src/secml/data/loader/loader_utils.py @@ -6,9 +6,10 @@ .. moduleauthor:: Angelo Sotgiu """ + from PIL import Image -__all__ = ['resize_img', 'crop_img'] +__all__ = ["resize_img", "crop_img"] def resize_img(img, shape): @@ -78,10 +79,9 @@ def crop_img(img, crop): w, h = img.size if crop[1] >= w or crop[0] >= h: - raise ValueError( - "crop dimensions cannot be higher than {:}".format(img.size)) + raise ValueError("crop dimensions cannot be higher than {:}".format(img.size)) - x1 = int(round((w - crop[1]) / 2.)) - y1 = int(round((h - crop[0]) / 2.)) + x1 = int(round((w - crop[1]) / 2.0)) + y1 = int(round((h - crop[0]) / 2.0)) return img.crop((x1, y1, x1 + crop[1], y1 + crop[0])) diff --git a/src/secml/data/loader/tests/test_dataloader.py b/src/secml/data/loader/tests/test_dataloader.py index 139baa19..ec75cae0 100644 --- a/src/secml/data/loader/tests/test_dataloader.py +++ b/src/secml/data/loader/tests/test_dataloader.py @@ -10,10 +10,16 @@ class TestCDataLoader(CUnitTest): def test_dl_instance(self): """Testing if all available loaders can be correctly initialized.""" - available_dataset = ['classification', 'regression', - 'blobs', 'blobs-regression', - 'circles', 'circles-regression', - 'moons', 'binary'] + available_dataset = [ + "classification", + "regression", + "blobs", + "blobs-regression", + "circles", + "circles-regression", + "moons", + "binary", + ] for dl_str in available_dataset: self.logger.info("Loading dataset of type {:}...".format(dl_str)) @@ -26,14 +32,20 @@ def test_binary_data_creation(self): shapes = [(100, 2), (200, 6), (1000, 100)] for samples, features in shapes: dataset = CDataLoader.create( - 'binary', n_samples=samples, n_features=features).load() - self.assertEqual((samples, features), dataset.X.shape, - "Wrong default shape for binary dataset") + "binary", n_samples=samples, n_features=features + ).load() self.assertEqual( - 0, dataset.X[dataset.X > 1].shape[0], "Data is not binary!") + (samples, features), + dataset.X.shape, + "Wrong default shape for binary dataset", + ) self.assertEqual( - 0, dataset.X[dataset.X < 0].shape[0], "Data is not binary!") + 0, dataset.X[dataset.X > 1].shape[0], "Data is not binary!" + ) + self.assertEqual( + 0, dataset.X[dataset.X < 0].shape[0], "Data is not binary!" + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/secml/data/loader/tests/test_dataloader_imgclients.py b/src/secml/data/loader/tests/test_dataloader_imgclients.py index e239be5f..a65fb8fe 100644 --- a/src/secml/data/loader/tests/test_dataloader_imgclients.py +++ b/src/secml/data/loader/tests/test_dataloader_imgclients.py @@ -21,11 +21,13 @@ def test_load_img(self): ds_path = fm.join(fm.abspath(__file__), "ds_clients") - ds = dl.load(ds_path=ds_path, img_format='jpeg') + ds = dl.load(ds_path=ds_path, img_format="jpeg") self.logger.info( "Loaded {:} images of {:} features, {:} classes".format( - ds.num_samples, ds.num_features, ds.num_classes)) + ds.num_samples, ds.num_features, ds.num_classes + ) + ) self.assertEqual((2, 151875), ds.X.shape) self.assertEqual(2, ds.num_classes) @@ -41,25 +43,29 @@ def test_load_paths(self): ds_path = fm.join(fm.abspath(__file__), "ds_clients") - ds = dl.load(ds_path=ds_path, img_format='jpeg', load_data=False) + ds = dl.load(ds_path=ds_path, img_format="jpeg", load_data=False) self.logger.info( "Loaded {:} images of {:} features, {:} classes".format( - ds.num_samples, ds.num_features, ds.num_classes)) + ds.num_samples, ds.num_features, ds.num_classes + ) + ) # TODO: USE 'U' AFTER TRANSITION TO PYTHON 3 - self.assertIn(ds.X.dtype.char, ('S', 'U')) + self.assertIn(ds.X.dtype.char, ("S", "U")) # Checking correct label-img association - self.assertEqual(ds.Y[0].item(), - fm.split(ds.X[0, :].item())[1].replace('.jpeg', '')) - self.assertEqual(ds.Y[1].item(), - fm.split(ds.X[1, :].item())[1].replace('.jpeg', '')) + self.assertEqual( + ds.Y[0].item(), fm.split(ds.X[0, :].item())[1].replace(".jpeg", "") + ) + self.assertEqual( + ds.Y[1].item(), fm.split(ds.X[1, :].item())[1].replace(".jpeg", "") + ) # Checking behavior of `get_labels_ovr` - ovr = ds.get_labels_ovr(pos_label='tiger') # Y : ['coyote', 'tiger'] + ovr = ds.get_labels_ovr(pos_label="tiger") # Y : ['coyote', 'tiger'] self.assert_array_equal(ovr, CArray([0, 1])) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/loader/tests/test_dataloader_imgfolders.py b/src/secml/data/loader/tests/test_dataloader_imgfolders.py index a2745960..ad9f9406 100644 --- a/src/secml/data/loader/tests/test_dataloader_imgfolders.py +++ b/src/secml/data/loader/tests/test_dataloader_imgfolders.py @@ -21,11 +21,13 @@ def test_load_img(self): ds_rgb_path = fm.join(fm.abspath(__file__), "ds_rgb") - ds = dl.load(ds_path=ds_rgb_path, img_format='jpeg') + ds = dl.load(ds_path=ds_rgb_path, img_format="jpeg") self.logger.info( "Loaded {:} images of {:} features, {:} classes".format( - ds.num_samples, ds.num_features, ds.num_classes)) + ds.num_samples, ds.num_features, ds.num_classes + ) + ) self.assertEqual((2, 151875), ds.X.shape) self.assertEqual(2, ds.num_classes) @@ -37,11 +39,13 @@ def test_load_img(self): ds_gray_path = fm.join(fm.abspath(__file__), "ds_gray") - ds = dl.load(ds_path=ds_gray_path, img_format='jpeg') + ds = dl.load(ds_path=ds_gray_path, img_format="jpeg") self.logger.info( "Loaded {:} images of {:} features, {:} classes".format( - ds.num_samples, ds.num_features, ds.num_classes)) + ds.num_samples, ds.num_features, ds.num_classes + ) + ) self.assertEqual((2, 50625), ds.X.shape) self.assertEqual(2, ds.num_classes) @@ -57,19 +61,21 @@ def test_load_paths(self): ds_rgb_path = fm.join(fm.abspath(__file__), "ds_rgb") - ds = dl.load(ds_path=ds_rgb_path, img_format='jpeg', load_data=False) + ds = dl.load(ds_path=ds_rgb_path, img_format="jpeg", load_data=False) self.logger.info( "Loaded {:} images of {:} features, {:} classes".format( - ds.num_samples, ds.num_features, ds.num_classes)) + ds.num_samples, ds.num_features, ds.num_classes + ) + ) # TODO: USE 'U' AFTER TRANSITION TO PYTHON 3 - self.assertIn(ds.X.dtype.char, ('S', 'U')) + self.assertIn(ds.X.dtype.char, ("S", "U")) # Checking behavior of `get_labels_ovr` - ovr = ds.get_labels_ovr(pos_label='tiger') # Y : ['coyote', 'tiger'] + ovr = ds.get_labels_ovr(pos_label="tiger") # Y : ['coyote', 'tiger'] self.assert_array_equal(ovr, CArray([0, 1])) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/loader/tests/test_dataloader_mnist.py b/src/secml/data/loader/tests/test_dataloader_mnist.py index 61441d72..a0a37c75 100644 --- a/src/secml/data/loader/tests/test_dataloader_mnist.py +++ b/src/secml/data/loader/tests/test_dataloader_mnist.py @@ -10,40 +10,34 @@ def test_load(self): digits = (1, 5, 9) - tr = CDataLoaderMNIST().load('training', digits=digits) + tr = CDataLoaderMNIST().load("training", digits=digits) - self.logger.info( - "Loading {:} training set samples".format(tr.num_samples)) + self.logger.info("Loading {:} training set samples".format(tr.num_samples)) self.assertEqual(tr.num_samples, 18112) - ts = CDataLoaderMNIST().load('testing', digits=digits) + ts = CDataLoaderMNIST().load("testing", digits=digits) - self.logger.info( - "Loading {:} test set samples".format(ts.num_samples)) + self.logger.info("Loading {:} test set samples".format(ts.num_samples)) self.assertEqual(ts.num_samples, 3036) n_tr = 1000 n_ts = 1000 - tr = CDataLoaderMNIST().load( - 'training', digits=digits, num_samples=n_tr) + tr = CDataLoaderMNIST().load("training", digits=digits, num_samples=n_tr) - self.logger.info( - "Loading {:} training set samples".format(tr.num_samples)) + self.logger.info("Loading {:} training set samples".format(tr.num_samples)) self.assertEqual(tr.num_samples, n_tr) - ts = CDataLoaderMNIST().load( - 'testing', digits=digits, num_samples=n_ts) + ts = CDataLoaderMNIST().load("testing", digits=digits, num_samples=n_ts) - self.logger.info( - "Loading {:} test set samples".format(ts.num_samples)) + self.logger.info("Loading {:} test set samples".format(ts.num_samples)) self.assertEqual(ts.num_samples, n_ts) # Not enough number of samples (1666) for each desired digit # in the test set. ValueError should be raised with self.assertRaises(ValueError): - CDataLoaderMNIST().load('testing', digits=digits, num_samples=5000) + CDataLoaderMNIST().load("testing", digits=digits, num_samples=5000) diff --git a/src/secml/data/loader/tests/test_dataloader_pytorch.py b/src/secml/data/loader/tests/test_dataloader_pytorch.py index 77f4fb69..3bd12131 100644 --- a/src/secml/data/loader/tests/test_dataloader_pytorch.py +++ b/src/secml/data/loader/tests/test_dataloader_pytorch.py @@ -30,16 +30,20 @@ def setUp(self): def _dataset_creation_blobs(self): self.logger.info("\tTest dataset creation") # generate synthetic data - dataset = CDLRandom(n_samples=self.n_samples_tr + self.n_samples_ts, - n_classes=self.n_classes, - n_features=self.n_features, n_redundant=0, - n_clusters_per_class=1, - class_sep=2, random_state=0).load() + dataset = CDLRandom( + n_samples=self.n_samples_tr + self.n_samples_ts, + n_classes=self.n_classes, + n_features=self.n_features, + n_redundant=0, + n_clusters_per_class=1, + class_sep=2, + random_state=0, + ).load() # Split in training and test splitter = CTrainTestSplit( - train_size=self.n_samples_tr, test_size=self.n_samples_ts, - random_state=0) + train_size=self.n_samples_tr, test_size=self.n_samples_ts, random_state=0 + ) self.tr, self.ts = splitter.split(dataset) # Normalize the data @@ -47,23 +51,23 @@ def _dataset_creation_blobs(self): self.tr.X = nmz.fit_transform(self.tr.X) self.ts.X = nmz.transform(self.ts.X) - self._tr_loader = CDataLoaderPyTorch(self.tr.X, self.tr.Y, - self.batch_size, shuffle=True, - transform=None).get_loader() + self._tr_loader = CDataLoaderPyTorch( + self.tr.X, self.tr.Y, self.batch_size, shuffle=True, transform=None + ).get_loader() - self._ts_loader = CDataLoaderPyTorch(self.ts.X, self.ts.Y, - self.batch_size, shuffle=False, - transform=None).get_loader() + self._ts_loader = CDataLoaderPyTorch( + self.ts.X, self.ts.Y, self.batch_size, shuffle=False, transform=None + ).get_loader() def _dataset_creation_mnist(self): self.logger.info("\tTest dataset creation") digits = (1, 7) - dataset = CDataLoaderMNIST().load('training', digits=digits) + dataset = CDataLoaderMNIST().load("training", digits=digits) # Split in training and test splitter = CTrainTestSplit( - train_size=self.n_samples_tr, test_size=self.n_samples_ts, - random_state=0) + train_size=self.n_samples_tr, test_size=self.n_samples_ts, random_state=0 + ) self.tr, self.ts = splitter.split(dataset) # Normalize the data @@ -73,19 +77,19 @@ def _dataset_creation_mnist(self): transform = transforms.Lambda(lambda x: x.reshape(-1, 1, 28, 28)) - self._tr_loader = CDataLoaderPyTorch(self.tr.X, self.tr.Y, - self.batch_size, shuffle=True, - transform=transform).get_loader() + self._tr_loader = CDataLoaderPyTorch( + self.tr.X, self.tr.Y, self.batch_size, shuffle=True, transform=transform + ).get_loader() - self._ts_loader = CDataLoaderPyTorch(self.ts.X, self.ts.Y, - self.batch_size, shuffle=False, - transform=transform).get_loader() + self._ts_loader = CDataLoaderPyTorch( + self.ts.X, self.ts.Y, self.batch_size, shuffle=False, transform=transform + ).get_loader() def _test_dtypes(self): self.logger.info("\tTest data types") - assert(isinstance(self.tr, CDataset)) - assert(isinstance(self.tr.X[0, :], CArray)) - assert(isinstance(self.tr.Y[0, :], CArray)) + assert isinstance(self.tr, CDataset) + assert isinstance(self.tr.X[0, :], CArray) + assert isinstance(self.tr.Y[0, :], CArray) def _test_shapes(self, x_shape): """ @@ -98,17 +102,17 @@ def _test_shapes(self, x_shape): """ self.logger.info("\tTest shapes") # test number of batches - assert (len(self._tr_loader) == ceil(self.n_samples_tr / self.batch_size)) - assert (len(self._ts_loader) == ceil(self.n_samples_ts / self.batch_size)) + assert len(self._tr_loader) == ceil(self.n_samples_tr / self.batch_size) + assert len(self._ts_loader) == ceil(self.n_samples_ts / self.batch_size) # test number of samples - assert (len(self._tr_loader.dataset) == self.n_samples_tr) - assert (len(self._ts_loader.dataset) == self.n_samples_ts) + assert len(self._tr_loader.dataset) == self.n_samples_tr + assert len(self._ts_loader.dataset) == self.n_samples_ts # test size of the samples x, y = next(iter(self._tr_loader)) - assert (x.shape == x_shape) - assert (y.shape == (self.batch_size,)) + assert x.shape == x_shape + assert y.shape == (self.batch_size,) def test_blobs(self): self.logger.info("______________________________________") @@ -125,4 +129,3 @@ def test_mnist(self): self._dataset_creation_mnist() self._test_dtypes() self._test_shapes(x_shape=(self.batch_size, 1, 1, 28, 28)) - diff --git a/src/secml/data/loader/tests/test_dataloader_svmlight.py b/src/secml/data/loader/tests/test_dataloader_svmlight.py index 384b115f..be4e195a 100644 --- a/src/secml/data/loader/tests/test_dataloader_svmlight.py +++ b/src/secml/data/loader/tests/test_dataloader_svmlight.py @@ -44,8 +44,7 @@ def test_save_and_load_svmlight_file(self): self.logger.info("Patterns saved:\n{:}".format(self.patterns)) self.logger.info("Labels saved:\n{:}".format(self.labels)) - CDataLoaderSvmLight.dump( - CDataset(self.patterns, self.labels), test_file) + CDataLoaderSvmLight.dump(CDataset(self.patterns, self.labels), test_file) new_dataset = CDataLoaderSvmLight().load(test_file) @@ -53,13 +52,11 @@ def test_save_and_load_svmlight_file(self): self.assertFalse((new_dataset.Y != self.labels).any()) # load data but now remove all zero features (colums) - new_dataset = CDataLoaderSvmLight().load( - test_file, remove_all_zero=True) + new_dataset = CDataLoaderSvmLight().load(test_file, remove_all_zero=True) self.logger.info("Patterns loaded:\n{:}".format(new_dataset.X)) self.logger.info("Labels loaded:\n{:}".format(new_dataset.Y)) - self.logger.info( - "Mapping back:\n{:}".format(new_dataset.header.idx_mapping)) + self.logger.info("Mapping back:\n{:}".format(new_dataset.header.idx_mapping)) self.assertTrue(new_dataset.X.issparse) self.assertTrue(new_dataset.Y.isdense) @@ -83,5 +80,5 @@ def test_save_and_load_svmlight_file(self): raise e -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/selection/c_prototypes_selector.py b/src/secml/data/selection/c_prototypes_selector.py index 3445a8c0..b8033545 100644 --- a/src/secml/data/selection/c_prototypes_selector.py +++ b/src/secml/data/selection/c_prototypes_selector.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -34,7 +35,8 @@ class CPrototypesSelector(CCreator, metaclass=ABCMeta): Pattern Recognition. Springer Berlin Heidelberg, 2006. 287-296. """ - __super__ = 'CPrototypesSelector' + + __super__ = "CPrototypesSelector" def __init__(self): @@ -62,5 +64,7 @@ def select(self, dataset, n_prototypes): Dataset with selected prototypes. """ - raise NotImplementedError("Please implement a `select` method for " - "class {:}".format(self.__class__.__name__)) + raise NotImplementedError( + "Please implement a `select` method for " + "class {:}".format(self.__class__.__name__) + ) diff --git a/src/secml/data/selection/c_ps_border.py b/src/secml/data/selection/c_ps_border.py index b80f985e..9250cef4 100644 --- a/src/secml/data/selection/c_ps_border.py +++ b/src/secml/data/selection/c_ps_border.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.selection import CPrototypesSelector from secml.array import CArray from secml.ml.kernels import CKernelEuclidean @@ -27,7 +28,8 @@ class CPSBorder(CPrototypesSelector): class_type : 'border' """ - __class_type = 'border' + + __class_type = "border" def select(self, dataset, n_prototypes): """Selects the prototypes from input dataset. @@ -46,7 +48,7 @@ def select(self, dataset, n_prototypes): """ # Precomputing distances - k_euclidean = - CKernelEuclidean().k(dataset.X) + k_euclidean = -CKernelEuclidean().k(dataset.X) # List of selected prototypes (indices) sel_idx = [] set_indices = list(range(dataset.num_samples)) diff --git a/src/secml/data/selection/c_ps_center.py b/src/secml/data/selection/c_ps_center.py index b5feb92b..b983c98f 100644 --- a/src/secml/data/selection/c_ps_center.py +++ b/src/secml/data/selection/c_ps_center.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.selection import CPrototypesSelector from secml.array import CArray from secml.ml.kernels import CKernelEuclidean @@ -27,7 +28,8 @@ class CPSCenter(CPrototypesSelector): class_type : 'center' """ - __class_type = 'center' + + __class_type = "center" def select(self, dataset, n_prototypes): """Selects the prototypes from input dataset. @@ -46,7 +48,7 @@ def select(self, dataset, n_prototypes): """ # Precomputing distances - k_euclidean = - CKernelEuclidean().k(dataset.X) + k_euclidean = -CKernelEuclidean().k(dataset.X) # List of selected prototypes (indices) sel_idx = [] set_indices = list(range(dataset.num_samples)) diff --git a/src/secml/data/selection/c_ps_kmedians.py b/src/secml/data/selection/c_ps_kmedians.py index 42160ebe..70173417 100644 --- a/src/secml/data/selection/c_ps_kmedians.py +++ b/src/secml/data/selection/c_ps_kmedians.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.selection import CPrototypesSelector from secml.array import CArray from secml.ml.kernels import CKernelEuclidean @@ -28,7 +29,8 @@ class CPSKMedians(CPrototypesSelector): class_type : 'k-medians' """ - __class_type = 'k-medians' + + __class_type = "k-medians" def select(self, dataset, n_prototypes, random_state=None): """Selects the prototypes from input dataset. @@ -50,11 +52,13 @@ def select(self, dataset, n_prototypes, random_state=None): """ from sklearn.cluster import k_means - km = k_means(dataset.X.tondarray(), n_clusters=n_prototypes, - random_state=random_state) + + km = k_means( + dataset.X.tondarray(), n_clusters=n_prototypes, random_state=random_state + ) km_labels = CArray(km[1]) # Precomputing distances - k_euclidean = - CKernelEuclidean().k(dataset.X) + k_euclidean = -CKernelEuclidean().k(dataset.X) # List of selected prototypes (indices) sel_idx = [] for i in range(n_prototypes): diff --git a/src/secml/data/selection/c_ps_random.py b/src/secml/data/selection/c_ps_random.py index bcd1fef2..a1158ca9 100644 --- a/src/secml/data/selection/c_ps_random.py +++ b/src/secml/data/selection/c_ps_random.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.selection import CPrototypesSelector from secml.array import CArray @@ -17,7 +18,8 @@ class CPSRandom(CPrototypesSelector): class_type : 'random' """ - __class_type = 'random' + + __class_type = "random" def select(self, dataset, n_prototypes, random_state=None): """Selects the prototypes from input dataset. @@ -39,9 +41,11 @@ def select(self, dataset, n_prototypes, random_state=None): Dataset with selected prototypes. """ - sel_idx = CArray.randsample(CArray(list(range(dataset.num_samples))), - shape=n_prototypes, - random_state=random_state) + sel_idx = CArray.randsample( + CArray(list(range(dataset.num_samples))), + shape=n_prototypes, + random_state=random_state, + ) self.logger.debug("Selecting samples: {:}".format(sel_idx.tolist())) diff --git a/src/secml/data/selection/c_ps_spanning.py b/src/secml/data/selection/c_ps_spanning.py index 215c6a18..bbcdca5a 100644 --- a/src/secml/data/selection/c_ps_spanning.py +++ b/src/secml/data/selection/c_ps_spanning.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.data.selection import CPrototypesSelector from secml.array import CArray from secml.ml.kernels import CKernelEuclidean @@ -29,7 +30,8 @@ class CPSSpanning(CPrototypesSelector): class_type : 'spanning' """ - __class_type = 'spanning' + + __class_type = "spanning" def select(self, dataset, n_prototypes): """Selects the prototypes from input dataset. @@ -48,7 +50,7 @@ def select(self, dataset, n_prototypes): """ # Precomputing distances - k_euclidean = - CKernelEuclidean().k(dataset.X) + k_euclidean = -CKernelEuclidean().k(dataset.X) # List of selected prototypes (indices) # First sample is the median sel_idx = [k_euclidean.sum(axis=0, keepdims=False).argmin()] diff --git a/src/secml/data/selection/tests/plot_ps.py b/src/secml/data/selection/tests/plot_ps.py index 30da6c06..3a932237 100755 --- a/src/secml/data/selection/tests/plot_ps.py +++ b/src/secml/data/selection/tests/plot_ps.py @@ -2,26 +2,36 @@ from secml.data.selection import CPrototypesSelector from secml.data.loader import CDLRandomBlobs -dataset = CDLRandomBlobs(n_features=2, n_samples=30, - centers=[[-0.5, 0], [0.5, 1]], - cluster_std=(0.8, 0.8), random_state=7545).load() +dataset = CDLRandomBlobs( + n_features=2, + n_samples=30, + centers=[[-0.5, 0], [0.5, 1]], + cluster_std=(0.8, 0.8), + random_state=7545, +).load() fig = CFigure(width=6, height=2, markersize=8, fontsize=11) -rules = ['center', 'border', 'spanning', 'k-medians'] +rules = ["center", "border", "spanning", "k-medians"] for rule_id, rule in enumerate(rules): ps = CPrototypesSelector.create(rule) ps.verbose = 2 ds_reduced = ps.select(dataset, n_prototypes=5) - fig.subplot(1, len(rules), rule_id+1) + fig.subplot(1, len(rules), rule_id + 1) # Plot dataset points fig.sp.scatter(dataset.X[:, 0], dataset.X[:, 1], linewidths=0, s=30) - fig.sp.plot(ds_reduced.X[:, 0], ds_reduced.X[:, 1], linestyle='None', - markeredgewidth=2, marker='o', mfc='red') - fig.sp.title('{:}'.format(rule)) + fig.sp.plot( + ds_reduced.X[:, 0], + ds_reduced.X[:, 1], + linestyle="None", + markeredgewidth=2, + marker="o", + mfc="red", + ) + fig.sp.title("{:}".format(rule)) fig.sp.yticks([]) fig.sp.xticks([]) diff --git a/src/secml/data/selection/tests/test_prototypes_selection.py b/src/secml/data/selection/tests/test_prototypes_selection.py index f42ef183..7c1548a5 100644 --- a/src/secml/data/selection/tests/test_prototypes_selection.py +++ b/src/secml/data/selection/tests/test_prototypes_selection.py @@ -13,8 +13,11 @@ class TestPS(CUnitTest): def setUpClass(cls): cls.plots = False cls.dataset = CDLRandomBlobs( - n_features=2, centers=[[-1, 1], [1, 1]], - cluster_std=(0.4, 0.4), random_state=0).load() + n_features=2, + centers=[[-1, 1], [1, 1]], + cluster_std=(0.4, 0.4), + random_state=0, + ).load() CUnitTest.setUpClass() # call superclass constructor def _test_rule(self, rule, n_prototypes=20, random_state=None): @@ -25,30 +28,30 @@ def _test_rule(self, rule, n_prototypes=20, random_state=None): if random_state is None: ds_reduced = ps.select(self.dataset, n_prototypes=n_prototypes) else: - ds_reduced = ps.select(self.dataset, n_prototypes=n_prototypes, - random_state=random_state) + ds_reduced = ps.select( + self.dataset, n_prototypes=n_prototypes, random_state=random_state + ) if self.plots is True: self.draw_selection(ds_reduced, rule) idx_path = fm.join(fm.abspath(__file__), "idx_{:}.gz".format(rule)) - self.assert_array_equal( - ps.sel_idx, CArray.load(idx_path, dtype=int).ravel()) + self.assert_array_equal(ps.sel_idx, CArray.load(idx_path, dtype=int).ravel()) def test_ps_random(self): - self._test_rule('random', random_state=200) + self._test_rule("random", random_state=200) def test_ps_border(self): - self._test_rule('border') + self._test_rule("border") def test_ps_center(self): - self._test_rule('center') + self._test_rule("center") def test_ps_spanning(self): - self._test_rule('spanning') + self._test_rule("spanning") # TODO: refactor this test when reqs will ask for sklearn >= 0.22 def test_ps_kmedians(self): - rule = 'k-medians' + rule = "k-medians" self.logger.info("Testing: " + rule + " selector.") ps = CPrototypesSelector.create(rule) ps.verbose = 2 @@ -60,26 +63,30 @@ def test_ps_kmedians(self): # k_means in sklearn > 0.24 returns a different result import sklearn from pkg_resources import parse_version + if parse_version(sklearn.__version__) < parse_version("0.24"): - idx_path = fm.join( - fm.abspath(__file__), "idx_{:}.gz".format(rule)) + idx_path = fm.join(fm.abspath(__file__), "idx_{:}.gz".format(rule)) else: - idx_path = fm.join( - fm.abspath(__file__), "idx_{:}_sk0-24.gz".format(rule)) + idx_path = fm.join(fm.abspath(__file__), "idx_{:}_sk0-24.gz".format(rule)) - self.assert_array_equal( - ps.sel_idx, CArray.load(idx_path, dtype=int).ravel()) + self.assert_array_equal(ps.sel_idx, CArray.load(idx_path, dtype=int).ravel()) def draw_selection(self, ds_reduced, rule): fig = CFigure(width=10, markersize=12) # Plot dataset points - fig.sp.plot_ds(self.dataset, colors=['c', 'g']) - fig.sp.plot(ds_reduced.X[:, 0], ds_reduced.X[:, 1], - linestyle='None', mfc='none', - markeredgewidth=2, markeredgecolor='k', marker='o') - fig.sp.title('PS rule: {:}'.format(rule)) + fig.sp.plot_ds(self.dataset, colors=["c", "g"]) + fig.sp.plot( + ds_reduced.X[:, 0], + ds_reduced.X[:, 1], + linestyle="None", + mfc="none", + markeredgewidth=2, + markeredgecolor="k", + marker="o", + ) + fig.sp.title("PS rule: {:}".format(rule)) fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/splitter/c_chronological_splitter.py b/src/secml/data/splitter/c_chronological_splitter.py index 4556f0ac..45145406 100644 --- a/src/secml/data/splitter/c_chronological_splitter.py +++ b/src/secml/data/splitter/c_chronological_splitter.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from dateutil import parser from datetime import datetime @@ -51,13 +52,21 @@ class CChronologicalSplitter(CCreator): """ - def __init__(self, th_timestamp, train_size=1.0, test_size=1.0, - random_state=None, shuffle=True): - - if (is_float(test_size) and (test_size <= 0 or test_size > 1.0)) or \ - (is_float(train_size) and (train_size <= 0 or train_size > 1.0)): - raise ValueError("`test_size` and `train_size` " - "must be between (0 and 1.0] if float") + def __init__( + self, + th_timestamp, + train_size=1.0, + test_size=1.0, + random_state=None, + shuffle=True, + ): + + if (is_float(test_size) and (test_size <= 0 or test_size > 1.0)) or ( + is_float(train_size) and (train_size <= 0 or train_size > 1.0) + ): + raise ValueError( + "`test_size` and `train_size` " "must be between (0 and 1.0] if float" + ) # We use dateutil.parser is order to allow incomplete # timestamps (e.g. a single year '2016') @@ -95,18 +104,25 @@ def compute_indices(self, dataset): Flat arrays with the tr/ts indices. """ - if not hasattr(dataset.header, 'timestamp') or \ - not hasattr(dataset.header, 'timestamp_fmt'): - raise AttributeError("dataset must contain `timestamp` and " - "'timestamp_fmt' information") + if not hasattr(dataset.header, "timestamp") or not hasattr( + dataset.header, "timestamp_fmt" + ): + raise AttributeError( + "dataset must contain `timestamp` and " "'timestamp_fmt' information" + ) timestamps = dataset.header.timestamp fmt = dataset.header.timestamp_fmt # Pick the samples having `timestamp <= th` to build the training set - tr_mask = CArray(list(map( - lambda tstmp: datetime.strptime(tstmp, fmt) <= self.th_timestamp, - timestamps))) + tr_mask = CArray( + list( + map( + lambda tstmp: datetime.strptime(tstmp, fmt) <= self.th_timestamp, + timestamps, + ) + ) + ) # Test set samples are all the other samples ts_mask = tr_mask.logical_not() @@ -115,19 +131,24 @@ def compute_indices(self, dataset): max_ts = dataset.num_samples - max_tr if max_tr == 0: - raise ValueError("no samples with timestamp <= {:}. " - "Cannot split dataset.".format(self.th_timestamp)) + raise ValueError( + "no samples with timestamp <= {:}. " + "Cannot split dataset.".format(self.th_timestamp) + ) if max_ts == 0: - raise ValueError("no samples with timestamp > {:}. " - "Cannot split dataset.".format(self.th_timestamp)) + raise ValueError( + "no samples with timestamp > {:}. " + "Cannot split dataset.".format(self.th_timestamp) + ) # Compute the actual number of desired train/test samples if is_int(self.train_size): if self.train_size < 1 or self.train_size > max_tr: raise ValueError( - "train_size should be between 1 and {:}".format(max_tr)) + "train_size should be between 1 and {:}".format(max_tr) + ) else: # train_size is a valid integer, use it directly tr_size = self.train_size else: # Compute the proportion of train samples (at least 1) @@ -135,8 +156,7 @@ def compute_indices(self, dataset): if is_int(self.test_size): if self.test_size < 1 or self.test_size > max_ts: - raise ValueError( - "test_size should be between 1 and {:}".format(max_ts)) + raise ValueError("test_size should be between 1 and {:}".format(max_ts)) else: # test_size is a valid integer, use it directly ts_size = self.test_size else: # Compute the proportion of train samples (at least 1) @@ -151,9 +171,11 @@ def compute_indices(self, dataset): if self.shuffle is True: tr_idx = CArray.randsample( - tr_idx, shape=(tr_size, ), random_state=self.random_state) + tr_idx, shape=(tr_size,), random_state=self.random_state + ) ts_idx = CArray.randsample( - ts_idx, shape=(ts_size, ), random_state=self.random_state) + ts_idx, shape=(ts_size,), random_state=self.random_state + ) else: # Just slice the arrays of indices tr_idx = tr_idx[:tr_size] ts_idx = ts_idx[:ts_size] @@ -177,10 +199,12 @@ def split(self, dataset): Train and Test datasets. """ - if not hasattr(dataset.header, 'timestamp') or \ - not hasattr(dataset.header, 'timestamp_fmt'): - raise AttributeError("dataset must contain `timestamp` and " - "'timestamp_fmt' information") + if not hasattr(dataset.header, "timestamp") or not hasattr( + dataset.header, "timestamp_fmt" + ): + raise AttributeError( + "dataset must contain `timestamp` and " "'timestamp_fmt' information" + ) # Computing splitting indices tr_idx, ts_idx = self.compute_indices(dataset) diff --git a/src/secml/data/splitter/c_datasplitter.py b/src/secml/data/splitter/c_datasplitter.py index 61f1d6ab..ba516fee 100644 --- a/src/secml/data/splitter/c_datasplitter.py +++ b/src/secml/data/splitter/c_datasplitter.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -25,7 +26,8 @@ class CDataSplitter(CCreator, metaclass=ABCMeta): If None, is the RandomState instance used by np.random. """ - __super__ = 'CDataSplitter' + + __super__ = "CDataSplitter" def __init__(self, num_folds=3, random_state=None): @@ -65,8 +67,9 @@ def compute_indices(self, dataset): Instance of the dataset splitter with tr/ts indices. """ - raise NotImplementedError("Each data splitting algorithm must define " - "a `compute_indices` method.") + raise NotImplementedError( + "Each data splitting algorithm must define " "a `compute_indices` method." + ) def split(self, dataset): """Returns a list of split datasets. diff --git a/src/secml/data/splitter/c_datasplitter_kfold.py b/src/secml/data/splitter/c_datasplitter_kfold.py index b7d2023b..42be2e64 100644 --- a/src/secml/data/splitter/c_datasplitter_kfold.py +++ b/src/secml/data/splitter/c_datasplitter_kfold.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn.model_selection import KFold from secml.array import CArray @@ -50,12 +51,14 @@ class CDataSplitterKFold(CDataSplitter): [CArray(1,)(dense: [2]), CArray(1,)(dense: [1]), CArray(1,)(dense: [0])] """ - __class_type = 'kfold' + + __class_type = "kfold" def __init__(self, num_folds=3, random_state=None): super(CDataSplitterKFold, self).__init__( - num_folds=num_folds, random_state=random_state) + num_folds=num_folds, random_state=random_state + ) def compute_indices(self, dataset): """Compute training set and test set indices for each fold. @@ -75,13 +78,12 @@ def compute_indices(self, dataset): self._tr_idx = [] self._ts_idx = [] - sk_splitter = KFold(n_splits=self.num_folds, - shuffle=True, - random_state=self.random_state) + sk_splitter = KFold( + n_splits=self.num_folds, shuffle=True, random_state=self.random_state + ) # We take sklearn indices (iterators) and map to list of CArrays - for train_index, test_index in \ - sk_splitter.split(dataset.X.get_data()): + for train_index, test_index in sk_splitter.split(dataset.X.get_data()): train_index = CArray(train_index) test_index = CArray(test_index) self._tr_idx.append(train_index) diff --git a/src/secml/data/splitter/c_datasplitter_labelkfold.py b/src/secml/data/splitter/c_datasplitter_labelkfold.py index 27c7864d..8eb2cabc 100644 --- a/src/secml/data/splitter/c_datasplitter_labelkfold.py +++ b/src/secml/data/splitter/c_datasplitter_labelkfold.py @@ -5,6 +5,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.data.splitter import CDataSplitter @@ -44,7 +45,8 @@ class CDataSplitterLabelKFold(CDataSplitter): [CArray(2,)(dense: [0 2]), CArray(1,)(dense: [3]), CArray(1,)(dense: [1])] """ - __class_type = 'label-kfold' + + __class_type = "label-kfold" def __init__(self, num_folds=3): @@ -73,9 +75,11 @@ def compute_indices(self, dataset): if self.num_folds > n_labels: raise ValueError( - ("Cannot have number of folds ({0}) greater" - " than the number of classes: {1}.").format( - self.num_folds, n_labels)) + ( + "Cannot have number of folds ({0}) greater" + " than the number of classes: {1}." + ).format(self.num_folds, n_labels) + ) # Weight labels by their number of occurrences n_samples_per_label = labels.bincount() diff --git a/src/secml/data/splitter/c_datasplitter_openworld.py b/src/secml/data/splitter/c_datasplitter_openworld.py index 8f996f38..b2dda489 100644 --- a/src/secml/data/splitter/c_datasplitter_openworld.py +++ b/src/secml/data/splitter/c_datasplitter_openworld.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.data.splitter import CDataSplitter @@ -60,13 +61,16 @@ class CDataSplitterOpenWorldKFold(CDataSplitter): [CArray(1,)(dense: [1]), CArray(1,)(dense: [0]), CArray(1,)(dense: [1])] """ - __class_type = 'open-world-kfold' - def __init__(self, num_folds=3, n_train_samples=5, - n_train_classes=None, random_state=None): + __class_type = "open-world-kfold" + + def __init__( + self, num_folds=3, n_train_samples=5, n_train_classes=None, random_state=None + ): super(CDataSplitterOpenWorldKFold, self).__init__( - num_folds=num_folds, random_state=random_state) + num_folds=num_folds, random_state=random_state + ) self.n_train_samples = n_train_samples self.n_train_classes = n_train_classes @@ -99,8 +103,11 @@ def compute_indices(self, dataset): # If no custom number of training classes is selected, # use half of the classes - n_train_classes = int(dataset.num_classes / 2) \ - if self.n_train_classes is None else int(self.n_train_classes) + n_train_classes = ( + int(dataset.num_classes / 2) + if self.n_train_classes is None + else int(self.n_train_classes) + ) for fold in range(self.num_folds): @@ -113,9 +120,9 @@ def compute_indices(self, dataset): # only 'n_train_classes' random classes will be trained... # but now we randsample all classes to backup in case one or # more classes will be skipped for n_train_samples - all_tr_classes = CArray.randsample(dataset.classes, - dataset.num_classes, - random_state=random_state) + all_tr_classes = CArray.randsample( + dataset.classes, dataset.num_classes, random_state=random_state + ) # Placeholder for indices of chosen training classes' samples train_samples_idx = CArray([], dtype=int) @@ -125,22 +132,25 @@ def compute_indices(self, dataset): if train_classes.size >= n_train_classes: break # we reached the desired number of training classes # Vector with indices of current client's samples - client_samples_idx = CArray( - dataset.Y.find(dataset.Y == train_class)) + client_samples_idx = CArray(dataset.Y.find(dataset.Y == train_class)) # Check if we have at least n_train_samples + 1 samples for # current client if client_samples_idx.size < self.n_train_samples + 1: - self.logger.warning("skipping class {:} for training set. " - "{:} samples is less than {:}." - "".format(train_class, - client_samples_idx.size, - self.n_train_samples + 1)) + self.logger.warning( + "skipping class {:} for training set. " + "{:} samples is less than {:}." + "".format( + train_class, + client_samples_idx.size, + self.n_train_samples + 1, + ) + ) continue # Random subselection of training samples - random_samples = CArray.randsample(client_samples_idx, - self.n_train_samples, - random_state=random_state) + random_samples = CArray.randsample( + client_samples_idx, self.n_train_samples, random_state=random_state + ) # Appending to vector of indices for training set a random # subselection of samples train_samples_idx = train_samples_idx.append(random_samples) @@ -155,8 +165,12 @@ def compute_indices(self, dataset): # All other samples go to test test_samples_idx = CArray( - [idx for idx in range(dataset.num_samples) - if idx not in train_samples_idx]) + [ + idx + for idx in range(dataset.num_samples) + if idx not in train_samples_idx + ] + ) self._tr_idx += [train_samples_idx] self._ts_idx += [test_samples_idx] diff --git a/src/secml/data/splitter/c_datasplitter_shuffle.py b/src/secml/data/splitter/c_datasplitter_shuffle.py index f1d2ae32..a14e4c59 100644 --- a/src/secml/data/splitter/c_datasplitter_shuffle.py +++ b/src/secml/data/splitter/c_datasplitter_shuffle.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn.model_selection import ShuffleSplit from secml.array import CArray @@ -77,13 +78,14 @@ class CDataSplitterShuffle(CDataSplitter): [CArray(3,)(dense: [2 1 0]), CArray(3,)(dense: [2 0 1]), CArray(3,)(dense: [0 2 1])] """ - __class_type = 'shuffle' - def __init__(self, num_folds=3, train_size=None, - test_size=None, random_state=None): + __class_type = "shuffle" + + def __init__(self, num_folds=3, train_size=None, test_size=None, random_state=None): super(CDataSplitterShuffle, self).__init__( - num_folds=num_folds, random_state=random_state) + num_folds=num_folds, random_state=random_state + ) self.train_size = train_size self.test_size = test_size @@ -106,14 +108,15 @@ def compute_indices(self, dataset): self._tr_idx = [] self._ts_idx = [] - sk_splitter = ShuffleSplit(n_splits=self.num_folds, - train_size=self.train_size, - test_size=self.test_size, - random_state=self.random_state) + sk_splitter = ShuffleSplit( + n_splits=self.num_folds, + train_size=self.train_size, + test_size=self.test_size, + random_state=self.random_state, + ) # We take sklearn indices (iterators) and map to list of CArrays - for train_index, test_index in \ - sk_splitter.split(dataset.X.get_data()): + for train_index, test_index in sk_splitter.split(dataset.X.get_data()): train_index = CArray(train_index) test_index = CArray(test_index) self._tr_idx.append(train_index) diff --git a/src/secml/data/splitter/c_datasplitter_stratkfold.py b/src/secml/data/splitter/c_datasplitter_stratkfold.py index 50b7f0ff..5d39a4b3 100644 --- a/src/secml/data/splitter/c_datasplitter_stratkfold.py +++ b/src/secml/data/splitter/c_datasplitter_stratkfold.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn.model_selection import StratifiedKFold from secml.array import CArray @@ -51,12 +52,14 @@ class CDataSplitterStratifiedKFold(CDataSplitter): [CArray(2,)(dense: [0 2]), CArray(2,)(dense: [1 3])] """ - __class_type = 'strat-kfold' + + __class_type = "strat-kfold" def __init__(self, num_folds=3, random_state=None): super(CDataSplitterStratifiedKFold, self).__init__( - num_folds, random_state=random_state) + num_folds, random_state=random_state + ) def compute_indices(self, dataset): """Compute training set and test set indices for each fold. @@ -76,14 +79,14 @@ def compute_indices(self, dataset): self._tr_idx = [] self._ts_idx = [] - sk_splitter = StratifiedKFold(n_splits=self.num_folds, - shuffle=True, - random_state=self.random_state) + sk_splitter = StratifiedKFold( + n_splits=self.num_folds, shuffle=True, random_state=self.random_state + ) # We take sklearn indices (iterators) and map to list of CArrays - for train_index, test_index in \ - sk_splitter.split(X=dataset.X.get_data(), - y=dataset.Y.get_data()): + for train_index, test_index in sk_splitter.split( + X=dataset.X.get_data(), y=dataset.Y.get_data() + ): train_index = CArray(train_index) test_index = CArray(test_index) self._tr_idx.append(train_index) diff --git a/src/secml/data/splitter/c_train_test_split.py b/src/secml/data/splitter/c_train_test_split.py index cdced113..a6fd8e0f 100644 --- a/src/secml/data/splitter/c_train_test_split.py +++ b/src/secml/data/splitter/c_train_test_split.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn.model_selection import train_test_split from secml.core import CCreator @@ -77,12 +78,12 @@ class CTrainTestSplit(CCreator): """ - def __init__(self, train_size=None, test_size=None, - random_state=None, shuffle=True): + def __init__( + self, train_size=None, test_size=None, random_state=None, shuffle=True + ): if train_size is None and test_size is None: - raise ValueError( - "'train_size' and 'test_size' cannot be both None") + raise ValueError("'train_size' and 'test_size' cannot be both None") self.train_size = train_size self.test_size = test_size @@ -117,21 +118,26 @@ def compute_indices(self, dataset): """ min_set_perc = 1 / dataset.num_samples - if (is_float(self.train_size) and self.train_size < min_set_perc) or \ - (is_int(self.train_size) and self.train_size < 1): + if (is_float(self.train_size) and self.train_size < min_set_perc) or ( + is_int(self.train_size) and self.train_size < 1 + ): raise ValueError( - "train_size should be at least 1 or {:}".format(min_set_perc)) - if (is_float(self.test_size) and self.test_size < min_set_perc) or \ - (is_int(self.test_size) and self.test_size < 1): + "train_size should be at least 1 or {:}".format(min_set_perc) + ) + if (is_float(self.test_size) and self.test_size < min_set_perc) or ( + is_int(self.test_size) and self.test_size < 1 + ): raise ValueError( - "test_size should be at least 1 or {:}".format(min_set_perc)) + "test_size should be at least 1 or {:}".format(min_set_perc) + ) tr_idx, ts_idx = train_test_split( CArray.arange(dataset.num_samples).tondarray(), train_size=self.train_size, test_size=self.test_size, random_state=self.random_state, - shuffle=self.shuffle) + shuffle=self.shuffle, + ) self._tr_idx = CArray(tr_idx) self._ts_idx = CArray(ts_idx) diff --git a/src/secml/data/splitter/tests/test_chronological_splitter.py b/src/secml/data/splitter/tests/test_chronological_splitter.py index 43bcff51..df6e2496 100644 --- a/src/secml/data/splitter/tests/test_chronological_splitter.py +++ b/src/secml/data/splitter/tests/test_chronological_splitter.py @@ -15,26 +15,29 @@ def setUp(self): self.ds = CDLRandom(n_samples=10, random_state=0).load() - timestamps = CArray(['2016-02-17T10:35:58', - '2014-04-04T22:24:22', - '2016-08-07T17:10:36', - '2014-05-22T11:02:58', - '2016-07-01T07:12:34', - '2016-01-03T13:10:38', - '2014-07-28T23:42:00', - '2014-07-08T09:42:42', - '2016-05-06T18:38:08', - '2015-11-03T21:07:04']) + timestamps = CArray( + [ + "2016-02-17T10:35:58", + "2014-04-04T22:24:22", + "2016-08-07T17:10:36", + "2014-05-22T11:02:58", + "2016-07-01T07:12:34", + "2016-01-03T13:10:38", + "2014-07-28T23:42:00", + "2014-07-08T09:42:42", + "2016-05-06T18:38:08", + "2015-11-03T21:07:04", + ] + ) self.ds.header = CDatasetHeader( - timestamp=timestamps, timestamp_fmt='%Y-%m-%dT%H:%M:%S') + timestamp=timestamps, timestamp_fmt="%Y-%m-%dT%H:%M:%S" + ) def test_chronological_split(self): # Test splitter with default values (just seed for reproducibility) - tts = CChronologicalSplitter( - th_timestamp='2015', - random_state=0) + tts = CChronologicalSplitter(th_timestamp="2015", random_state=0) tr_idx, ts_idx = tts.compute_indices(self.ds) @@ -67,19 +70,26 @@ def test_chronological_split(self): tr_tmps = tr.header.timestamp ts_tmps = ts.header.timestamp - self.assertFalse(any(map( - lambda tstmp: datetime.strptime(tstmp, fmt) > tts.th_timestamp, - tr_tmps))) - - self.assertFalse(any(map( - lambda tstmp: datetime.strptime(tstmp, fmt) <= tts.th_timestamp, - ts_tmps))) + self.assertFalse( + any( + map( + lambda tstmp: datetime.strptime(tstmp, fmt) > tts.th_timestamp, + tr_tmps, + ) + ) + ) + + self.assertFalse( + any( + map( + lambda tstmp: datetime.strptime(tstmp, fmt) <= tts.th_timestamp, + ts_tmps, + ) + ) + ) # Test splitter with custom integer train size - tts = CChronologicalSplitter( - th_timestamp='2015', - train_size=2, - random_state=0) + tts = CChronologicalSplitter(th_timestamp="2015", train_size=2, random_state=0) tr_idx, ts_idx = tts.compute_indices(self.ds) @@ -94,10 +104,8 @@ def test_chronological_split(self): # Test splitter with custom float train/test size tts = CChronologicalSplitter( - th_timestamp='2015', - train_size=0.25, - test_size=0.5, - random_state=0) + th_timestamp="2015", train_size=0.25, test_size=0.5, random_state=0 + ) tr_idx, ts_idx = tts.compute_indices(self.ds) @@ -111,9 +119,7 @@ def test_chronological_split(self): self.assertFalse((ts_idx != ts_idx_expected).any()) # Test splitter with no random shuffle - tts = CChronologicalSplitter( - th_timestamp='2015', - shuffle=False) + tts = CChronologicalSplitter(th_timestamp="2015", shuffle=False) tr_idx, ts_idx = tts.compute_indices(self.ds) @@ -127,5 +133,5 @@ def test_chronological_split(self): self.assertFalse((ts_idx != ts_idx_expected).any()) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/splitter/tests/test_data_splitter.py b/src/secml/data/splitter/tests/test_data_splitter.py index 9669030b..f055d6c7 100644 --- a/src/secml/data/splitter/tests/test_data_splitter.py +++ b/src/secml/data/splitter/tests/test_data_splitter.py @@ -15,8 +15,7 @@ def test_kfold(self): ds = CDLRandom(n_samples=10, random_state=0).load() self.logger.info("Testing K-Fold") - kf = CDataSplitterKFold( - num_folds=2, random_state=5000).compute_indices(ds) + kf = CDataSplitterKFold(num_folds=2, random_state=5000).compute_indices(ds) tr_idx_expected = [CArray([1, 2, 5, 8, 9]), CArray([0, 3, 4, 6, 7])] ts_idx_expected = [CArray([0, 3, 4, 6, 7]), CArray([1, 2, 5, 8, 9])] @@ -25,18 +24,18 @@ def test_kfold(self): self.assertEqual(len(kf.ts_idx), 2) for fold_idx in range(kf.num_folds): - self.logger.info("{:} fold: \nTR {:} \nTS {:}" - "".format(fold_idx, kf.tr_idx[fold_idx], - kf.ts_idx[fold_idx])) - self.assert_array_equal( - tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) - self.assert_array_equal( - ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) + self.logger.info( + "{:} fold: \nTR {:} \nTS {:}" + "".format(fold_idx, kf.tr_idx[fold_idx], kf.ts_idx[fold_idx]) + ) + self.assert_array_equal(tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) + self.assert_array_equal(ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) def test_labelkfold(self): ds = CDLRandom( - n_classes=3, n_samples=10, n_informative=3, random_state=0).load() + n_classes=3, n_samples=10, n_informative=3, random_state=0 + ).load() self.logger.info("Testing Label K-Fold") kf = CDataSplitterLabelKFold(num_folds=2).compute_indices(ds) @@ -48,29 +47,35 @@ def test_labelkfold(self): self.assertEqual(len(kf.ts_idx), 2) for fold_idx in range(kf.num_folds): - self.logger.info("{:} fold: \nTR {:} {:} \nTS {:} {:}" - "".format(fold_idx, kf.tr_idx[fold_idx], - ds.Y[kf.tr_idx[fold_idx]], - kf.ts_idx[fold_idx], - ds.Y[kf.ts_idx[fold_idx]])) - self.assert_array_equal( - tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) - self.assert_array_equal( - ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) + self.logger.info( + "{:} fold: \nTR {:} {:} \nTS {:} {:}" + "".format( + fold_idx, + kf.tr_idx[fold_idx], + ds.Y[kf.tr_idx[fold_idx]], + kf.ts_idx[fold_idx], + ds.Y[kf.ts_idx[fold_idx]], + ) + ) + self.assert_array_equal(tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) + self.assert_array_equal(ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) def test_openworldkfold(self): ds = CDLRandom( - n_classes=3, n_samples=14, n_informative=3, random_state=0).load() + n_classes=3, n_samples=14, n_informative=3, random_state=0 + ).load() self.logger.info("Testing Open World K-Fold") kf = CDataSplitterOpenWorldKFold( - num_folds=2, n_train_samples=4, - random_state=5000).compute_indices(ds) + num_folds=2, n_train_samples=4, random_state=5000 + ).compute_indices(ds) tr_idx_expected = [CArray([0, 4, 8, 12]), CArray([1, 3, 9, 13])] - ts_idx_expected = [CArray([1, 2, 3, 5, 6, 7, 9, 10, 11, 13]), - CArray([0, 2, 4, 5, 6, 7, 8, 10, 11, 12])] + ts_idx_expected = [ + CArray([1, 2, 3, 5, 6, 7, 9, 10, 11, 13]), + CArray([0, 2, 4, 5, 6, 7, 8, 10, 11, 12]), + ] self.assertEqual(len(kf.tr_idx), 2) self.assertEqual(len(kf.ts_idx), 2) @@ -81,24 +86,37 @@ def test_openworldkfold(self): self.logger.info( "{:} fold:\nTR CLASSES {:}\nTR {:} {:}\nTS {:} {:}".format( - fold_idx, kf.tr_classes[fold_idx], - kf.tr_idx[fold_idx], ds.Y[kf.tr_idx[fold_idx]], - kf.ts_idx[fold_idx], ds.Y[kf.ts_idx[fold_idx]])) - self.assert_array_equal( - tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) - self.assert_array_equal( - ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) + fold_idx, + kf.tr_classes[fold_idx], + kf.tr_idx[fold_idx], + ds.Y[kf.tr_idx[fold_idx]], + kf.ts_idx[fold_idx], + ds.Y[kf.ts_idx[fold_idx]], + ) + ) + self.assert_array_equal(tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) + self.assert_array_equal(ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) def test_openworldkfold_tr_class_skip(self): - ds = CDataset([[1, 2], [3, 4], [5, 6], - [10, 20], [30, 40], [50, 60], - [100, 200], [300, 400], [500, 600]], - [1, 2, 1, 2, 2, 0, 1, 0, 2]) # class 0 has 2 samples + ds = CDataset( + [ + [1, 2], + [3, 4], + [5, 6], + [10, 20], + [30, 40], + [50, 60], + [100, 200], + [300, 400], + [500, 600], + ], + [1, 2, 1, 2, 2, 0, 1, 0, 2], + ) # class 0 has 2 samples # create 25 folds to increase the chance of getting the warning message kf = CDataSplitterOpenWorldKFold( - num_folds=25, n_train_samples=2, - random_state=5000).compute_indices(ds) + num_folds=25, n_train_samples=2, random_state=5000 + ).compute_indices(ds) self.assertEqual(len(kf.tr_idx), 25) self.assertEqual(len(kf.ts_idx), 25) @@ -113,12 +131,14 @@ def test_shuffle(self): self.logger.info("Testing Shuffle ") kf = CDataSplitterShuffle( - num_folds=2, train_size=0.2, - random_state=5000).compute_indices(ds) + num_folds=2, train_size=0.2, random_state=5000 + ).compute_indices(ds) tr_idx_expected = [CArray([1, 2]), CArray([9, 3])] - ts_idx_expected = [CArray([6, 4, 7, 0, 3, 9, 5, 8]), - CArray([7, 5, 4, 0, 8, 2, 6, 1])] + ts_idx_expected = [ + CArray([6, 4, 7, 0, 3, 9, 5, 8]), + CArray([7, 5, 4, 0, 8, 2, 6, 1]), + ] self.assertEqual(len(kf.tr_idx), 2) self.assertEqual(len(kf.ts_idx), 2) @@ -126,13 +146,12 @@ def test_shuffle(self): self.logger.info("DS classes:\n{:}".format(ds.Y)) for fold_idx in range(kf.num_folds): - self.logger.info("{:} fold: \nTR {:} \nTS {:}" - "".format(fold_idx, kf.tr_idx[fold_idx], - kf.ts_idx[fold_idx])) - self.assert_array_equal( - tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) - self.assert_array_equal( - ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) + self.logger.info( + "{:} fold: \nTR {:} \nTS {:}" + "".format(fold_idx, kf.tr_idx[fold_idx], kf.ts_idx[fold_idx]) + ) + self.assert_array_equal(tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) + self.assert_array_equal(ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) def test_stratifiedkfold(self): @@ -140,12 +159,14 @@ def test_stratifiedkfold(self): self.logger.info("Testing Stratified K-Fold") kf = CDataSplitterStratifiedKFold( - num_folds=2, random_state=5000).compute_indices(ds) + num_folds=2, random_state=5000 + ).compute_indices(ds) import sklearn - if sklearn.__version__ < '0.22': # TODO: REMOVE AFTER BUMPING DEPS - # v0.22 changed the model to fix an issue related test set size - # https://github.com/scikit-learn/scikit-learn/pull/14704 + + if sklearn.__version__ < "0.22": # TODO: REMOVE AFTER BUMPING DEPS + # v0.22 changed the model to fix an issue related test set size + # https://github.com/scikit-learn/scikit-learn/pull/14704 tr_idx_expected = [CArray([4, 5, 6, 9]), CArray([0, 1, 2, 3, 7, 8])] ts_idx_expected = [CArray([0, 1, 2, 3, 7, 8]), CArray([4, 5, 6, 9])] else: @@ -158,14 +179,13 @@ def test_stratifiedkfold(self): self.logger.info("DS classes:\n{:}".format(ds.Y)) for fold_idx in range(kf.num_folds): - self.logger.info("{:} fold: \nTR {:} \nTS {:}" - "".format(fold_idx, kf.tr_idx[fold_idx], - kf.ts_idx[fold_idx])) - self.assert_array_equal( - tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) - self.assert_array_equal( - ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) + self.logger.info( + "{:} fold: \nTR {:} \nTS {:}" + "".format(fold_idx, kf.tr_idx[fold_idx], kf.ts_idx[fold_idx]) + ) + self.assert_array_equal(tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]) + self.assert_array_equal(ts_idx_expected[fold_idx], kf.ts_idx[fold_idx]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/secml/data/splitter/tests/test_train_test_split.py b/src/secml/data/splitter/tests/test_train_test_split.py index 95400f88..7a0a813b 100644 --- a/src/secml/data/splitter/tests/test_train_test_split.py +++ b/src/secml/data/splitter/tests/test_train_test_split.py @@ -57,5 +57,5 @@ def test_train_test_split(self): self.assertTrue(ts.issparse) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/tests/test_c_dataset_pytorch.py b/src/secml/data/tests/test_c_dataset_pytorch.py index 64578306..49a5ee98 100644 --- a/src/secml/data/tests/test_c_dataset_pytorch.py +++ b/src/secml/data/tests/test_c_dataset_pytorch.py @@ -18,13 +18,17 @@ def setUp(self): self.n_classes = 3 self.n_features = 5 - - self.ds = CDLRandom(n_classes=self.n_classes, - n_features=self.n_features, - n_informative=self.n_features, - n_redundant=0).load() - self.logger.info("num_samples: {}, num_classes: {:}".format( - self.ds.num_samples, self.ds.num_classes)) + self.ds = CDLRandom( + n_classes=self.n_classes, + n_features=self.n_features, + n_informative=self.n_features, + n_redundant=0, + ).load() + self.logger.info( + "num_samples: {}, num_classes: {:}".format( + self.ds.num_samples, self.ds.num_classes + ) + ) def test_convert(self): """Test converting a CDataset into a CDatasetPyTorch.""" @@ -93,5 +97,5 @@ def test_getitem(self): torch_ds[[2, 3]] -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/data/tests/test_cdataset.py b/src/secml/data/tests/test_cdataset.py index 8fbe2fdb..512eda2c 100644 --- a/src/secml/data/tests/test_cdataset.py +++ b/src/secml/data/tests/test_cdataset.py @@ -21,46 +21,38 @@ def test_properties(self): self.logger.info("Number of patterns: \n" + str(self.dataset.num_samples)) self.logger.info("Number of features: \n" + str(self.dataset.num_features)) self.logger.info("Testing dataset properties...") - self.assertEqual( - 2, self.dataset.num_classes, "Wrong number of classes!") - self.assertEqual( - 3, self.dataset.num_samples, "Wrong number of patterns!") - self.assertEqual( - 3, self.dataset.num_features, "Wrong number of features!") + self.assertEqual(2, self.dataset.num_classes, "Wrong number of classes!") + self.assertEqual(3, self.dataset.num_samples, "Wrong number of patterns!") + self.assertEqual(3, self.dataset.num_features, "Wrong number of features!") def test_getters_and_setters(self): """Test for getters and setters of the class.""" self.logger.info("Testing setters and getters for the dataset...") - self.assertTrue( - (self.dataset.X == self.X).all(), "Wrong pattern extraction") - self.assertTrue( - (self.dataset.Y == self.Y).all(), "Wrong labels extraction") + self.assertTrue((self.dataset.X == self.X).all(), "Wrong pattern extraction") + self.assertTrue((self.dataset.Y == self.Y).all(), "Wrong labels extraction") new_patterns = CArray([[1, 2], [3, 4], [5, 6]]) - self.logger.info( - "Setting new patterns: \n" + str(new_patterns)) + self.logger.info("Setting new patterns: \n" + str(new_patterns)) self.dataset.X = new_patterns self.logger.info("Testing new patterns...") - self.assertTrue( - (self.dataset.X == new_patterns).all(), "Wrong patterns set!") + self.assertTrue((self.dataset.X == new_patterns).all(), "Wrong patterns set!") with self.assertRaises(ValueError): new_patterns = CArray([[1, 2, 3], [4, 5, 6]]) self.logger.info( - "Setting less patterns than labels: \n" + str(new_patterns)) + "Setting less patterns than labels: \n" + str(new_patterns) + ) self.dataset.X = new_patterns new_labels = CArray([11, 22, 33]) self.logger.info("Setting new labels: \n" + str(new_labels)) self.dataset.Y = new_labels self.logger.info("Testing new labels...") - self.assertTrue( - (self.dataset.Y == new_labels).all(), "Wrong labels extraction") + self.assertTrue((self.dataset.Y == new_labels).all(), "Wrong labels extraction") with self.assertRaises(ValueError): new_labels = CArray([1, 2]) - self.logger.info( - "Setting less labels than patterns: \n" + str(new_labels)) + self.logger.info("Setting less labels than patterns: \n" + str(new_labels)) self.dataset.Y = new_labels def test_select_patterns(self): @@ -76,34 +68,39 @@ def test_select_patterns(self): def test_subset(self): """Tests for subset method.""" self.logger.info("Testing subsets...") - subset_lists = [([0, 1], [0, 1]), - ([0, 2], slice(0, 3)), - (slice(0, 3), [0, 2])] - x_targets = [CArray([[1, 2], [4, 5]]), - CArray([[1, 2, 3], [7, 8, 9]]), - CArray([[1, 3], [4, 6], [7, 9]])] + subset_lists = [([0, 1], [0, 1]), ([0, 2], slice(0, 3)), (slice(0, 3), [0, 2])] + x_targets = [ + CArray([[1, 2], [4, 5]]), + CArray([[1, 2, 3], [7, 8, 9]]), + CArray([[1, 3], [4, 6], [7, 9]]), + ] y_targets = [CArray([1, 2]), CArray([1, 2]), CArray([1, 2, 2])] for row_cols, Xtarget, Ytarget in zip(subset_lists, x_targets, y_targets): rows = row_cols[0] cols = row_cols[1] subset = self.dataset[rows, cols] self.logger.info( - "Testing Subset extraction with rows indices: " + str(rows) + - " and columns indices: " + str(cols) + " \n" + str(subset.X) + - " \n" + str(subset.Y)) + "Testing Subset extraction with rows indices: " + + str(rows) + + " and columns indices: " + + str(cols) + + " \n" + + str(subset.X) + + " \n" + + str(subset.Y) + ) self.assert_array_equal(subset.X, Xtarget) self.assert_array_equal(subset.Y, Ytarget) def test_custom_attr(self): """Testing for custom attributes.""" - header = CDatasetHeader( - id='mydataset', age=34, colors=CArray([1, 2, 3])) + header = CDatasetHeader(id="mydataset", age=34, colors=CArray([1, 2, 3])) ds = CDataset(self.X, self.Y, header=header) ds_params = ds.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal(ds_params['colors'], CArray([1, 2, 3])) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], CArray([1, 2, 3])) # Testing getitem. Immutable objects should be copied as they are. # Arrays should be indexed. @@ -111,9 +108,9 @@ def test_custom_attr(self): ds_params = ds_get.header.get_params() self.assert_array_equal(ds_get.X, CArray([[1, 2, 3], [7, 8, 9]])) self.assert_array_equal(ds_get.Y, CArray([1, 2])) - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal(ds_params['colors'], CArray([1, 3])) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], CArray([1, 3])) def test_append(self): """Test for .append() method.""" @@ -122,50 +119,48 @@ def test_append(self): self.assertEqual(self.dataset.num_samples * 2, ds_append.num_samples) self.assert_array_equal( - ds_append.X, CArray([[1, 2, 3], [4, 5, 6], [7, 8, 9], - [1, 2, 3], [4, 5, 6], [7, 8, 9]])) + ds_append.X, + CArray([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ) self.assert_array_equal(ds_append.Y, CArray([1, 2, 2, 1, 2, 2])) # Test append with header ds = self.dataset.deepcopy() # Test append with header in both ds - header = CDatasetHeader( - id='mydataset', age=34, colors=CArray([1, 2, 3])) + header = CDatasetHeader(id="mydataset", age=34, colors=CArray([1, 2, 3])) ds.header = header # Test append with header in both ds ds_append = ds.append(ds) ds_params = ds_append.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal( - ds_params['colors'], CArray([1, 2, 3, 1, 2, 3])) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], CArray([1, 2, 3, 1, 2, 3])) # Create two copies now for later tests ds1 = self.dataset.deepcopy() ds2 = self.dataset.deepcopy() # For the following tests we cannot use CArrays as params. Use tuple - header = CDatasetHeader( - id='mydataset', age=34, colors=(1, 2, 3)) + header = CDatasetHeader(id="mydataset", age=34, colors=(1, 2, 3)) ds1.header = header ds2.header = header # Test append with header in first ds ds_append = ds1.append(self.dataset) ds_params = ds_append.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assertEqual(ds_params['colors'], (1, 2, 3)) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assertEqual(ds_params["colors"], (1, 2, 3)) # Test append with header in second ds ds_append = self.dataset.append(ds2) ds_params = ds_append.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal(ds_params['colors'], (1, 2, 3)) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], (1, 2, 3)) def test_copy(self): """Test for .deepcopy() method.""" @@ -180,8 +175,7 @@ def test_copy(self): self.assert_array_equal(ds_copy.Y[0], CArray([100])) # Test deepcopy with header - header = CDatasetHeader( - id='mydataset', age=34, colors=CArray([1, 2, 3])) + header = CDatasetHeader(id="mydataset", age=34, colors=CArray([1, 2, 3])) self.dataset.header = header ds_copy = self.dataset.deepcopy() @@ -189,14 +183,14 @@ def test_copy(self): # Now change header of original dataset self.dataset.header.colors[0] = 100 ds_params = self.dataset.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal(ds_params['colors'], CArray([100, 2, 3])) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], CArray([100, 2, 3])) ds_params = ds_copy.header.get_params() - self.assertEqual(ds_params['id'], 'mydataset') - self.assertEqual(ds_params['age'], 34) - self.assert_array_equal(ds_params['colors'], CArray([1, 2, 3])) + self.assertEqual(ds_params["id"], "mydataset") + self.assertEqual(ds_params["age"], 34) + self.assert_array_equal(ds_params["colors"], CArray([1, 2, 3])) def test_labels_binarize(self): """Unittests for `.get_labels_ovr` and `.get_labels_onehot`.""" @@ -204,7 +198,8 @@ def test_labels_binarize(self): onehot = self.dataset.get_labels_onehot() self.assertEqual( - (self.dataset.num_labels, self.dataset.Y.max() + 1), onehot.shape) + (self.dataset.num_labels, self.dataset.Y.max() + 1), onehot.shape + ) self.assertFalse((onehot != 0).logical_and(onehot != 1).any()) self.assertIsSubDtype(onehot.dtype, int) diff --git a/src/secml/data/tests/test_cdataset_header.py b/src/secml/data/tests/test_cdataset_header.py index 10e00a8b..a8c02942 100644 --- a/src/secml/data/tests/test_cdataset_header.py +++ b/src/secml/data/tests/test_cdataset_header.py @@ -10,19 +10,21 @@ class TestCDatasetHeader(CUnitTest): def setUp(self): self.header = CDatasetHeader( - id='mydataset', age=34, + id="mydataset", + age=34, colors=CArray([1, 2, 3]), - days=('Mon', 'Tue', 'Wed', 'Thu')) + days=("Mon", "Tue", "Wed", "Thu"), + ) def test_properties(self): """Test class properties.""" - self.assertEqual('mydataset', self.header.id) + self.assertEqual("mydataset", self.header.id) self.logger.info("header.id: {:}".format(self.header.id)) self.assertEqual(34, self.header.age) self.logger.info("header.age: {:}".format(self.header.age)) - self.assertEqual(('Mon', 'Tue', 'Wed', 'Thu'), self.header.days) + self.assertEqual(("Mon", "Tue", "Wed", "Thu"), self.header.days) self.logger.info("header.days: {:}".format(self.header.days)) self.assert_array_equal(CArray([1, 2, 3]), self.header.colors) @@ -48,11 +50,10 @@ def test_getitem(self): """Test for getter.""" h_get = self.header[[0, 2]] params = h_get.get_params() - self.assertEqual(params['id'], 'mydataset') - self.assertEqual(params['age'], 34) - self.assertEqual(params['days'], ('Mon', 'Tue', 'Wed', 'Thu')) - self.assert_array_equal( - params['colors'], CArray([1, 3])) + self.assertEqual(params["id"], "mydataset") + self.assertEqual(params["age"], 34) + self.assertEqual(params["days"], ("Mon", "Tue", "Wed", "Thu")) + self.assert_array_equal(params["colors"], CArray([1, 3])) with self.assertRaises(IndexError): # 'colors' CArray has size 3 self.header[[0, 3]] @@ -62,25 +63,21 @@ def test_append(self): h_append = self.header.append(self.header) params = h_append.get_params() - self.assertEqual(params['id'], 'mydataset') - self.assertEqual(params['age'], 34) - self.assertEqual(params['days'], ('Mon', 'Tue', 'Wed', 'Thu')) - self.assert_array_equal( - params['colors'], CArray([1, 2, 3, 1, 2, 3])) + self.assertEqual(params["id"], "mydataset") + self.assertEqual(params["age"], 34) + self.assertEqual(params["days"], ("Mon", "Tue", "Wed", "Thu")) + self.assert_array_equal(params["colors"], CArray([1, 2, 3, 1, 2, 3])) # Create an additional header with new attributes set - h2 = CDatasetHeader( - a=4, age=34, colors=CArray([10, 20, 30]) - ) + h2 = CDatasetHeader(a=4, age=34, colors=CArray([10, 20, 30])) h_append = self.header.append(h2) params = h_append.get_params() - self.assertEqual(params['id'], 'mydataset') - self.assertEqual(params['age'], 34) - self.assertEqual(params['days'], ('Mon', 'Tue', 'Wed', 'Thu')) - self.assertEqual(params['a'], 4) - self.assert_array_equal( - params['colors'], CArray([1, 2, 3, 10, 20, 30])) + self.assertEqual(params["id"], "mydataset") + self.assertEqual(params["age"], 34) + self.assertEqual(params["days"], ("Mon", "Tue", "Wed", "Thu")) + self.assertEqual(params["a"], 4) + self.assert_array_equal(params["colors"], CArray([1, 2, 3, 10, 20, 30])) def test_copy(self): """Test for .deepcopy() method.""" @@ -89,16 +86,16 @@ def test_copy(self): # Now change original header self.header.colors[0] = 100 params = self.header.get_params() - self.assertEqual(params['id'], 'mydataset') - self.assertEqual(params['age'], 34) - self.assertEqual(params['days'], ('Mon', 'Tue', 'Wed', 'Thu')) - self.assert_array_equal(params['colors'], CArray([100, 2, 3])) + self.assertEqual(params["id"], "mydataset") + self.assertEqual(params["age"], 34) + self.assertEqual(params["days"], ("Mon", "Tue", "Wed", "Thu")) + self.assert_array_equal(params["colors"], CArray([100, 2, 3])) params = h_copy.get_params() - self.assertEqual(params['id'], 'mydataset') - self.assertEqual(params['age'], 34) - self.assertEqual(params['days'], ('Mon', 'Tue', 'Wed', 'Thu')) - self.assert_array_equal(params['colors'], CArray([1, 2, 3])) + self.assertEqual(params["id"], "mydataset") + self.assertEqual(params["age"], 34) + self.assertEqual(params["days"], ("Mon", "Tue", "Wed", "Thu")) + self.assert_array_equal(params["colors"], CArray([1, 2, 3])) if __name__ == "__main__": diff --git a/src/secml/explanation/__init__.py b/src/secml/explanation/__init__.py index d3874c9a..9cf90171 100644 --- a/src/secml/explanation/__init__.py +++ b/src/secml/explanation/__init__.py @@ -4,4 +4,3 @@ from .c_explainer_gradient_input import CExplainerGradientInput from .c_explainer_integrated_gradients import CExplainerIntegratedGradients from .c_explainer_influence_functions import CExplainerInfluenceFunctions - diff --git a/src/secml/explanation/c_explainer.py b/src/secml/explanation/c_explainer.py index b90c1638..b6c17eeb 100644 --- a/src/secml/explanation/c_explainer.py +++ b/src/secml/explanation/c_explainer.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -20,7 +21,8 @@ class CExplainer(CCreator, metaclass=ABCMeta): Instance of the classifier to explain. """ - __super__ = 'CExplainer' + + __super__ = "CExplainer" def __init__(self, clf): self._clf = clf diff --git a/src/secml/explanation/c_explainer_gradient.py b/src/secml/explanation/c_explainer_gradient.py index 80d75da1..af7afa16 100644 --- a/src/secml/explanation/c_explainer_gradient.py +++ b/src/secml/explanation/c_explainer_gradient.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.explanation import CExplainer from secml.array import CArray @@ -31,7 +32,8 @@ class CExplainerGradient(CExplainer): class_type : 'gradient' """ - __class_type = 'gradient' + + __class_type = "gradient" def explain(self, x, y, return_grad=False): """Computes the explanation for input sample. @@ -53,6 +55,5 @@ def explain(self, x, y, return_grad=False): """ grad = self.clf.grad_f_x(x, y=y) rv = grad.deepcopy() - self.logger.debug( - "Relevance Vector:\n{:}".format(rv)) + self.logger.debug("Relevance Vector:\n{:}".format(rv)) return (rv, grad) if return_grad is True else rv diff --git a/src/secml/explanation/c_explainer_gradient_input.py b/src/secml/explanation/c_explainer_gradient_input.py index d87c4c29..a29ecccc 100644 --- a/src/secml/explanation/c_explainer_gradient_input.py +++ b/src/secml/explanation/c_explainer_gradient_input.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.explanation import CExplainerGradient @@ -36,7 +37,8 @@ class CExplainerGradientInput(CExplainerGradient): class_type : 'gradient-input' """ - __class_type = 'gradient-input' + + __class_type = "gradient-input" def explain(self, x, y, return_grad=False): """Computes the explanation for input sample. @@ -58,6 +60,5 @@ def explain(self, x, y, return_grad=False): """ grad = self.clf.grad_f_x(x, y=y) rv = x * grad # Directional derivative - self.logger.debug( - "Relevance Vector:\n{:}".format(rv)) + self.logger.debug("Relevance Vector:\n{:}".format(rv)) return (rv, grad) if return_grad is True else rv diff --git a/src/secml/explanation/c_explainer_influence_functions.py b/src/secml/explanation/c_explainer_influence_functions.py index e08c7098..d8b1624d 100644 --- a/src/secml/explanation/c_explainer_influence_functions.py +++ b/src/secml/explanation/c_explainer_influence_functions.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from scipy import linalg from secml.array import CArray @@ -27,15 +28,16 @@ class CExplainerInfluenceFunctions(CExplainerGradient): Instance of the classifier to explain. Must provide the `hessian`. tr_ds : CDataset Training dataset of the classifier to explain. - + Attributes ---------- class_type : 'influence-functions' - + """ - __class_type = 'influence-functions' - def __init__(self, clf, tr_ds, outer_loss_idx='log'): + __class_type = "influence-functions" + + def __init__(self, clf, tr_ds, outer_loss_idx="log"): super(CExplainerInfluenceFunctions, self).__init__(clf=clf) @@ -102,9 +104,13 @@ def explain(self, x, y, return_grad=False): if self._grad_inner_loss_params is None: self._grad_inner_loss_params = self.grad_inner_loss_params( - self.tr_ds.X, self.tr_ds.Y) - - v = self.grad_outer_loss_params(x, y).T.dot(self._inv_H).dot( - self._grad_inner_loss_params) + self.tr_ds.X, self.tr_ds.Y + ) + + v = ( + self.grad_outer_loss_params(x, y) + .T.dot(self._inv_H) + .dot(self._grad_inner_loss_params) + ) return (v, H) if return_grad is True else v diff --git a/src/secml/explanation/c_explainer_integrated_gradients.py b/src/secml/explanation/c_explainer_integrated_gradients.py index 42d84ddb..16773b11 100644 --- a/src/secml/explanation/c_explainer_integrated_gradients.py +++ b/src/secml/explanation/c_explainer_integrated_gradients.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml import _NoValue @@ -44,7 +45,8 @@ class CExplainerIntegratedGradients(CExplainerGradient): class_type : 'integrated-gradients' """ - __class_type = 'integrated-gradients' + + __class_type = "integrated-gradients" def explain(self, x, y, return_grad=_NoValue, reference=None, m=50): """Computes the explanation for input sample. @@ -68,13 +70,15 @@ def explain(self, x, y, return_grad=_NoValue, reference=None, m=50): """ if return_grad is not _NoValue: - raise ValueError("`return_grad` is not supported by `{:}`".format( - self.__class__.__name__)) + raise ValueError( + "`return_grad` is not supported by `{:}`".format( + self.__class__.__name__ + ) + ) if reference is None: # Use default reference values if reference is not specified - reference = CArray.zeros( - shape=x.shape, dtype=x.dtype, sparse=x.issparse) + reference = CArray.zeros(shape=x.shape, dtype=x.dtype, sparse=x.issparse) x = x.atleast_2d() @@ -88,8 +92,7 @@ def explain(self, x, y, return_grad=_NoValue, reference=None, m=50): a = (x - reference) * (1 / m) * riemman_approx - self.logger.debug( - "Attributions for class {:}:\n{:}".format(y, a)) + self.logger.debug("Attributions for class {:}:\n{:}".format(y, a)) # Checks prop 1: attr should add up to the difference between # the score at the input and that at the reference @@ -118,17 +121,16 @@ def check_attributions(self, x, reference, c, attributions): """ # Checks prop 1: attr should add up to the difference between # the score at the input and that at the reference - x_pred, x_score = self.clf.predict( - x, return_decision_function=True) - ref_pred, ref_score = self.clf.predict( - reference, return_decision_function=True) + x_pred, x_score = self.clf.predict(x, return_decision_function=True) + ref_pred, ref_score = self.clf.predict(reference, return_decision_function=True) prop_check = abs(x_score[c] - ref_score[c]) prop_check = abs(prop_check - abs(attributions.sum())).item() if prop_check > 1e-1: self.logger.warning( "Attributions should add up to the difference between the " "score at the input and that at the reference. Increase `m` " - "or change the reference. Current value {:}.".format(prop_check)) + "or change the reference. Current value {:}.".format(prop_check) + ) @staticmethod def linearly_interpolate(x, reference=None, m=50): @@ -152,8 +154,7 @@ def linearly_interpolate(x, reference=None, m=50): """ if reference is None: # Use default reference values if reference is not specified - reference = CArray.zeros( - shape=x.shape, dtype=x.dtype, sparse=x.issparse) + reference = CArray.zeros(shape=x.shape, dtype=x.dtype, sparse=x.issparse) if x.shape != reference.shape: raise ValueError("reference must have shape {:}".format(x.shape)) diff --git a/src/secml/explanation/tests/test_c_explainer_gradient.py b/src/secml/explanation/tests/test_c_explainer_gradient.py index 257e6cd5..d6ebfca6 100644 --- a/src/secml/explanation/tests/test_c_explainer_gradient.py +++ b/src/secml/explanation/tests/test_c_explainer_gradient.py @@ -37,17 +37,16 @@ def test_explain(self): # Plotting original image fig.subplot(1, 2, 1) - fig.sp.imshow(attr.reshape((8, 8)), cmap='gray') + fig.sp.imshow(attr.reshape((8, 8)), cmap="gray") th = max(abs(attr.min()), abs(attr.max())) # Plotting attributions fig.subplot(1, 2, 2) - fig.sp.imshow(attr.reshape((8, 8)), - cmap='seismic', vmin=-1*th, vmax=th) + fig.sp.imshow(attr.reshape((8, 8)), cmap="seismic", vmin=-1 * th, vmax=th) fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/explanation/tests/test_c_explainer_gradient_input.py b/src/secml/explanation/tests/test_c_explainer_gradient_input.py index a42190fe..167fdf8c 100644 --- a/src/secml/explanation/tests/test_c_explainer_gradient_input.py +++ b/src/secml/explanation/tests/test_c_explainer_gradient_input.py @@ -19,7 +19,8 @@ def setUp(self): self.ds = CDLDigits().load() self.clf = CClassifierMulticlassOVA( - CClassifierSVM, kernel=CKernelRBF(gamma=1e-3)) + CClassifierSVM, kernel=CKernelRBF(gamma=1e-3) + ) # Training classifier self.clf.fit(self.ds.X, self.ds.Y) @@ -36,14 +37,15 @@ def test_explain(self): x_pred, x_score = self.clf.predict(x, return_decision_function=True) self.logger.info( - "Predicted class {:}, scores:\n{:}".format(x_pred.item(), x_score)) + "Predicted class {:}, scores:\n{:}".format(x_pred.item(), x_score) + ) self.logger.info("Candidates: {:}".format(x_score.argsort()[::-1])) fig = CFigure(height=1.5, width=12) # Plotting original image - fig.subplot(1, self.ds.num_classes+1, 1) - fig.sp.imshow(x.reshape((8, 8)), cmap='gray') + fig.subplot(1, self.ds.num_classes + 1, 1) + fig.sp.imshow(x.reshape((8, 8)), cmap="gray") fig.sp.title("Origin c{:}".format(y_true)) fig.sp.yticks([]) fig.sp.xticks([]) @@ -55,8 +57,7 @@ def test_explain(self): attr_c = self.explainer.explain(x, y=c) attr[c, :] = attr_c - self.logger.info( - "Attributions class {:}:\n{:}".format(c, attr_c.tolist())) + self.logger.info("Attributions class {:}:\n{:}".format(c, attr_c.tolist())) self.assertIsInstance(attr, CArray) self.assertEqual(attr.shape, attr.shape) @@ -66,9 +67,10 @@ def test_explain(self): # Plotting attributions for c in self.ds.classes: - fig.subplot(1, self.ds.num_classes+1, 2+c) - fig.sp.imshow(attr[c, :].reshape((8, 8)), - cmap='seismic', vmin=-1*th, vmax=th) + fig.subplot(1, self.ds.num_classes + 1, 2 + c) + fig.sp.imshow( + attr[c, :].reshape((8, 8)), cmap="seismic", vmin=-1 * th, vmax=th + ) fig.sp.title("Attr c{:}".format(c)) fig.sp.yticks([]) fig.sp.xticks([]) @@ -78,5 +80,5 @@ def test_explain(self): fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/explanation/tests/test_c_explainer_influence_functions.py b/src/secml/explanation/tests/test_c_explainer_influence_functions.py index 44bc4b80..f7d4e7d1 100644 --- a/src/secml/explanation/tests/test_c_explainer_influence_functions.py +++ b/src/secml/explanation/tests/test_c_explainer_influence_functions.py @@ -4,11 +4,9 @@ from secml.data.loader import CDataLoaderMNIST from secml.data.splitter import CDataSplitterKFold from secml.ml.peval.metrics import CMetricAccuracy -from secml.ml.classifiers import \ - CClassifierSVM, CClassifierLogistic, CClassifierRidge +from secml.ml.classifiers import CClassifierSVM, CClassifierLogistic, CClassifierRidge from secml.ml.kernels import CKernelRBF -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTest +from secml.ml.classifiers.gradients.tests.test_classes import CClassifierGradientTest from secml.explanation import CExplainerInfluenceFunctions @@ -23,43 +21,46 @@ def setUpClass(cls): cls._metric = CMetricAccuracy() def test_explanation_svm(self): - self._clf = CClassifierSVM(kernel='linear') # train in the dual - self._clf_idx = 'lin-svm' + self._clf = CClassifierSVM(kernel="linear") # train in the dual + self._clf_idx = "lin-svm" self._test_explanation_simple_clf() def test_explanation_logistic(self): self._clf = CClassifierLogistic() - self._clf_idx = 'logistic regression' + self._clf_idx = "logistic regression" self._test_explanation_simple_clf() def test_explanation_svm_rbf(self): self._clf = CClassifierSVM(kernel=CKernelRBF(gamma=0.01), C=10) - self._clf_idx = 'rbf-svm' + self._clf_idx = "rbf-svm" self._test_explanation_simple_clf() def test_explanation_ridge(self): self._clf = CClassifierRidge() - self._clf_idx = 'Ridge' + self._clf_idx = "Ridge" self._test_explanation_simple_clf() @staticmethod def _create_mnist_dataset( - digits=[4, 9], n_tr=100, n_val=200, n_ts=200, seed=4): # 10 + digits=[4, 9], n_tr=100, n_val=200, n_ts=200, seed=4 + ): # 10 loader = CDataLoaderMNIST() - tr = loader.load('training', digits=digits) - ts = loader.load('testing', digits=digits, num_samples=n_ts) + tr = loader.load("training", digits=digits) + ts = loader.load("testing", digits=digits, num_samples=n_ts) # start train and validation dataset split splitter = CDataSplitterKFold(num_folds=2, random_state=seed) splitter.compute_indices(tr) - val_dts_idx = CArray.randsample(CArray.arange(0, tr.num_samples), - n_val, random_state=seed) + val_dts_idx = CArray.randsample( + CArray.arange(0, tr.num_samples), n_val, random_state=seed + ) val = tr[val_dts_idx, :] - tr_dts_idx = CArray.randsample(CArray.arange(0, tr.num_samples), - n_tr, random_state=seed) + tr_dts_idx = CArray.randsample( + CArray.arange(0, tr.num_samples), n_tr, random_state=seed + ) tr = tr[tr_dts_idx, :] tr.X /= 255.0 @@ -81,11 +82,11 @@ def _compute_influences(self): self._check_accuracy() - explanation = CExplainerInfluenceFunctions(self._clf, self._tr, - outer_loss_idx=self._clf_loss) + explanation = CExplainerInfluenceFunctions( + self._clf, self._tr, outer_loss_idx=self._clf_loss + ) self.influences = explanation.explain(self._ts.X, self._ts.Y) - self.clf_gradients = CClassifierGradientTest.create( - self._clf.class_type) + self.clf_gradients = CClassifierGradientTest.create(self._clf.class_type) def _get_tr_without_point(self, p_idx): """ @@ -124,7 +125,8 @@ def _check_influence(self, point_idx): clf_copy.fit(new_dataset.X, new_dataset.Y) loss = (1 / self._ts.num_samples) * self.clf_gradients.l( - self._ts.X, self._ts.Y, clf_copy).sum(axis=None) + self._ts.X, self._ts.Y, clf_copy + ).sum(axis=None) return loss @@ -143,27 +145,35 @@ def _check_prototype_pair(self, p_inf_idx, p_not_inf_idx): less influent """ acc_without_p_infl = self._check_influence(p_inf_idx) - self.logger.info("The loss without the point {:} supposed to be " - "one of the most influent is {:}".format(p_inf_idx, - acc_without_p_infl)) + self.logger.info( + "The loss without the point {:} supposed to be " + "one of the most influent is {:}".format(p_inf_idx, acc_without_p_infl) + ) acc_without_p_not_infl = self._check_influence(p_not_inf_idx) - self.logger.info("The loss without the point {:} supposed to be " - "one of the less influent is {:}".format( - p_not_inf_idx, - acc_without_p_not_infl)) - - self.assertGreater(acc_without_p_infl, acc_without_p_not_infl, - "The point that is supposed to be between the " - "less influent has a higher influence of the " - "point supposed to be between one of the most " - "influent") + self.logger.info( + "The loss without the point {:} supposed to be " + "one of the less influent is {:}".format( + p_not_inf_idx, acc_without_p_not_infl + ) + ) + + self.assertGreater( + acc_without_p_infl, + acc_without_p_not_infl, + "The point that is supposed to be between the " + "less influent has a higher influence of the " + "point supposed to be between one of the most " + "influent", + ) def _test_explanation(self): self._compute_influences() - self.assertEqual(self.influences.shape, - (self._ts.num_samples, self._tr.num_samples), - "The shape of the influences is wrong!") + self.assertEqual( + self.influences.shape, + (self._ts.num_samples, self._tr.num_samples), + "The shape of the influences is wrong!", + ) average_influence = self.influences.mean(axis=0).ravel() # order the idx of the tr samples in the way to have the less @@ -178,10 +188,12 @@ def _test_explanation(self): self._check_prototype_pair(infl_idx, not_infl_idx) def _test_explanation_simple_clf(self): - self.logger.info("Explain the decisions of a {:} classifier and " - "test if they are reasonable".format(self._clf_idx)) + self.logger.info( + "Explain the decisions of a {:} classifier and " + "test if they are reasonable".format(self._clf_idx) + ) self._test_explanation() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/explanation/tests/test_c_explainer_integrated_gradients.py b/src/secml/explanation/tests/test_c_explainer_integrated_gradients.py index ae788511..4e848037 100644 --- a/src/secml/explanation/tests/test_c_explainer_integrated_gradients.py +++ b/src/secml/explanation/tests/test_c_explainer_integrated_gradients.py @@ -22,7 +22,8 @@ def setUpClass(cls): cls.ds = CDLDigits().load() cls.clf = CClassifierMulticlassOVA( - CClassifierSVM, kernel=CKernelRBF(gamma=1e-3)) + CClassifierSVM, kernel=CKernelRBF(gamma=1e-3) + ) # Training classifier cls.clf.fit(cls.ds.X, cls.ds.Y) @@ -43,7 +44,8 @@ def test_explain(self): x_pred, x_score = self.clf.predict(x, return_decision_function=True) self.logger.info( - "Predicted class {:}, scores:\n{:}".format(x_pred.item(), x_score)) + "Predicted class {:}, scores:\n{:}".format(x_pred.item(), x_score) + ) self.logger.info("Candidates: {:}".format(x_score.argsort()[::-1])) ref_img = None # Use default reference image @@ -61,8 +63,8 @@ def test_explain(self): fig = CFigure(height=1.5, width=12) # Plotting original image - fig.subplot(1, self.ds.num_classes+1, 1) - fig.sp.imshow(x.reshape((8, 8)), cmap='gray') + fig.subplot(1, self.ds.num_classes + 1, 1) + fig.sp.imshow(x.reshape((8, 8)), cmap="gray") fig.sp.title("Origin c{:}".format(y_true)) fig.sp.yticks([]) fig.sp.xticks([]) @@ -71,9 +73,10 @@ def test_explain(self): # Plotting attributions for c in self.ds.classes: - fig.subplot(1, self.ds.num_classes+1, 2+c) - fig.sp.imshow(attr[c, :].reshape((8, 8)), - cmap='seismic', vmin=-1*th, vmax=th) + fig.subplot(1, self.ds.num_classes + 1, 2 + c) + fig.sp.imshow( + attr[c, :].reshape((8, 8)), cmap="seismic", vmin=-1 * th, vmax=th + ) fig.sp.title("Attr c{:}".format(c)) fig.sp.yticks([]) fig.sp.xticks([]) @@ -96,5 +99,5 @@ def test_linear_interpolation(self): self.assertEqual(ret[10].shape, sample.shape) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/_plots/c_plot.py b/src/secml/figure/_plots/c_plot.py index 1135c3b7..4f29683f 100644 --- a/src/secml/figure/_plots/c_plot.py +++ b/src/secml/figure/_plots/c_plot.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import inspect import sys @@ -79,9 +80,11 @@ def _collect_spmethods(self): c_methods = inspect.getmembers(c_info[1], pred) for method in c_methods: # For each method (name, unbound method) # Skip special methods and already added methods - if not method[0].startswith('__') and \ - method[0] not in methods_list and \ - not hasattr(self, method[0]): + if ( + not method[0].startswith("__") + and method[0] not in methods_list + and not hasattr(self, method[0]) + ): methods_list.append(method) # Add methods to CPlot. Use __get__ to bound method to CPlot instance for method in methods_list: @@ -95,10 +98,10 @@ def n_lines(self): def _set_lines_params(self, kwargs): """Add lines-related parameters to input dictionary.""" # Parameters are updated/added only if not yet specified - if 'linewidth' not in kwargs: - kwargs['linewidth'] = self._params['lines.linewidth'] - if 'markersize' not in kwargs: - kwargs['markersize'] = self._params['lines.markersize'] + if "linewidth" not in kwargs: + kwargs["linewidth"] = self._params["lines.linewidth"] + if "markersize" not in kwargs: + kwargs["markersize"] = self._params["lines.markersize"] return kwargs @@ -309,8 +312,8 @@ def semilogx(self, x, y=None, *args, **kwargs): :include-source: """ - if 'subsx' in kwargs and isinstance(kwargs['subsx'], CArray): - kwargs['subsx'] = kwargs['subsx'].tondarray() + if "subsx" in kwargs and isinstance(kwargs["subsx"], CArray): + kwargs["subsx"] = kwargs["subsx"].tondarray() # Set other lines-related parameters kwargs = self._set_lines_params(kwargs) # Convert sequences inside tuple to ndarray @@ -354,8 +357,8 @@ def semilogy(self, x, y=None, *args, **kwargs): :include-source: """ - if 'subsy' in kwargs and isinstance(kwargs['subsy'], CArray): - kwargs['subsy'] = kwargs['subsy'].tondarray() + if "subsy" in kwargs and isinstance(kwargs["subsy"], CArray): + kwargs["subsy"] = kwargs["subsy"].tondarray() # Set other lines-related parameters kwargs = self._set_lines_params(kwargs) # Convert sequences inside tuple to ndarray @@ -393,10 +396,10 @@ def loglog(self, x, y=None, *args, **kwargs): .plot : Plot with standard axis. """ - if 'subsx' in kwargs and isinstance(kwargs['subsx'], CArray): - kwargs['subsx'] = kwargs['subsx'].tondarray() - if 'subsy' in kwargs and isinstance(kwargs['subsy'], CArray): - kwargs['subsy'] = kwargs['subsy'].tondarray() + if "subsx" in kwargs and isinstance(kwargs["subsx"], CArray): + kwargs["subsx"] = kwargs["subsx"].tondarray() + if "subsy" in kwargs and isinstance(kwargs["subsy"], CArray): + kwargs["subsy"] = kwargs["subsy"].tondarray() # Set other lines-related parameters kwargs = self._set_lines_params(kwargs) # Convert sequences inside tuple to ndarray @@ -406,7 +409,7 @@ def loglog(self, x, y=None, *args, **kwargs): else: self._sp.loglog(x, y, *args, **kwargs) - def scatter(self, x, y, s=20, c='b', *args, **kwargs): + def scatter(self, x, y, s=20, c="b", *args, **kwargs): """Scatter plot of x vs y. Parameters @@ -454,8 +457,8 @@ def scatter(self, x, y, s=20, c='b', *args, **kwargs): :include-source: """ - if 'linewidths' not in kwargs: - kwargs['linewidths'] = self._params['lines.linewidth'] + if "linewidths" not in kwargs: + kwargs["linewidths"] = self._params["lines.linewidth"] # Convert sequences inside tuple to ndarray if not isinstance(c, str): x, y, c = tuple_sequence_tondarray((x, y, c)) @@ -543,8 +546,8 @@ def contour(self, x, y, z, *args, **kwargs): :include-source: """ - if 'linewidths' not in kwargs: - kwargs['linewidths'] = self._params['lines.linewidth'] + if "linewidths" not in kwargs: + kwargs["linewidths"] = self._params["lines.linewidth"] # Convert sequences inside tuple to ndarray x, y, z = tuple_sequence_tondarray((x, y, z)) return self._sp.contour(x, y, z, *args, **kwargs) @@ -679,8 +682,8 @@ def clabel(self, contour, *args, **kwargs): :include-source: """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] return self._sp.clabel(contour, *args, **kwargs) def colorbar(self, mappable, ticks=None, *args, **kwargs): @@ -773,10 +776,11 @@ def colorbar(self, mappable, ticks=None, *args, **kwargs): """ ticks = ticks.tolist() if isinstance(ticks, CArray) else ticks from matplotlib.pyplot import colorbar + cbar = colorbar(mappable, ticks=ticks, *args, **kwargs) - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] - cbar.ax.tick_params(labelsize=kwargs['fontsize']) + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] + cbar.ax.tick_params(labelsize=kwargs["fontsize"]) return cbar def errorbar(self, x, y, xerr=None, yerr=None, *args, **kwargs): @@ -897,11 +901,12 @@ def bar(self, left, height, width=0.8, bottom=None, *args, **kwargs): :include-source: """ - if 'linewidth' not in kwargs: - kwargs['linewidth'] = self._params['lines.linewidth'] + if "linewidth" not in kwargs: + kwargs["linewidth"] = self._params["lines.linewidth"] # Convert sequences inside tuple to ndarray left, height, width, bottom = tuple_sequence_tondarray( - (left, height, width, bottom)) + (left, height, width, bottom) + ) return self._sp.bar(left, height, width, bottom, *args, **kwargs) def barh(self, bottom, width, height=0.8, left=None, *args, **kwargs): @@ -953,11 +958,12 @@ def barh(self, bottom, width, height=0.8, left=None, *args, **kwargs): bar_list : list of bar type objects """ - if 'linewidth' not in kwargs: - kwargs['linewidth'] = self._params['lines.linewidth'] + if "linewidth" not in kwargs: + kwargs["linewidth"] = self._params["lines.linewidth"] # Convert sequences inside tuple to ndarray bottom, width, height, left = tuple_sequence_tondarray( - (bottom, width, height, left)) + (bottom, width, height, left) + ) return self._sp.barh(bottom, width, height, left, *args, **kwargs) def hist(self, x, *args, **kwargs): @@ -1078,21 +1084,41 @@ def hist(self, x, *args, **kwargs): :include-source: """ - if 'linewidth' not in kwargs: - kwargs['linewidth'] = self._params['lines.linewidth'] + if "linewidth" not in kwargs: + kwargs["linewidth"] = self._params["lines.linewidth"] x = list(xi.tondarray() if isinstance(xi, CArray) else xi for xi in x) n, bins, patches = self._sp.hist(x, *args, **kwargs) if isinstance(n, list): n = list(CArray(ni) for ni in n) return n, CArray(bins), patches - def boxplot(self, x, notch=False, sym=None, vert=True, whis=1.5, - positions=None, widths=None, patch_artist=False, - bootstrap=None, usermedians=None, conf_intervals=None, - meanline=False, showmeans=False, showcaps=True, - showbox=True, showfliers=True, boxprops=None, labels=None, - flierprops=None, medianprops=None, meanprops=None, - capprops=None, whiskerprops=None, manage_xticks=True): + def boxplot( + self, + x, + notch=False, + sym=None, + vert=True, + whis=1.5, + positions=None, + widths=None, + patch_artist=False, + bootstrap=None, + usermedians=None, + conf_intervals=None, + meanline=False, + showmeans=False, + showcaps=True, + showbox=True, + showfliers=True, + boxprops=None, + labels=None, + flierprops=None, + medianprops=None, + meanprops=None, + capprops=None, + whiskerprops=None, + manage_xticks=True, + ): """Make a box and whisker plot. Make a box and whisker plot for each column of *x* or each @@ -1209,30 +1235,49 @@ def boxplot(self, x, notch=False, sym=None, vert=True, whis=1.5, """ if isinstance(x, CArray): - x = (x, ) + x = (x,) x = tuple_sequence_tondarray(tuple(x)) if usermedians is not None: if isinstance(usermedians, CArray): - usermedians = (usermedians, ) + usermedians = (usermedians,) usermedians = tuple_sequence_tondarray(tuple(usermedians)) if conf_intervals is not None: if isinstance(conf_intervals, CArray): - conf_intervals = (conf_intervals, ) + conf_intervals = (conf_intervals,) conf_intervals = tuple_sequence_tondarray(tuple(conf_intervals)) if isinstance(positions, CArray): positions = positions.tondarray() - self._sp.boxplot(x, notch, sym, vert, whis, - positions, widths, patch_artist, - bootstrap, usermedians, conf_intervals, - meanline, showmeans, showcaps, - showbox, showfliers, boxprops, - labels, flierprops, medianprops, - meanprops, capprops, whiskerprops, - manage_xticks) - - def fill_between(self, x, y1, y2=0, where=None, - interpolate=False, step=None, **kwargs): + self._sp.boxplot( + x, + notch, + sym, + vert, + whis, + positions, + widths, + patch_artist, + bootstrap, + usermedians, + conf_intervals, + meanline, + showmeans, + showcaps, + showbox, + showfliers, + boxprops, + labels, + flierprops, + medianprops, + meanprops, + capprops, + whiskerprops, + manage_xticks, + ) + + def fill_between( + self, x, y1, y2=0, where=None, interpolate=False, step=None, **kwargs + ): """Fill the area between two horizontal curves. The curves are defined by the points (x, y1) and (x, y2). @@ -1286,8 +1331,9 @@ def fill_between(self, x, y1, y2=0, where=None, """ x, y1, y2, where = tuple_sequence_tondarray((x, y1, y2, where)) - self._sp.fill_between(x, y1, y2=y2, where=where, - interpolate=interpolate, step=step, **kwargs) + self._sp.fill_between( + x, y1, y2=y2, where=where, interpolate=interpolate, step=step, **kwargs + ) def xlim(self, bottom=None, top=None): """Set axes x limits. @@ -1326,7 +1372,7 @@ def ylim(self, bottom=None, top=None): self._ylim = (bottom, top) self._sp.set_ylim(bottom, top) - def xscale(self, scale_type, nonposx='mask', basex=10, **kwargs): + def xscale(self, scale_type, nonposx="mask", basex=10, **kwargs): """Set scale for x axis. Parameters @@ -1342,7 +1388,7 @@ def xscale(self, scale_type, nonposx='mask', basex=10, **kwargs): """ self._sp.set_xscale(scale_type, nonposx=nonposx, basex=basex, **kwargs) - def yscale(self, scale_type, nonposy='mask', basey=10, **kwargs): + def yscale(self, scale_type, nonposy="mask", basey=10, **kwargs): """Set scale for y axis. Parameters @@ -1374,8 +1420,8 @@ def xlabel(self, label, *args, **kwargs): :include-source: """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] self._xlabel = label self._sp.set_xlabel(label, *args, **kwargs) @@ -1394,8 +1440,8 @@ def ylabel(self, label, *args, **kwargs): .xlabel : Set a label for the x axis. """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] self._ylabel = label self._sp.set_ylabel(label, *args, **kwargs) @@ -1521,7 +1567,7 @@ def tick_params(self, *args, **kwargs): """ self._sp.tick_params(*args, **kwargs) - def grid(self, grid_on=True, axis='both', **kwargs): + def grid(self, grid_on=True, axis="both", **kwargs): """Draw grid for current plot. Parameters @@ -1633,8 +1679,8 @@ def text(self, *args, **kwargs): 'large', 'x-large', 'xx-large' or an absolute font size, e.g., 12 """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] return self._sp.text(*args, **kwargs) def legend(self, *args, **kwargs): @@ -1765,8 +1811,8 @@ def legend(self, *args, **kwargs): :include-source: """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] self.show_legend = True return self._sp.legend(*args, **kwargs) @@ -1776,15 +1822,26 @@ def get_legend(self): def title(self, text, *args, **kwargs): """Set a title for subplot.""" - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self._params['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self._params["font.size"] return self._sp.set_title(text, *args, **kwargs) - def plot_path(self, path, path_style='-', path_width=1.5, path_color='k', - straight=False, start_style='h', start_facecolor='r', - start_edgecolor='k', start_edgewidth=1, - final_style='*', final_facecolor='g', - final_edgecolor='k', final_edgewidth=1): + def plot_path( + self, + path, + path_style="-", + path_width=1.5, + path_color="k", + straight=False, + start_style="h", + start_facecolor="r", + start_edgecolor="k", + start_edgewidth=1, + final_style="*", + final_facecolor="g", + final_edgecolor="k", + final_edgewidth=1, + ): """Plot a path traversed by a point. By default, path is drawn in solid black, start point @@ -1828,25 +1885,41 @@ def plot_path(self, path, path_style='-', path_width=1.5, path_color='k', """ path_2d = CArray(path).atleast_2d() if path_2d.shape[1] != 2: - raise ValueError("cannot plot a {:}-Dimensional path." - "".format(path_2d.shape[1])) + raise ValueError( + "cannot plot a {:}-Dimensional path." "".format(path_2d.shape[1]) + ) # Plotting full path, then the start and the end points if straight is False: - self.plot(path_2d[:, 0], path_2d[:, 1], - linestyle=path_style, - color=path_color, - linewidth=path_width) + self.plot( + path_2d[:, 0], + path_2d[:, 1], + linestyle=path_style, + color=path_color, + linewidth=path_width, + ) else: - self.plot(path_2d[[0, -1], 0], path_2d[[0, -1], 1], - linestyle=path_style, color=path_color) - self.plot(path_2d[0, 0], path_2d[0, 1], marker=start_style, - markerfacecolor=start_facecolor, - markeredgecolor=start_edgecolor, - markeredgewidth=start_edgewidth) - self.plot(path_2d[-1, 0], path_2d[-1, 1], marker=final_style, - markerfacecolor=final_facecolor, - markeredgecolor=final_edgecolor, - markeredgewidth=final_edgewidth) + self.plot( + path_2d[[0, -1], 0], + path_2d[[0, -1], 1], + linestyle=path_style, + color=path_color, + ) + self.plot( + path_2d[0, 0], + path_2d[0, 1], + marker=start_style, + markerfacecolor=start_facecolor, + markeredgecolor=start_edgecolor, + markeredgewidth=start_edgewidth, + ) + self.plot( + path_2d[-1, 0], + path_2d[-1, 1], + marker=final_style, + markerfacecolor=final_facecolor, + markeredgecolor=final_edgecolor, + markeredgewidth=final_edgewidth, + ) def imshow(self, img, *args, **kwargs): """Plot image. @@ -1873,8 +1946,9 @@ def matshow(self, array, *args, **kwargs): """ return self._sp.matshow(array.tondarray(), *args, **kwargs) - def quiver(self, U, V, X=None, Y=None, - color='k', linestyle='-', linewidth=1.0, alpha=1.0): + def quiver( + self, U, V, X=None, Y=None, color="k", linestyle="-", linewidth=1.0, alpha=1.0 + ): """A quiver plot displays velocity vectors as arrows with components (u,v) at the points (x,y). @@ -1907,11 +1981,22 @@ def quiver(self, U, V, X=None, Y=None, """ if X is None: - self._sp.quiver(U.tondarray(), V.tondarray(), - color=color, linestyle=linestyle, - linewidth=linewidth, alpha=alpha) + self._sp.quiver( + U.tondarray(), + V.tondarray(), + color=color, + linestyle=linestyle, + linewidth=linewidth, + alpha=alpha, + ) else: - self._sp.quiver(X.tondarray(), Y.tondarray(), - U.tondarray(), V.tondarray(), - color=color, linestyle=linestyle, - linewidth=linewidth, alpha=alpha) + self._sp.quiver( + X.tondarray(), + Y.tondarray(), + U.tondarray(), + V.tondarray(), + color=color, + linestyle=linestyle, + linewidth=linewidth, + alpha=alpha, + ) diff --git a/src/secml/figure/_plots/c_plot_classifier.py b/src/secml/figure/_plots/c_plot_classifier.py index 3be718dd..05c0055c 100644 --- a/src/secml/figure/_plots/c_plot_classifier.py +++ b/src/secml/figure/_plots/c_plot_classifier.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.figure._plots import CPlotFunction from secml.ml.classifiers import CClassifier from secml.array import CArray @@ -30,8 +31,15 @@ def apply_params_clf(self): """Apply defined parameters to active subplot.""" self.grid(grid_on=False) - def plot_decision_regions(self, clf, plot_background=True, levels=None, - grid_limits=None, n_grid_points=30, cmap=None): + def plot_decision_regions( + self, + clf, + plot_background=True, + levels=None, + grid_limits=None, + n_grid_points=30, + cmap=None, + ): """Plot decision boundaries and regions for the given classifier. Parameters @@ -61,23 +69,25 @@ def plot_decision_regions(self, clf, plot_background=True, levels=None, if cmap is None: if clf.n_classes <= 6: - colors = ['blue', 'red', 'lightgreen', 'black', 'gray', 'cyan'] - cmap = colors[:clf.n_classes] + colors = ["blue", "red", "lightgreen", "black", "gray", "cyan"] + cmap = colors[: clf.n_classes] else: - cmap = 'jet' + cmap = "jet" if levels is None: levels = CArray.arange(0.5, clf.n_classes).tolist() - self.plot_fun(func=clf.predict, - multipoint=True, - colorbar=False, - n_colors=clf.n_classes, - cmap=cmap, - levels=levels, - plot_background=plot_background, - grid_limits=grid_limits, - n_grid_points=n_grid_points, - alpha=0.5) + self.plot_fun( + func=clf.predict, + multipoint=True, + colorbar=False, + n_colors=clf.n_classes, + cmap=cmap, + levels=levels, + plot_background=plot_background, + grid_limits=grid_limits, + n_grid_points=n_grid_points, + alpha=0.5, + ) self.apply_params_clf() diff --git a/src/secml/figure/_plots/c_plot_constraint.py b/src/secml/figure/_plots/c_plot_constraint.py index e92c3490..c5f737bb 100644 --- a/src/secml/figure/_plots/c_plot_constraint.py +++ b/src/secml/figure/_plots/c_plot_constraint.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.figure._plots import CPlotFunction from secml.optim.constraints import CConstraint @@ -34,12 +35,13 @@ def plot_constraint(self, constraint, grid_limits=None, n_grid_points=30): """ if not isinstance(constraint, CConstraint): - raise TypeError( - "'constraint' must be an instance of `CConstraint`.") - - self.plot_fun(func=constraint.constraint, - plot_background=False, - grid_limits=grid_limits, - n_grid_points=n_grid_points, - levels=[0], - levels_linewidth=1.5) + raise TypeError("'constraint' must be an instance of `CConstraint`.") + + self.plot_fun( + func=constraint.constraint, + plot_background=False, + grid_limits=grid_limits, + n_grid_points=n_grid_points, + levels=[0], + levels_linewidth=1.5, + ) diff --git a/src/secml/figure/_plots/c_plot_ds.py b/src/secml/figure/_plots/c_plot_ds.py index 488eccff..97fcf98a 100644 --- a/src/secml/figure/_plots/c_plot_ds.py +++ b/src/secml/figure/_plots/c_plot_ds.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from matplotlib import cm from secml.figure._plots import CPlot @@ -36,7 +37,7 @@ def apply_params_ds(self): fig_legend.set_visible(True) self.grid(grid_on=True) - def plot_ds(self, dataset, colors=None, markers='o', *args, **kwargs): + def plot_ds(self, dataset, colors=None, markers="o", *args, **kwargs): """Plot patterns of each class with a different color/marker. Parameters @@ -63,29 +64,36 @@ def plot_ds(self, dataset, colors=None, markers='o', *args, **kwargs): classes = dataset.classes if colors is None: if classes.size <= 6: - colors = ['blue', 'red', 'lightgreen', 'black', 'gray', 'cyan'] + colors = ["blue", "red", "lightgreen", "black", "gray", "cyan"] from matplotlib.colors import ListedColormap - cmap = ListedColormap(colors[:classes.size]) + + cmap = ListedColormap(colors[: classes.size]) else: - cmap = 'jet' + cmap = "jet" else: from matplotlib.colors import ListedColormap + cmap = ListedColormap(colors) # Next returns an ndarray classes.size X 4 (RGB + Alpha) - colors = cm.ScalarMappable( - cmap=cmap).to_rgba(range(classes.size)) + colors = cm.ScalarMappable(cmap=cmap).to_rgba(range(classes.size)) if is_list(markers) and len(markers) != classes.size: - raise ValueError( - "{:} markers must be specified.".format(classes.size)) + raise ValueError("{:} markers must be specified.".format(classes.size)) for cls_idx, cls in enumerate(classes.tolist()): c = colors[cls_idx] m = markers[cls_idx] if is_list(markers) else markers this_c_p = dataset.Y.find(dataset.Y == cls) - self.plot(dataset.X[this_c_p, 0], dataset.X[this_c_p, 1], - linestyle='None', color=c, marker=m, *args, **kwargs) + self.plot( + dataset.X[this_c_p, 0], + dataset.X[this_c_p, 1], + linestyle="None", + color=c, + marker=m, + *args, + **kwargs + ) # Customizing figure self.apply_params_ds() diff --git a/src/secml/figure/_plots/c_plot_fun.py b/src/secml/figure/_plots/c_plot_fun.py index e310a164..c6624edc 100644 --- a/src/secml/figure/_plots/c_plot_fun.py +++ b/src/secml/figure/_plots/c_plot_fun.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.figure._plots import CPlot from secml.figure._plots.plot_utils import create_points_grid from secml.array import CArray @@ -36,13 +37,28 @@ def apply_params_fun(self): fig_legend.set_visible(True) self.grid(grid_on=True) - def plot_fun(self, func, multipoint=False, - plot_background=True, plot_levels=True, - levels=None, levels_color='k', levels_style=None, - levels_linewidth=1.0, n_colors=50, cmap='jet', - alpha=1.0, alpha_levels=1.0, vmin=None, vmax=None, - colorbar=True, n_grid_points=30, - grid_limits=None, func_args=(), **func_kwargs): + def plot_fun( + self, + func, + multipoint=False, + plot_background=True, + plot_levels=True, + levels=None, + levels_color="k", + levels_style=None, + levels_linewidth=1.0, + n_colors=50, + cmap="jet", + alpha=1.0, + alpha_levels=1.0, + vmin=None, + vmax=None, + colorbar=True, + n_grid_points=30, + grid_limits=None, + func_args=(), + **func_kwargs + ): """Plot a function (used for decision functions or boundaries). Parameters @@ -105,59 +121,83 @@ def plot_fun(self, func, multipoint=False, levels = [0] if levels is None else levels # create the grid of the point where the function will be evaluated - pad_grid_point_features, pad_xgrid, pad_ygrid = \ - create_points_grid(grid_limits, n_grid_points) + pad_grid_point_features, pad_xgrid, pad_ygrid = create_points_grid( + grid_limits, n_grid_points + ) # Evaluate function on each grid point if multipoint is True: - grid_points_value = func( - pad_grid_point_features, *func_args, **func_kwargs) + grid_points_value = func(pad_grid_point_features, *func_args, **func_kwargs) else: grid_points_value = pad_grid_point_features.apply_along_axis( - func, 1, *func_args, **func_kwargs) + func, 1, *func_args, **func_kwargs + ) grid_points_val_reshaped = grid_points_value.reshape( - (pad_xgrid.shape[0], pad_xgrid.shape[1])) + (pad_xgrid.shape[0], pad_xgrid.shape[1]) + ) # Clipping values to show a correct color plot clip_min = -inf if vmin is None else vmin clip_max = inf if vmax is None else vmax - grid_points_val_reshaped = grid_points_val_reshaped.clip( - clip_min, clip_max) + grid_points_val_reshaped = grid_points_val_reshaped.clip(clip_min, clip_max) if is_list(cmap): # Convert list of colors to colormap from matplotlib.colors import ListedColormap + cmap = ListedColormap(cmap) ch = None if plot_background is True: # Draw a fully colored plot using 50 levels - ch = self.contourf(pad_xgrid, pad_ygrid, - grid_points_val_reshaped, - n_colors, cmap=cmap, alpha=alpha, - vmin=vmin, vmax=vmax, zorder=0) + ch = self.contourf( + pad_xgrid, + pad_ygrid, + grid_points_val_reshaped, + n_colors, + cmap=cmap, + alpha=alpha, + vmin=vmin, + vmax=vmax, + zorder=0, + ) # Displaying 20 ticks on the colorbar if colorbar is True: some_y = CArray.linspace( - grid_points_val_reshaped.min(), - grid_points_val_reshaped.max(), 20) + grid_points_val_reshaped.min(), grid_points_val_reshaped.max(), 20 + ) self.colorbar(ch, ticks=some_y) if plot_levels is True: self.contour( - pad_xgrid, pad_ygrid, grid_points_val_reshaped, - levels=levels, colors=levels_color, linestyles=levels_style, - linewidths=levels_linewidth, alpha=alpha_levels) + pad_xgrid, + pad_ygrid, + grid_points_val_reshaped, + levels=levels, + colors=levels_color, + linestyles=levels_style, + linewidths=levels_linewidth, + alpha=alpha_levels, + ) # Customizing figure self.apply_params_fun() return ch - def plot_fgrads(self, gradf, n_grid_points=30, grid_limits=None, - color='k', linestyle='-', linewidth=1.0, alpha=1.0, - func_args=(), **func_kwargs): + def plot_fgrads( + self, + gradf, + n_grid_points=30, + grid_limits=None, + color="k", + linestyle="-", + linewidth=1.0, + alpha=1.0, + func_args=(), + **func_kwargs + ): """Plot function gradient directions. Parameters @@ -184,25 +224,31 @@ def plot_fgrads(self, gradf, n_grid_points=30, grid_limits=None, """ # create the grid of the point where the function will be evaluated - pad_grid_point_features, pad_xgrid, pad_ygrid = \ - create_points_grid(grid_limits, n_grid_points) + pad_grid_point_features, pad_xgrid, pad_ygrid = create_points_grid( + grid_limits, n_grid_points + ) n_vals = pad_grid_point_features.shape[0] grad_point_values = CArray.zeros((n_vals, 2)) # compute gradient on each grid point for p_idx in range(n_vals): grad_point_values[p_idx, :] = gradf( - pad_grid_point_features[p_idx, :].ravel(), - *func_args, **func_kwargs) - - U = grad_point_values[:, 0].reshape( - (pad_xgrid.shape[0], pad_xgrid.shape[1])) - V = grad_point_values[:, 1].reshape( - (pad_xgrid.shape[0], pad_xgrid.shape[1])) - - self.quiver(U, V, pad_xgrid, pad_ygrid, - color=color, linestyle=linestyle, - linewidth=linewidth, alpha=alpha) + pad_grid_point_features[p_idx, :].ravel(), *func_args, **func_kwargs + ) + + U = grad_point_values[:, 0].reshape((pad_xgrid.shape[0], pad_xgrid.shape[1])) + V = grad_point_values[:, 1].reshape((pad_xgrid.shape[0], pad_xgrid.shape[1])) + + self.quiver( + U, + V, + pad_xgrid, + pad_ygrid, + color=color, + linestyle=linestyle, + linewidth=linewidth, + alpha=alpha, + ) # Customizing figure self.apply_params_fun() diff --git a/src/secml/figure/_plots/c_plot_metric.py b/src/secml/figure/_plots/c_plot_metric.py index 46e5cafa..36486641 100644 --- a/src/secml/figure/_plots/c_plot_metric.py +++ b/src/secml/figure/_plots/c_plot_metric.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import itertools from sklearn.metrics import confusion_matrix @@ -44,17 +45,17 @@ def apply_params_roc(self): fig_legend.set_visible(True) self.grid(grid_on=True) if self._ylabel is None: - self.ylabel('True Positive Rate (%)') + self.ylabel("True Positive Rate (%)") if self._xlabel is None: - self.xlabel('False Positive Rate (%)') + self.xlabel("False Positive Rate (%)") if self._yticks is None: self.yticks([0, 20, 40, 60, 80, 100]) if self._yticklabels is None: - self.yticklabels(['0', '20', '40', '60', '80', '100']) + self.yticklabels(["0", "20", "40", "60", "80", "100"]) if self._xticks is None: self.xticks([0.1, 0.5, 1, 2, 5, 10, 20, 50, 100]) if self._xticklabels is None: - self.xticklabels(['0.1', '0.5', '1', '2', '5', '10', '20', '50', '100']) + self.xticklabels(["0.1", "0.5", "1", "2", "5", "10", "20", "50", "100"]) # Limits have to applied after ticks to be effective if self._ylim is None: self.ylim(0, 100) @@ -94,13 +95,17 @@ def plot_roc(self, fpr, tpr, label=None, style=None, logx=True): self.apply_params_roc() # TODO: REMOVE AFTER COLORMAPS ARE IMPLEMENTED IN CFIGURE - styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.'] + styles = ["go-", "yp--", "rs-.", "bD--", "c-.", "m-", "y-."] plot_func = self.semilogx if logx is True else self.plot - plot_func(fpr * 100, tpr * 100, - styles[self.n_lines % len(styles)] if style is None else style, - label=label, markevery=self.get_xticks_idx(fpr * 100)) + plot_func( + fpr * 100, + tpr * 100, + styles[self.n_lines % len(styles)] if style is None else style, + label=label, + markevery=self.get_xticks_idx(fpr * 100), + ) if label is not None: # Legend on the lower right @@ -111,8 +116,9 @@ def plot_roc(self, fpr, tpr, label=None, style=None, logx=True): self.xticklabels(self._xticklabels) # TODO: REMOVE STYLE - def plot_roc_mean(self, roc, label=None, invert_tpr=False, - style=None, plot_std=False, logx=True): + def plot_roc_mean( + self, roc, label=None, invert_tpr=False, style=None, plot_std=False, logx=True + ): """Plot the mean of ROC curves. Curves will be plotted inside the active figure or @@ -145,14 +151,16 @@ def plot_roc_mean(self, roc, label=None, invert_tpr=False, raise TypeError("input must be a `CRoc` instance.") if roc.has_mean is False: - raise ValueError("average for input roc has not been computed. " - "Use `CRoc.average()` first.") + raise ValueError( + "average for input roc has not been computed. " + "Use `CRoc.average()` first." + ) # Customizing figure self.apply_params_roc() # TODO: REMOVE AFTER COLORMAPS ARE IMPLEMENTED IN CFIGURE - styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.'] + styles = ["go-", "yp--", "rs-.", "bD--", "c-.", "m-", "y-."] # If std should be plotted each run plots 2 curvers n_lines = int(self.n_lines / 2) if plot_std is True else self.n_lines @@ -161,21 +169,30 @@ def plot_roc_mean(self, roc, label=None, invert_tpr=False, mean_tpr = roc.mean_tpr if invert_tpr is False else 1 - roc.mean_tpr plot_func = self.semilogx if logx is True else self.plot - plot_func(roc.mean_fpr * 100, mean_tpr * 100, - styles[n_lines % len(styles)] if style is None else style, - label=label, markevery=mkrs_idx) + plot_func( + roc.mean_fpr * 100, + mean_tpr * 100, + styles[n_lines % len(styles)] if style is None else style, + label=label, + markevery=mkrs_idx, + ) if plot_std is True: if roc.has_std_dev is False: raise ValueError("roc object has no standard deviation for data.") - self.errorbar(roc.mean_fpr[mkrs_idx] * 100, mean_tpr[mkrs_idx] * 100, - ecolor=styles[n_lines % len(styles)][0] if style is None else style[0], - fmt='None', yerr=roc.std_dev_tpr[mkrs_idx] * 100) + self.errorbar( + roc.mean_fpr[mkrs_idx] * 100, + mean_tpr[mkrs_idx] * 100, + ecolor=styles[n_lines % len(styles)][0] if style is None else style[0], + fmt="None", + yerr=roc.std_dev_tpr[mkrs_idx] * 100, + ) if label is not None: # Legend on the lower right - self.legend(loc=4 if invert_tpr is False else 1, - labelspacing=0.4, handletextpad=0.3) + self.legend( + loc=4 if invert_tpr is False else 1, labelspacing=0.4, handletextpad=0.3 + ) if logx is True: # xticks have been reset by semilogx, reassign them self.xticks(self._xticks) @@ -211,6 +228,7 @@ def plot_roc_reps(self, roc, label=None, invert_tpr=False, logx=True): Figure after this plot session. """ + def label_w_rep(l_str, i): """Format input label to show repetition number. @@ -229,9 +247,9 @@ def label_w_rep(l_str, i): 2) If label is not '' -> "`label` (rep `i`)" """ - i_label = 'rep {:}'.format(i) + i_label = "rep {:}".format(i) if l_str is not None: - i_label = l_str + ' (' + i_label + ')' + i_label = l_str + " (" + i_label + ")" return i_label @@ -242,7 +260,7 @@ def label_w_rep(l_str, i): self.apply_params_roc() # TODO: REMOVE AFTER COLORMAPS ARE IMPLEMENTED IN CFIGURE - styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.'] + styles = ["go-", "yp--", "rs-.", "bD--", "c-.", "m-", "y-."] # Storing number of lines already plotted to chose style accordingly n_lines = self.n_lines @@ -259,24 +277,35 @@ def label_w_rep(l_str, i): tpr = tpr if invert_tpr is False else 1 - tpr - plot_func(fpr * 100, tpr * 100, - styles[(n_lines + rep_i) % len(styles)], - label=label_w_rep(label, rep_i), - markevery=self.get_xticks_idx(fpr * 100)) + plot_func( + fpr * 100, + tpr * 100, + styles[(n_lines + rep_i) % len(styles)], + label=label_w_rep(label, rep_i), + markevery=self.get_xticks_idx(fpr * 100), + ) if label is not None: # Legend on the lower right - self.legend(loc=4 if invert_tpr is False else 1, - labelspacing=0.4, handletextpad=0.3) + self.legend( + loc=4 if invert_tpr is False else 1, labelspacing=0.4, handletextpad=0.3 + ) if logx is True: # xticks have been reset by semilogx, reassign them self.xticks(self._xticks) self.xticklabels(self._xticklabels) # FIXME: accept a CMetricConfusionMatrix object instead - def plot_confusion_matrix(self, y_true, y_pred, - normalize=False, labels=None, - title=None, cmap='Blues', colorbar=False): + def plot_confusion_matrix( + self, + y_true, + y_pred, + normalize=False, + labels=None, + title=None, + cmap="Blues", + colorbar=False, + ): """Plot a confusion matrix. y_true : CArray @@ -295,15 +324,14 @@ def plot_confusion_matrix(self, y_true, y_pred, If True, show the colorbar side of the matrix. Default False. """ - matrix = CArray(confusion_matrix( - y_true.tondarray(), y_pred.tondarray())) + matrix = CArray(confusion_matrix(y_true.tondarray(), y_pred.tondarray())) if normalize: # min-max normalization matrix_min = matrix.min() matrix_max = matrix.max() matrix = (matrix - matrix.min()) / (matrix_max - matrix_min) - ax = self.imshow(matrix, interpolation='nearest', cmap=cmap) + ax = self.imshow(matrix, interpolation="nearest", cmap=cmap) self._sp.set_xticks(CArray.arange(matrix.shape[1]).tondarray()) self._sp.set_yticks(CArray.arange(matrix.shape[0]).tondarray()) @@ -313,13 +341,16 @@ def plot_confusion_matrix(self, y_true, y_pred, # Rotate the tick labels and set their alignment. import matplotlib.pyplot as plt - plt.setp(self._sp.get_xticklabels(), rotation=45, - ha="right", rotation_mode="anchor") - fmt = '.2f' if normalize else 'd' + plt.setp( + self._sp.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor" + ) + + fmt = ".2f" if normalize else "d" if colorbar is True: from mpl_toolkits.axes_grid1 import make_axes_locatable + divider = make_axes_locatable(plt.gca()) cax = divider.append_axes("right", size="5%", pad=0.1) # TODO: set format -> cax.set_yticklabels @@ -328,9 +359,12 @@ def plot_confusion_matrix(self, y_true, y_pred, if title is True: self.title(title) - thresh = matrix.max() / 2. - for i, j in itertools.product( - range(matrix.shape[0]), range(matrix.shape[1])): - self.text(j, i, format(matrix[i, j].item(), fmt), - horizontalalignment="center", - color="white" if matrix[i, j] > thresh else "black") + thresh = matrix.max() / 2.0 + for i, j in itertools.product(range(matrix.shape[0]), range(matrix.shape[1])): + self.text( + j, + i, + format(matrix[i, j].item(), fmt), + horizontalalignment="center", + color="white" if matrix[i, j] > thresh else "black", + ) diff --git a/src/secml/figure/_plots/c_plot_sec_eval.py b/src/secml/figure/_plots/c_plot_sec_eval.py index 1714a6e1..2b16c812 100644 --- a/src/secml/figure/_plots/c_plot_sec_eval.py +++ b/src/secml/figure/_plots/c_plot_sec_eval.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.figure._plots import CPlot from secml.ml.peval.metrics import CMetric from secml.array import CArray @@ -31,7 +32,8 @@ def _cmpt_sec_eval_curve(sec_eval_data, metric, label=1): y_pred = sec_eval_data.Y_pred[k].ravel() metric_val = metric.performance_score( - y_true=sec_eval_data.Y, y_pred=y_pred, score=scores) + y_true=sec_eval_data.Y, y_pred=y_pred, score=scores + ) perf[k] = metric_val @@ -64,9 +66,19 @@ def apply_params_sec_eval(self): fig_legend.set_visible(True) self.grid(grid_on=True) - def plot_sec_eval(self, sec_eval_data, metric='accuracy', mean=False, - percentage=False, show_average=False, label=None, - linestyle='-', color=None, marker=None, metric_args=()): + def plot_sec_eval( + self, + sec_eval_data, + metric="accuracy", + mean=False, + percentage=False, + show_average=False, + label=None, + linestyle="-", + color=None, + marker=None, + metric_args=(), + ): """Plot the Security Evaluation Curve using desired metric. Parameters @@ -115,8 +127,9 @@ def plot_sec_eval(self, sec_eval_data, metric='accuracy', mean=False, perf = perf.mean(axis=0, keepdims=False) else: if len(sec_eval_data) > 1: - raise ValueError("if `mean` is False, " - "only one sec eval data should be passed") + raise ValueError( + "if `mean` is False, " "only one sec eval data should be passed" + ) perf = perf.ravel() @@ -135,9 +148,15 @@ def plot_sec_eval(self, sec_eval_data, metric='accuracy', mean=False, # This is done here to make 'markevery' work correctly self.xticks(sec_eval_data[0].param_values) - self.plot(sec_eval_data[0].param_values, perf, label=label, - linestyle=linestyle, color=color, marker=marker, - markevery=self.get_xticks_idx(sec_eval_data[0].param_values)) + self.plot( + sec_eval_data[0].param_values, + perf, + label=label, + linestyle=linestyle, + color=color, + marker=marker, + markevery=self.get_xticks_idx(sec_eval_data[0].param_values), + ) if mean is True: std_up = perf + perf_std @@ -147,17 +166,22 @@ def plot_sec_eval(self, sec_eval_data, metric='accuracy', mean=False, std_up[std_up > 100] = 100 else: std_up[std_up > 1.0] = 1.0 - self.fill_between(sec_eval_data[0].param_values, std_up, std_down, - interpolate=False, alpha=0.2, facecolor=color, - linestyle='None') + self.fill_between( + sec_eval_data[0].param_values, + std_up, + std_down, + interpolate=False, + alpha=0.2, + facecolor=color, + linestyle="None", + ) if self._xlabel is None: self.xlabel(sec_eval_data[0].param_name) if self._ylabel is None: self.ylabel(metric.class_type.capitalize()) - self.legend(loc='best', labelspacing=0.4, - handletextpad=0.3, edgecolor='k') + self.legend(loc="best", labelspacing=0.4, handletextpad=0.3, edgecolor="k") self.title("Security Evaluation Curve") self.apply_params_sec_eval() diff --git a/src/secml/figure/_plots/c_plot_stats.py b/src/secml/figure/_plots/c_plot_stats.py index d73f0912..c1b9d5b4 100644 --- a/src/secml/figure/_plots/c_plot_stats.py +++ b/src/secml/figure/_plots/c_plot_stats.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.figure._plots import CPlot from secml.ml.stats import CDensityEstimation diff --git a/src/secml/figure/_plots/plot_utils.py b/src/secml/figure/_plots/plot_utils.py index be8b1a1c..1fa96bcb 100644 --- a/src/secml/figure/_plots/plot_utils.py +++ b/src/secml/figure/_plots/plot_utils.py @@ -5,9 +5,10 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray -__all__ = ['create_points_grid'] +__all__ = ["create_points_grid"] def create_points_grid(grid_limits, n_grid_points): @@ -30,13 +31,17 @@ def create_points_grid(grid_limits, n_grid_points): padding_x, padding_y = (0.05 * (x_max - x_min), 0.05 * (y_max - y_min)) # Create the equi-spaced indices for each axis x_grid_points = CArray.linspace( - x_min - padding_x, x_max + padding_x, num=n_grid_points) + x_min - padding_x, x_max + padding_x, num=n_grid_points + ) y_grid_points = CArray.linspace( - y_min - padding_y, y_max + padding_y, num=n_grid_points) + y_min - padding_y, y_max + padding_y, num=n_grid_points + ) # Create the grid pad_xgrid, pad_ygrid = CArray.meshgrid((x_grid_points, y_grid_points)) pad_grid_point_features = CArray.concatenate( pad_xgrid.reshape((pad_xgrid.size, 1)), - pad_ygrid.reshape((pad_ygrid.size, 1)), axis=1) + pad_ygrid.reshape((pad_ygrid.size, 1)), + axis=1, + ) return pad_grid_point_features, pad_xgrid, pad_ygrid diff --git a/src/secml/figure/c_figure.py b/src/secml/figure/c_figure.py index 1f7f73f5..3ba714d3 100644 --- a/src/secml/figure/c_figure.py +++ b/src/secml/figure/c_figure.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.core import CCreator from secml.figure._plots import CPlot from secml.utils import LastInDict @@ -13,9 +14,10 @@ import os import matplotlib as mpl -if os.name == 'posix' and os.environ.get('DISPLAY', '') == '': + +if os.name == "posix" and os.environ.get("DISPLAY", "") == "": # If no display is available, use file-only backend - mpl.use('Agg') + mpl.use("Agg") import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -65,8 +67,10 @@ class CFigure(CCreator): >>> fig.show() # This will open a new window with the figure """ - def __init__(self, height=6, width=6, title="", - fontsize=12, linewidth=2, markersize=7): + + def __init__( + self, height=6, width=6, title="", fontsize=12, linewidth=2, markersize=7 + ): # Instancing figure with desired dimensions self.width = width @@ -74,9 +78,11 @@ def __init__(self, height=6, width=6, title="", self._fig = plt.figure(figsize=(self.width, self.height)) # Setting default fontsize, linewidth, markersize - self._default_params = {'font.size': fontsize, - 'lines.linewidth': linewidth, - 'lines.markersize': markersize} + self._default_params = { + "font.size": fontsize, + "lines.linewidth": linewidth, + "lines.markersize": markersize, + } # Setting figure super title self.title(title) @@ -148,24 +154,26 @@ def subplot(self, n_rows=1, n_cols=1, grid_slot=1, **kwargs): """ # Create a new grid if shape has changed or this is the first grid - if self._gs is None or self._gs.get_geometry()[0] != n_rows or \ - self._gs.get_geometry()[1] != n_cols: + if ( + self._gs is None + or self._gs.get_geometry()[0] != n_rows + or self._gs.get_geometry()[1] != n_cols + ): self._gs = gridspec.GridSpec(n_rows, n_cols) # If grid_slot is not a tuple, assume we want to use a single slot - grid_slot = grid_slot-1 if not is_tuple(grid_slot) else grid_slot + grid_slot = grid_slot - 1 if not is_tuple(grid_slot) else grid_slot # Calling matplotlib subplot switcher axes = self._fig.add_subplot(self._gs[grid_slot], **kwargs) # Set default parameters - axes.tick_params(labelsize=self._default_params['font.size']) + axes.tick_params(labelsize=self._default_params["font.size"]) sp_id = hex(id(axes)) # Index of the subplot # Create the subplot if not available or switch lastitem reference if sp_id not in self._sp_data: - self._sp_data[sp_id] = CPlot( - sp=axes, default_params=self._default_params) + self._sp_data[sp_id] = CPlot(sp=axes, default_params=self._default_params) else: self._sp_data.lastin_key = sp_id @@ -207,8 +215,9 @@ def close(self, fig=None): """ plt.close(self._fig if fig is None else fig) - def subplots_adjust(self, left=0.125, right=0.9, - bottom=0.1, top=0.9, wspace=0.2, hspace=0.2): + def subplots_adjust( + self, left=0.125, right=0.9, bottom=0.1, top=0.9, wspace=0.2, hspace=0.2 + ): """Tune the subplot layout. Parameters @@ -232,9 +241,9 @@ def subplots_adjust(self, left=0.125, right=0.9, :include-source: """ - self._fig.subplots_adjust(left=left, bottom=bottom, - right=right, top=top, - wspace=wspace, hspace=hspace) + self._fig.subplots_adjust( + left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace + ) def tight_layout(self, pad=1.08, h_pad=None, w_pad=None, rect=None): """Adjust space between plot and figure. @@ -264,18 +273,28 @@ def title(self, label, **kwargs): Same as :meth:`.text` method. """ - if 'fontsize' not in kwargs: - kwargs['fontsize'] = self.get_default_params()['font.size'] + if "fontsize" not in kwargs: + kwargs["fontsize"] = self.get_default_params()["font.size"] return self._fig.suptitle(label, **kwargs) - def savefig(self, fname, dpi=None, facecolor='w', edgecolor='w', - orientation='portrait', file_format=None, transparent=False, - bbox_inches=None, bbox_extra_artists=None, pad_inches=0.1): + def savefig( + self, + fname, + dpi=None, + facecolor="w", + edgecolor="w", + orientation="portrait", + file_format=None, + transparent=False, + bbox_inches=None, + bbox_extra_artists=None, + pad_inches=0.1, + ): """Save figure to disk. - + Parameters - ---------- - fname : string + ---------- + fname : string containing a path to a filename, or a Python file-like object. If file_format is None and fname is a string, the output file_format is deduced from the extension of the filename. @@ -311,8 +330,15 @@ def savefig(self, fname, dpi=None, facecolor='w', edgecolor='w', Amount of padding around the figure when bbox_inches is 'tight'. """ - self._fig.savefig(fname, dpi=dpi, facecolor=facecolor, - edgecolor=edgecolor, orientation=orientation, - format=file_format, transparent=transparent, - bbox_inches=bbox_inches, pad_inches=pad_inches, - bbox_extra_artists=bbox_extra_artists) + self._fig.savefig( + fname, + dpi=dpi, + facecolor=facecolor, + edgecolor=edgecolor, + orientation=orientation, + format=file_format, + transparent=transparent, + bbox_inches=bbox_inches, + pad_inches=pad_inches, + bbox_extra_artists=bbox_extra_artists, + ) diff --git a/src/secml/figure/tests/test_cfigure.py b/src/secml/figure/tests/test_cfigure.py index a44c0c8f..f91845c6 100644 --- a/src/secml/figure/tests/test_cfigure.py +++ b/src/secml/figure/tests/test_cfigure.py @@ -9,7 +9,7 @@ class TestCFigure(CUnitTest): """Unittest for CFigure.""" - + def test_svm(self): self.X = CArray([[1, 2], [3, 4], [5, 6], [7, 8]]) @@ -19,13 +19,10 @@ def test_svm(self): self.classifier = CClassifierSVM(kernel=CKernelRBF()) self.classifier.fit(self.dataset.X, self.dataset.Y) - self.x_min, self.x_max = (self.X[:, [0]].min() - 1, - self.X[:, [0]].max() + 1) - self.y_min, self.y_max = (self.X[:, [1]].min() - 1, - self.X[:, [1]].max() + 1) + self.x_min, self.x_max = (self.X[:, [0]].min() - 1, self.X[:, [0]].max() + 1) + self.y_min, self.y_max = (self.X[:, [1]].min() - 1, self.X[:, [1]].max() + 1) - self.fig = CFigure(height=7, width=10, - linewidth=5, fontsize=24, markersize=20) + self.fig = CFigure(height=7, width=10, linewidth=5, fontsize=24, markersize=20) self.fig.sp.title("Svm Test") self.logger.info("Test plot dataset method...") @@ -38,8 +35,12 @@ def test_svm(self): self.logger.info("Test plot function method...") bounds = [(self.x_min, self.x_max), (self.y_min, self.y_max)] - self.fig.sp.plot_fun(self.classifier.decision_function, - plot_levels=False, grid_limits=bounds, y=1) + self.fig.sp.plot_fun( + self.classifier.decision_function, + plot_levels=False, + grid_limits=bounds, + y=1, + ) self.fig.sp.xlim(self.x_min, self.x_max) self.fig.sp.ylim(self.y_min, self.y_max) @@ -47,5 +48,5 @@ def test_svm(self): self.fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot.py b/src/secml/figure/tests/test_plot.py index 892f4103..c9679909 100644 --- a/src/secml/figure/tests/test_plot.py +++ b/src/secml/figure/tests/test_plot.py @@ -12,20 +12,20 @@ def test_quiver(self): """Test for `CPlot.quiver()` method.""" # gradient values creation - xv = CArray.arange(0, 2 * constants.pi, .2) - yv = CArray.arange(0, 2 * constants.pi, .2) + xv = CArray.arange(0, 2 * constants.pi, 0.2) + yv = CArray.arange(0, 2 * constants.pi, 0.2) X, Y = CArray.meshgrid((xv, yv)) U = CArray.cos(X) V = CArray.sin(Y) plot = CFigure() - plot.sp.title('Gradient arrow') + plot.sp.title("Gradient arrow") plot.sp.quiver(U, V) plot.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot_classifier.py b/src/secml/figure/tests/test_plot_classifier.py index 988310c9..276e8b69 100644 --- a/src/secml/figure/tests/test_plot_classifier.py +++ b/src/secml/figure/tests/test_plot_classifier.py @@ -11,10 +11,8 @@ class TestCPlotClassifier(CUnitTest): """Unit test for CPlotClassifier.""" def setUp(self): - self.clf = CClassifierMulticlassOVA( - classifier=CClassifierSVM, kernel='rbf') - self.dataset = CDLRandomBlobs( - random_state=3, n_features=2, centers=4).load() + self.clf = CClassifierMulticlassOVA(classifier=CClassifierSVM, kernel="rbf") + self.dataset = CDLRandomBlobs(random_state=3, n_features=2, centers=4).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) self.clf.fit(self.dataset.X, self.dataset.Y) @@ -24,16 +22,14 @@ def test_plot_decision_regions(self): fig.subplot(1, 2, 1) fig.sp.plot_ds(self.dataset) - fig.sp.plot_decision_regions( - self.clf, n_grid_points=200, plot_background=False) + fig.sp.plot_decision_regions(self.clf, n_grid_points=200, plot_background=False) fig.subplot(1, 2, 2) fig.sp.plot_ds(self.dataset) - fig.sp.plot_decision_regions( - self.clf, n_grid_points=200) + fig.sp.plot_decision_regions(self.clf, n_grid_points=200) fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot_constraint.py b/src/secml/figure/tests/test_plot_constraint.py index 3b8be2b3..21bc53ce 100644 --- a/src/secml/figure/tests/test_plot_constraint.py +++ b/src/secml/figure/tests/test_plot_constraint.py @@ -11,7 +11,7 @@ def setUp(self): self.constraints = [ CConstraint.create("box", lb=0, ub=1), CConstraint.create("l1", center=0.5, radius=0.5), - CConstraint.create("l2", center=0.5, radius=0.5) + CConstraint.create("l2", center=0.5, radius=0.5), ] def test_constraint(self): @@ -22,5 +22,5 @@ def test_constraint(self): fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot_function.py b/src/secml/figure/tests/test_plot_function.py index 1093ff52..0fff0491 100644 --- a/src/secml/figure/tests/test_plot_function.py +++ b/src/secml/figure/tests/test_plot_function.py @@ -11,8 +11,9 @@ class TestCPlot(CUnitTest): def setUp(self): self.clf = CClassifierSVM() - self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1, - n_clusters_per_class=1).load() + self.dataset = CDLRandom( + n_features=2, n_redundant=0, n_informative=1, n_clusters_per_class=1 + ).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) self.clf.fit(self.dataset.X, self.dataset.Y) @@ -34,5 +35,5 @@ def test_fgrads(self): fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot_metric.py b/src/secml/figure/tests/test_plot_metric.py index 28598010..37a09dce 100644 --- a/src/secml/figure/tests/test_plot_metric.py +++ b/src/secml/figure/tests/test_plot_metric.py @@ -13,8 +13,12 @@ def test_confusion_matrix(self): y_pred = CArray([0, 0, 2, 2, 0, 2]) fig = CFigure() fig.sp.plot_confusion_matrix( - y_true, y_pred, labels=['one', 'two', 'three'], - colorbar=True, normalize=False) + y_true, + y_pred, + labels=["one", "two", "three"], + colorbar=True, + normalize=False, + ) fig.show() # Test for normalize=True @@ -22,10 +26,14 @@ def test_confusion_matrix(self): y_pred = CArray([0, 0, 2, 2, 0, 2]).astype(float) fig = CFigure() fig.sp.plot_confusion_matrix( - y_true, y_pred, labels=['one', 'two', 'three'], - colorbar=True, normalize=True) + y_true, + y_pred, + labels=["one", "two", "three"], + colorbar=True, + normalize=True, + ) fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/figure/tests/test_plot_roc.py b/src/secml/figure/tests/test_plot_roc.py index 4b22739d..9c042634 100644 --- a/src/secml/figure/tests/test_plot_roc.py +++ b/src/secml/figure/tests/test_plot_roc.py @@ -12,8 +12,9 @@ class TestCRoc(CUnitTest): def setUp(self): - self.ds_loader = CDLRandom(n_features=1000, n_redundant=200, - n_informative=250, n_clusters_per_class=2) + self.ds_loader = CDLRandom( + n_features=1000, n_redundant=200, n_informative=250, n_clusters_per_class=2 + ) self.ds1 = self.ds_loader.load() self.ds2 = self.ds_loader.load() @@ -22,10 +23,8 @@ def setUp(self): self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y) - _, self.s1 = self.svm.predict( - self.ds1.X, return_decision_function=True) - _, self.s2 = self.svm.predict( - self.ds2.X, return_decision_function=True) + _, self.s1 = self.svm.predict(self.ds1.X, return_decision_function=True) + _, self.s2 = self.svm.predict(self.ds2.X, return_decision_function=True) self.s1 = self.s1[:, 1].ravel() self.s2 = self.s2[:, 1].ravel() @@ -44,7 +43,7 @@ def test_standard(self): # Testing without input CFigure roc_plot = CFigure() - roc_plot.sp.title('ROC Curve Standard') + roc_plot.sp.title("ROC Curve Standard") # Plotting 2 times (to show multiple curves) # add one curve for repetition and call it rep 0 and rep 1 of roc 1 roc_plot.sp.plot_roc(self.roc_wmean.mean_fpr, self.roc_wmean.mean_tpr) @@ -56,10 +55,10 @@ def test_mean(self): # Testing without input CFigure roc_plot = CFigure() - roc_plot.sp.title('ROC Curve') + roc_plot.sp.title("ROC Curve") # Plotting 2 times (to show 2 curves) - roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc1 mean', plot_std=True) - roc_plot.sp.plot_roc_reps(self.roc_wmean, label='roc1') + roc_plot.sp.plot_roc_mean(self.roc_wmean, label="roc1 mean", plot_std=True) + roc_plot.sp.plot_roc_reps(self.roc_wmean, label="roc1") roc_plot.show() @@ -72,14 +71,14 @@ def test_custom_params(self): # Testing without input CFigure roc_plot = CFigure() - roc_plot.sp.title('ROC Curve - Custom') + roc_plot.sp.title("ROC Curve - Custom") roc_plot.sp.xlim(0.1, 100) roc_plot.sp.ylim(30, 100) roc_plot.sp.yticks([70, 80, 90, 100]) - roc_plot.sp.yticklabels(['70', '80', '90', '100']) + roc_plot.sp.yticklabels(["70", "80", "90", "100"]) # Plotting 2 times (to show 2 curves) - roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc1') - roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc2') + roc_plot.sp.plot_roc_mean(self.roc_wmean, label="roc1") + roc_plot.sp.plot_roc_mean(self.roc_wmean, label="roc2") roc_plot.show() @@ -88,12 +87,12 @@ def test_single(self): # Testing without input CFigure roc_plot = CFigure() - roc_plot.sp.title('ROC Curve Repetitions') + roc_plot.sp.title("ROC Curve Repetitions") # Plotting 2 times (to show multiple curves) # add one curve for repetition and call it rep 0 and rep 1 of roc 1 - roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc1') + roc_plot.sp.plot_roc_reps(self.roc_nomean, label="roc1") # add one curve for repetition and call it rep 0 and rep 1 of roc 2 - roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc2') + roc_plot.sp.plot_roc_reps(self.roc_nomean, label="roc2") roc_plot.show() @@ -106,6 +105,7 @@ def test_compare_sklearn(self): from sklearn.model_selection import StratifiedKFold from secml.figure import CFigure + roc_fig = CFigure(width=12) # import some data to play with @@ -122,8 +122,9 @@ def test_compare_sklearn(self): # Classification and ROC analysis # Run classifier with cross-validation and plot ROC curves - classifier = svm.SVC(kernel='linear', probability=True, - random_state=random_state) + classifier = svm.SVC( + kernel="linear", probability=True, random_state=random_state + ) roc_fig.subplot(1, 2, 1) @@ -138,25 +139,29 @@ def test_compare_sklearn(self): mean_tpr += np.interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) - roc_fig.sp.plot(fpr, tpr, linewidth=1, - label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) + roc_fig.sp.plot( + fpr, tpr, linewidth=1, label="ROC fold %d (area = %0.2f)" % (i, roc_auc) + ) - roc_fig.sp.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), - label='Luck') + roc_fig.sp.plot([0, 1], [0, 1], "--", color=(0.6, 0.6, 0.6), label="Luck") mean_tpr /= cv.get_n_splits() mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) - roc_fig.sp.plot(mean_fpr, mean_tpr, 'k--', - label='Mean ROC (area = %0.2f)' % mean_auc, - linewidth=2) + roc_fig.sp.plot( + mean_fpr, + mean_tpr, + "k--", + label="Mean ROC (area = %0.2f)" % mean_auc, + linewidth=2, + ) roc_fig.sp.xlim([-0.05, 1.05]) roc_fig.sp.ylim([-0.05, 1.05]) - roc_fig.sp.xlabel('False Positive Rate') - roc_fig.sp.ylabel('True Positive Rate') - roc_fig.sp.title('Sklearn Receiver operating characteristic example') + roc_fig.sp.xlabel("False Positive Rate") + roc_fig.sp.ylabel("True Positive Rate") + roc_fig.sp.title("Sklearn Receiver operating characteristic example") roc_fig.sp.legend(loc="lower right") roc_fig.sp.grid() @@ -175,23 +180,25 @@ def test_compare_sklearn(self): self.roc_wmean.compute(true_y, score) fp, tp = self.roc_wmean.average() - roc_fig.sp.plot([0, 100], [0, 100], '--', color=(0.6, 0.6, 0.6), - label='Luck') + roc_fig.sp.plot([0, 100], [0, 100], "--", color=(0.6, 0.6, 0.6), label="Luck") roc_fig.sp.xticks([0, 20, 40, 60, 80, 100]) - roc_fig.sp.xticklabels(['0', '20', '40', '60', '80', '100']) + roc_fig.sp.xticklabels(["0", "20", "40", "60", "80", "100"]) roc_fig.sp.plot_roc_mean( - self.roc_wmean, plot_std=True, logx=False, style='go-', - label='Mean ROC (area = %0.2f)' % (auc(fp.tondarray(), - tp.tondarray()))) + self.roc_wmean, + plot_std=True, + logx=False, + style="go-", + label="Mean ROC (area = %0.2f)" % (auc(fp.tondarray(), tp.tondarray())), + ) roc_fig.sp.xlim([-0.05 * 100, 1.05 * 100]) roc_fig.sp.ylim([-0.05 * 100, 1.05 * 100]) - roc_fig.sp.title('SecML Receiver operating characteristic example') + roc_fig.sp.title("SecML Receiver operating characteristic example") roc_fig.sp.legend(loc="lower right") roc_fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/__init__.py b/src/secml/ml/__init__.py index 061d52d7..7f6c979c 100644 --- a/src/secml/ml/__init__.py +++ b/src/secml/ml/__init__.py @@ -1,4 +1,5 @@ """Machine Learning""" + from .c_module import CModule from .classifiers import * from .features import * diff --git a/src/secml/ml/c_module.py b/src/secml/ml/c_module.py index 3de67ea2..1c5ae139 100644 --- a/src/secml/ml/c_module.py +++ b/src/secml/ml/c_module.py @@ -7,6 +7,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator from secml.array import CArray @@ -27,7 +28,8 @@ class CModule(CCreator, metaclass=ABCMeta): Cannot be higher than processor's number of cores. Default is 1. """ - __super__ = 'CModule' + + __super__ = "CModule" def __init__(self, preprocess=None, n_jobs=1): self._cached_x = None # cached internal x repr. for backward pass @@ -57,8 +59,7 @@ def create_chain(class_items, kwargs_list): """ chain = None for i, pre_id in enumerate(class_items): - chain = CModule.create( - pre_id, preprocess=chain, **kwargs_list[i]) + chain = CModule.create(pre_id, preprocess=chain, **kwargs_list[i]) return chain @@ -135,8 +136,7 @@ def preprocess(self): @preprocess.setter def preprocess(self, preprocess): - self._preprocess = None if preprocess is None \ - else CModule.create(preprocess) + self._preprocess = None if preprocess is None else CModule.create(preprocess) def _forward_preprocess(self, x, caching=True): """Runs forward through the pre-processing chain, @@ -247,8 +247,9 @@ def backward(self, w=None): return grad.ravel() if grad.is_vector_like else grad def _backward(self, w): - raise NotImplementedError("`_backward` is not implemented for {:}" - "".format(self.__class__.__name__)) + raise NotImplementedError( + "`_backward` is not implemented for {:}" "".format(self.__class__.__name__) + ) _backward.__doc__ = backward.__doc__ # Same doc for the protected method diff --git a/src/secml/ml/classifiers/c_classifier.py b/src/secml/ml/classifiers/c_classifier.py index 8d863a45..53af066f 100755 --- a/src/secml/ml/classifiers/c_classifier.py +++ b/src/secml/ml/classifiers/c_classifier.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.ml import CModule @@ -38,7 +39,8 @@ class CClassifier(CModule, metaclass=ABCMeta): Cannot be higher than processor's number of cores. Default is 1. """ - __super__ = 'CClassifier' + + __super__ = "CClassifier" def __init__(self, preprocess=None, n_jobs=1): # List of classes on which training has been performed @@ -88,7 +90,7 @@ def _check_is_fitted(self): If the classifier is not fitted. """ - check_is_fitted(self, ['classes', 'n_features']) + check_is_fitted(self, ["classes", "n_features"]) @abstractmethod def _fit(self, x, y): @@ -169,8 +171,9 @@ def fit_forward(self, x, y=None, caching=False): forward : run forward function on input data. """ - kfold = CDataSplitterKFold( - num_folds=5, random_state=0).compute_indices(CDataset(x, y)) + kfold = CDataSplitterKFold(num_folds=5, random_state=0).compute_indices( + CDataset(x, y) + ) scores = CArray.zeros(shape=(x.shape[0], y.unique().size)) @@ -232,8 +235,7 @@ class label index. """ if y < 0 or y >= self.n_classes: - raise ValueError( - "class label {:} is out of range".format(y)) + raise ValueError("class label {:} is out of range".format(y)) def grad_f_x(self, x, y): """Computes the gradient of the classifier's decision function wrt x. @@ -256,8 +258,9 @@ def grad_f_x(self, x, y): # check that x is a single point if CArray(x).is_vector_like is False: - raise ValueError("Classifier gradient can be computed only on" - " a single input sample.") + raise ValueError( + "Classifier gradient can be computed only on" " a single input sample." + ) w = CArray.zeros(self.n_classes) w[y] = 1 # one-hot encoding of y @@ -298,8 +301,9 @@ def predict(self, x, return_decision_function=False): return (labels, scores) if return_decision_function is True else labels - def estimate_parameters(self, dataset, parameters, splitter, metric, - pick='first', perf_evaluator='xval'): + def estimate_parameters( + self, dataset, parameters, splitter, metric, pick="first", perf_evaluator="xval" + ): """Estimate parameter that give better result respect a chose metric. Parameters @@ -346,7 +350,8 @@ def estimate_parameters(self, dataset, parameters, splitter, metric, # Evaluate the best parameters for the classifier (self) best_params = perf_eval.evaluate_params( - self, dataset, parameters, pick=pick, n_jobs=self.n_jobs)[0] + self, dataset, parameters, pick=pick, n_jobs=self.n_jobs + )[0] # Set the best parameters in classifier self.set_params(best_params) diff --git a/src/secml/ml/classifiers/c_classifier_dnn.py b/src/secml/ml/classifiers/c_classifier_dnn.py index db24477d..5d860a5a 100644 --- a/src/secml/ml/classifiers/c_classifier_dnn.py +++ b/src/secml/ml/classifiers/c_classifier_dnn.py @@ -5,6 +5,7 @@ .. moduleauthor:: Maura Pintor """ + from abc import ABCMeta, abstractmethod from secml.array import CArray @@ -45,13 +46,20 @@ class CClassifierDNN(CClassifier, metaclass=ABCMeta): class_type : 'dnn-clf' """ - __class_type = ' dnn-clf' - def __init__(self, model, input_shape=None, preprocess=None, - pretrained=False, pretrained_classes=None, - softmax_outputs=False, n_jobs=1): - super(CClassifierDNN, self).__init__( - preprocess=preprocess, n_jobs=n_jobs) + __class_type = " dnn-clf" + + def __init__( + self, + model, + input_shape=None, + preprocess=None, + pretrained=False, + pretrained_classes=None, + softmax_outputs=False, + n_jobs=1, + ): + super(CClassifierDNN, self).__init__(preprocess=preprocess, n_jobs=n_jobs) self._model = model self._out_layer = None diff --git a/src/secml/ml/classifiers/c_classifier_linear.py b/src/secml/ml/classifiers/c_classifier_linear.py index 89fe4dc1..e70c6f74 100755 --- a/src/secml/ml/classifiers/c_classifier_linear.py +++ b/src/secml/ml/classifiers/c_classifier_linear.py @@ -7,6 +7,7 @@ .. moduleauthor:: Battista Biggio """ + from abc import abstractmethod from secml.array import CArray diff --git a/src/secml/ml/classifiers/clf_utils.py b/src/secml/ml/classifiers/clf_utils.py index 597f4071..b78fd0d8 100644 --- a/src/secml/ml/classifiers/clf_utils.py +++ b/src/secml/ml/classifiers/clf_utils.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.core.type_utils import is_int from secml.array import CArray @@ -25,9 +26,9 @@ def check_binary_labels(labels): """ - if (is_int(labels) and not (labels == 0 or labels == 1)) or \ - (isinstance(labels, CArray) and - (labels != 0).logical_and(labels != 1).any()): + if (is_int(labels) and not (labels == 0 or labels == 1)) or ( + isinstance(labels, CArray) and (labels != 0).logical_and(labels != 1).any() + ): raise ValueError("input labels should be binary in {0, +1} interval.") diff --git a/src/secml/ml/classifiers/gradients/__init__.py b/src/secml/ml/classifiers/gradients/__init__.py index 9e772109..225b351c 100644 --- a/src/secml/ml/classifiers/gradients/__init__.py +++ b/src/secml/ml/classifiers/gradients/__init__.py @@ -1,6 +1,5 @@ from .mixin_classifier_gradient import CClassifierGradientMixin from .mixin_classifier_gradient_linear import CClassifierGradientLinearMixin -from .mixin_classifier_gradient_logistic import \ - CClassifierGradientLogisticMixin +from .mixin_classifier_gradient_logistic import CClassifierGradientLogisticMixin from .mixin_classifier_gradient_ridge import CClassifierGradientRidgeMixin from .mixin_classifier_gradient_sgd import CClassifierGradientSGDMixin diff --git a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient.py b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient.py index 71483ab7..23394ddd 100644 --- a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient.py +++ b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient.py @@ -7,12 +7,13 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta class CClassifierGradientMixin(metaclass=ABCMeta): """Abstract Mixin class that defines basic methods - for classifier gradients.""" + for classifier gradients.""" # train derivatives: @@ -109,8 +110,9 @@ def grad_f_x(self, x, y, **kwargs): try: # Get the derivative of decision_function grad_f = self._grad_f_x(x, y, **kwargs) except NotImplementedError: - raise NotImplementedError("{:} does not implement `grad_f_x`" - "".format(self.__class__.__name__)) + raise NotImplementedError( + "{:} does not implement `grad_f_x`" "".format(self.__class__.__name__) + ) # The derivative of decision_function should be a vector # as we are computing the gradient wrt a class `y` @@ -126,7 +128,8 @@ def grad_f_x(self, x, y, **kwargs): grad_p = self.preprocess.gradient(x_in, w=grad_f) if not grad_p.is_vector_like: raise ValueError( - "`preprocess.gradient` must return a vector like array") + "`preprocess.gradient` must return a vector like array" + ) return grad_p.ravel() return grad_f # No preprocess defined... return the clf grad diff --git a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_linear.py b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_linear.py index 5998c14a..7b658b50 100644 --- a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_linear.py +++ b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_linear.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.ml.classifiers.gradients import CClassifierGradientMixin from secml.ml.classifiers.clf_utils import convert_binary_labels diff --git a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_logistic.py b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_logistic.py index c612abf8..cb7e871d 100644 --- a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_logistic.py +++ b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_logistic.py @@ -5,6 +5,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.ml.classifiers.gradients import CClassifierGradientLinearMixin from secml.ml.classifiers.clf_utils import convert_binary_labels diff --git a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_ridge.py b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_ridge.py index 43730978..b939db40 100644 --- a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_ridge.py +++ b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_ridge.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.ml.classifiers.gradients import CClassifierGradientLinearMixin diff --git a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_sgd.py b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_sgd.py index b1e369fb..5af9866a 100644 --- a/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_sgd.py +++ b/src/secml/ml/classifiers/gradients/mixin_classifier_gradient_sgd.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.classifiers.gradients import CClassifierGradientLinearMixin diff --git a/src/secml/ml/classifiers/gradients/tests/__init__.py b/src/secml/ml/classifiers/gradients/tests/__init__.py index 963286fd..52057ec9 100644 --- a/src/secml/ml/classifiers/gradients/tests/__init__.py +++ b/src/secml/ml/classifiers/gradients/tests/__init__.py @@ -1,2 +1,3 @@ -from secml.ml.classifiers.gradients.tests.mixin_classifier_gradient_testcases import \ - CClassifierGradientMixinTestCases +from secml.ml.classifiers.gradients.tests.mixin_classifier_gradient_testcases import ( + CClassifierGradientMixinTestCases, +) diff --git a/src/secml/ml/classifiers/gradients/tests/mixin_classifier_gradient_testcases.py b/src/secml/ml/classifiers/gradients/tests/mixin_classifier_gradient_testcases.py index adf06b6d..a52147c0 100644 --- a/src/secml/ml/classifiers/gradients/tests/mixin_classifier_gradient_testcases.py +++ b/src/secml/ml/classifiers/gradients/tests/mixin_classifier_gradient_testcases.py @@ -14,6 +14,7 @@ class CClassifierGradientMixinTestCases(CUnitTest): Test class implementing gradient test methods for specific clf. """ + clf_grads_class = None @classmethod @@ -23,9 +24,13 @@ def setUpClass(cls): cls.seed = 2 - cls.ds = CDLRandom(n_features=2, n_redundant=0, - n_informative=2, n_clusters_per_class=1, - random_state=cls.seed).load() + cls.ds = CDLRandom( + n_features=2, + n_redundant=0, + n_informative=2, + n_clusters_per_class=1, + random_state=cls.seed, + ).load() cls.ds_sparse = cls.ds.tosparse() @staticmethod @@ -58,7 +63,8 @@ def _test_grad_tr_params(self, clf): """ i = self.ds.X.randsample( - CArray.arange(self.ds.num_samples), 1, random_state=self.seed) + CArray.arange(self.ds.num_samples), 1, random_state=self.seed + ) x, y = self.ds.X[i, :], self.ds.Y[i] self.logger.info("idx {:}: x {:}, y {:}".format(i.item(), x, y)) @@ -67,8 +73,8 @@ def _test_grad_tr_params(self, clf): # Compare the analytical grad with the numerical grad gradient = clf.grad_tr_params(x, y).ravel() num_gradient = CFunction(self._grad_tr_fun).approx_fprime( - params, epsilon=1e-6, - x0=x, y0=y, clf_grads=self.clf_grads_class, clf=clf) + params, epsilon=1e-6, x0=x, y0=y, clf_grads=self.clf_grads_class, clf=clf + ) error = (gradient - num_gradient).norm() self.logger.info("Analytical gradient:\n{:}".format(gradient)) diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/__init__.py b/src/secml/ml/classifiers/gradients/tests/test_classes/__init__.py index 7b2d16a0..99ff5a60 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/__init__.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/__init__.py @@ -1,6 +1,7 @@ from .c_classifier_gradient_test import CClassifierGradientTest from .c_classifier_gradient_test_linear import CClassifierGradientTestLinear from .c_classifier_gradient_test_ridge import CClassifierGradientTestRidge -from .c_classifier_gradient_test_logistic import \ - CClassifierGradientTestLogisticRegression +from .c_classifier_gradient_test_logistic import ( + CClassifierGradientTestLogisticRegression, +) from .c_classifier_gradient_test_svm import CClassifierGradientTestSVM diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test.py b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test.py index a7990955..74d4292a 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test.py @@ -5,13 +5,14 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator class CClassifierGradientTest(CCreator, metaclass=ABCMeta): - __super__ = 'CClassifierGradientTest' + __super__ = "CClassifierGradientTest" @abstractmethod def params(self, clf): @@ -33,4 +34,3 @@ def change_params(self, params, clf): """Return a deepcopy of the given classifier with the value of the parameters changed.""" raise NotImplementedError - diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_linear.py b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_linear.py index 2756a45b..51eeba95 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_linear.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_linear.py @@ -5,14 +5,14 @@ .. moduleauthor:: Ambra Demontis """ -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTest + +from secml.ml.classifiers.gradients.tests.test_classes import CClassifierGradientTest from secml.array import CArray class CClassifierGradientTestLinear(CClassifierGradientTest): - __class_type = 'linear' + __class_type = "linear" def params(self, clf): """Classifier parameters.""" diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_logistic.py b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_logistic.py index d413f65f..a57422d3 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_logistic.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_logistic.py @@ -5,9 +5,11 @@ .. moduleauthor:: Ambra Demontis """ -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTestLinear + +from secml.ml.classifiers.gradients.tests.test_classes import ( + CClassifierGradientTestLinear, +) class CClassifierGradientTestLogisticRegression(CClassifierGradientTestLinear): - __class_type = 'logistic' + __class_type = "logistic" diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_ridge.py b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_ridge.py index c99dcdd4..00fe159a 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_ridge.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_ridge.py @@ -5,9 +5,11 @@ .. moduleauthor:: Ambra Demontis """ -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTestLinear + +from secml.ml.classifiers.gradients.tests.test_classes import ( + CClassifierGradientTestLinear, +) class CClassifierGradientTestRidge(CClassifierGradientTestLinear): - __class_type = 'ridge' + __class_type = "ridge" diff --git a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_svm.py b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_svm.py index 3ed82ccc..17bda369 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_svm.py +++ b/src/secml/ml/classifiers/gradients/tests/test_classes/c_classifier_gradient_test_svm.py @@ -5,14 +5,14 @@ .. moduleauthor:: Ambra Demontis """ -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTest + +from secml.ml.classifiers.gradients.tests.test_classes import CClassifierGradientTest from secml.array import CArray class CClassifierGradientTestSVM(CClassifierGradientTest): - __class_type = 'svm' + __class_type = "svm" def params(self, clf): """Classifier parameters.""" diff --git a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_logistic.py b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_logistic.py index cead6a45..3eec39e6 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_logistic.py +++ b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_logistic.py @@ -1,7 +1,7 @@ -from secml.ml.classifiers.gradients.tests import \ - CClassifierGradientMixinTestCases -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTestLogisticRegression +from secml.ml.classifiers.gradients.tests import CClassifierGradientMixinTestCases +from secml.ml.classifiers.gradients.tests.test_classes import ( + CClassifierGradientTestLogisticRegression, +) from secml.ml.classifiers import CClassifierLogistic from secml.ml.features.normalization import CNormalizerMinMax @@ -9,6 +9,7 @@ class TestCClassifierGradientLogisticMixin(CClassifierGradientMixinTestCases): """Unittests for CClassifierGradientLogisticMixin.""" + clf_grads_class = CClassifierGradientTestLogisticRegression() def test_grad_tr_params_linear(self): @@ -17,9 +18,9 @@ def test_grad_tr_params_linear(self): for n in (None, CNormalizerMinMax((-10, 10))): clf = CClassifierLogistic(preprocess=n) clf.fit(self.ds.X, self.ds.Y) - self.logger.info('w: ' + str(clf.w) + ', b: ' + str(clf.b)) + self.logger.info("w: " + str(clf.w) + ", b: " + str(clf.b)) self._test_grad_tr_params(clf) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierGradientMixinTestCases.main() diff --git a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_ridge.py b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_ridge.py index 524dbedd..418cb99a 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_ridge.py +++ b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_ridge.py @@ -1,7 +1,7 @@ -from secml.ml.classifiers.gradients.tests import \ - CClassifierGradientMixinTestCases -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTestRidge +from secml.ml.classifiers.gradients.tests import CClassifierGradientMixinTestCases +from secml.ml.classifiers.gradients.tests.test_classes import ( + CClassifierGradientTestRidge, +) from secml.ml.classifiers import CClassifierRidge from secml.ml.features.normalization import CNormalizerMinMax @@ -9,6 +9,7 @@ class TestCClassifierGradientRidgeMixin(CClassifierGradientMixinTestCases): """Unittests for CClassifierGradientRidgeMixin.""" + clf_grads_class = CClassifierGradientTestRidge() def test_grad_tr_params_linear(self): @@ -20,5 +21,5 @@ def test_grad_tr_params_linear(self): self._test_grad_tr_params(clf) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierGradientMixinTestCases.main() diff --git a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_sgd.py b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_sgd.py index 831dbe3d..310a7145 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_sgd.py +++ b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_sgd.py @@ -1,5 +1,4 @@ -from secml.ml.classifiers.gradients.tests import \ - CClassifierGradientMixinTestCases +from secml.ml.classifiers.gradients.tests import CClassifierGradientMixinTestCases from secml.ml.classifiers import CClassifierSGD from secml.array import CArray @@ -11,9 +10,8 @@ class TestCClassifierGradientSGDMixin(CClassifierGradientMixinTestCases): def test_not_implemented(self): """Test `grad_tr_params`.""" with self.assertRaises(NotImplementedError): - CClassifierSGD('hinge', 'l2').grad_tr_params( - CArray([]), CArray([])) + CClassifierSGD("hinge", "l2").grad_tr_params(CArray([]), CArray([])) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierGradientMixinTestCases.main() diff --git a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_svm.py b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_svm.py index f9e0b4b0..f5d7e2ec 100644 --- a/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_svm.py +++ b/src/secml/ml/classifiers/gradients/tests/test_mixin_classifier_gradient_svm.py @@ -1,7 +1,5 @@ -from secml.ml.classifiers.gradients.tests import \ - CClassifierGradientMixinTestCases -from secml.ml.classifiers.gradients.tests.test_classes import \ - CClassifierGradientTestSVM +from secml.ml.classifiers.gradients.tests import CClassifierGradientMixinTestCases +from secml.ml.classifiers.gradients.tests.test_classes import CClassifierGradientTestSVM from secml.ml.classifiers import CClassifierSVM from secml.ml.features.normalization import CNormalizerMinMax @@ -9,13 +7,14 @@ class TestCClassifierGradientSVMMixin(CClassifierGradientMixinTestCases): """Unittests for CClassifierGradientSVMMixin.""" + clf_grads_class = CClassifierGradientTestSVM() def test_grad_tr_params_linear(self): """Test `grad_tr_params` on a linear classifier.""" for n in (None, CNormalizerMinMax((-10, 10))): - clf = CClassifierSVM(kernel='linear', preprocess=n) + clf = CClassifierSVM(kernel="linear", preprocess=n) clf.fit(self.ds.X, self.ds.Y) self._test_grad_tr_params(clf) @@ -23,10 +22,10 @@ def test_grad_tr_params_nonlinear(self): """Test `grad_tr_params` on a nonlinear classifier.""" for n in (None, CNormalizerMinMax((-10, 10))): - clf = CClassifierSVM(kernel='rbf', preprocess=n) + clf = CClassifierSVM(kernel="rbf", preprocess=n) clf.fit(self.ds.X, self.ds.Y) self._test_grad_tr_params(clf) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierGradientMixinTestCases.main() diff --git a/src/secml/ml/classifiers/loss/__init__.py b/src/secml/ml/classifiers/loss/__init__.py index 5cdae9bc..81405ac3 100644 --- a/src/secml/ml/classifiers/loss/__init__.py +++ b/src/secml/ml/classifiers/loss/__init__.py @@ -1,6 +1,8 @@ from .c_loss import CLoss, CLossRegression, CLossClassification -from .c_loss_epsilon_insensitive import \ - CLossEpsilonInsensitive, CLossEpsilonInsensitiveSquared +from .c_loss_epsilon_insensitive import ( + CLossEpsilonInsensitive, + CLossEpsilonInsensitiveSquared, +) from .c_loss_hinge import CLossHinge, CLossHingeSquared from .c_loss_squared import CLossSquare, CLossQuadratic from .c_softmax import CSoftmax diff --git a/src/secml/ml/classifiers/loss/c_loss.py b/src/secml/ml/classifiers/loss/c_loss.py index 5b2d45ed..320ae66b 100644 --- a/src/secml/ml/classifiers/loss/c_loss.py +++ b/src/secml/ml/classifiers/loss/c_loss.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -14,7 +15,8 @@ class CLoss(CCreator, metaclass=ABCMeta): """Interface for loss functions.""" - __super__ = 'CLoss' + + __super__ = "CLoss" @property @abstractmethod @@ -68,7 +70,8 @@ def dloss(self, y_true, score): class CLossRegression(CLoss): """Interface for loss functions suitable for regression problems.""" - suitable_for = 'regression' + + suitable_for = "regression" @abstractmethod def loss(self, y_true, score): @@ -113,7 +116,8 @@ def dloss(self, y_true, score): class CLossClassification(CLoss): """Interface for loss functions suitable for classification problems.""" - suitable_for = 'classification' + + suitable_for = "classification" @abstractmethod def loss(self, y_true, score, pos_label=None): @@ -194,7 +198,8 @@ def _check_binary_score(score, pos_label=1): if score.shape[1] > 2: raise ValueError( "only 2 classes are supported. " - "`score` has shape[1] = {:}".format(score.shape[1])) + "`score` has shape[1] = {:}".format(score.shape[1]) + ) else: score = score[:, pos_label].ravel() diff --git a/src/secml/ml/classifiers/loss/c_loss_cross_entropy.py b/src/secml/ml/classifiers/loss/c_loss_cross_entropy.py index 83ef9b53..64e23fbe 100755 --- a/src/secml/ml/classifiers/loss/c_loss_cross_entropy.py +++ b/src/secml/ml/classifiers/loss/c_loss_cross_entropy.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.loss import CLossClassification, CSoftmax from secml.array import CArray from secml import _NoValue @@ -31,7 +32,8 @@ class CLossCrossEntropy(CLossClassification): suitable_for : 'classification' """ - __class_type = 'cross-entropy' + + __class_type = "cross-entropy" def loss(self, y_true, score, pos_label=_NoValue): """Computes the value of the Cross Entropy loss function. diff --git a/src/secml/ml/classifiers/loss/c_loss_epsilon_insensitive.py b/src/secml/ml/classifiers/loss/c_loss_epsilon_insensitive.py index 8fab285a..fbaa41f1 100755 --- a/src/secml/ml/classifiers/loss/c_loss_epsilon_insensitive.py +++ b/src/secml/ml/classifiers/loss/c_loss_epsilon_insensitive.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.loss import CLossRegression from secml.array import CArray @@ -32,7 +33,8 @@ class CLossEpsilonInsensitive(CLossRegression): suitable_for : 'regression' """ - __class_type = 'e-insensitive' + + __class_type = "e-insensitive" def __init__(self, epsilon=0.1): self._epsilon = float(epsilon) @@ -130,7 +132,8 @@ class CLossEpsilonInsensitiveSquared(CLossEpsilonInsensitive): suitable_for : 'regression' """ - __class_type = 'e-insensitive-squared' + + __class_type = "e-insensitive-squared" def loss(self, y_true, score): """Computes the value of the squared epsilon-insensitive loss function. @@ -158,7 +161,7 @@ def loss(self, y_true, score): # (max(0, abs(y - s) - epsilon))^2 e = abs(y_true - score) - self.epsilon - e2 = e ** 2 + e2 = e**2 e2[e < 0] = 0 return e2 diff --git a/src/secml/ml/classifiers/loss/c_loss_hinge.py b/src/secml/ml/classifiers/loss/c_loss_hinge.py index fc2f2577..1dcd4d0d 100755 --- a/src/secml/ml/classifiers/loss/c_loss_hinge.py +++ b/src/secml/ml/classifiers/loss/c_loss_hinge.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.ml.classifiers.loss import CLossClassification from secml.ml.classifiers.loss.c_loss import _check_binary_score from secml.ml.classifiers.clf_utils import convert_binary_labels @@ -34,7 +35,8 @@ class CLossHinge(CLossClassification): suitable_for : 'classification' """ - __class_type = 'hinge' + + __class_type = "hinge" def loss(self, y_true, score, pos_label=1): """Computes the value of the hinge loss function. @@ -127,7 +129,8 @@ class CLossHingeSquared(CLossClassification): suitable_for : 'classification' """ - __class_type = 'hinge-squared' + + __class_type = "hinge-squared" def loss(self, y_true, score, pos_label=1): """Computes the value of the squared hinge loss function. @@ -161,7 +164,7 @@ def loss(self, y_true, score, pos_label=1): h = 1.0 - y_true * score h[h < 0] = 0.0 - return h ** 2 + return h**2 def dloss(self, y_true, score, pos_label=1): """Computes the derivative of the squared hinge loss function with respect to `score`. diff --git a/src/secml/ml/classifiers/loss/c_loss_logistic.py b/src/secml/ml/classifiers/loss/c_loss_logistic.py index 95e161ce..f0c82367 100755 --- a/src/secml/ml/classifiers/loss/c_loss_logistic.py +++ b/src/secml/ml/classifiers/loss/c_loss_logistic.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.ml.classifiers.loss import CLossClassification from secml.ml.classifiers.loss.c_loss import _check_binary_score from secml.ml.classifiers.clf_utils import convert_binary_labels @@ -21,7 +22,8 @@ class CLossLogistic(CLossClassification): suitable_for : 'classification' """ - __class_type = 'log' + + __class_type = "log" def loss(self, y_true, score, pos_label=1, bound=10): """Computes the value of the logistic loss function. @@ -56,7 +58,7 @@ def loss(self, y_true, score, pos_label=1, bound=10): score = _check_binary_score(score, pos_label) # log(1 + exp(-y*s)) / log(2) - v = CArray(- y_true * score).astype(float) + v = CArray(-y_true * score).astype(float) if bound is None: v = (1.0 + v.exp()).log() @@ -103,7 +105,7 @@ def dloss(self, y_true, score, pos_label=1, bound=10): # d/df log ( 1+ exp(-yf)) / log(2) = # 1/ log(2) * ( 1+ exp(-yf)) exp(-yf) -y - v = CArray(- y_true * score).astype(float) + v = CArray(-y_true * score).astype(float) if bound is None: h = -y_true * v.exp() / (1.0 + v.exp()) @@ -112,7 +114,8 @@ def dloss(self, y_true, score, pos_label=1, bound=10): # linear approximation avoids numerical overflows # when -yf >> 1 : loss ~= -yf, and grad = -y h = -y_true.astype(float) - h[v < bound] = h[v < bound] * v[v < bound].exp() / \ - (1.0 + v[v < bound].exp()) + h[v < bound] = ( + h[v < bound] * v[v < bound].exp() / (1.0 + v[v < bound].exp()) + ) return h / CArray([2]).log() diff --git a/src/secml/ml/classifiers/loss/c_loss_squared.py b/src/secml/ml/classifiers/loss/c_loss_squared.py index d14431da..e5f06667 100755 --- a/src/secml/ml/classifiers/loss/c_loss_squared.py +++ b/src/secml/ml/classifiers/loss/c_loss_squared.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.loss import CLossRegression, CLossClassification from secml.ml.classifiers.loss.c_loss import _check_binary_score from secml.ml.classifiers.clf_utils import convert_binary_labels @@ -26,7 +27,8 @@ class CLossSquare(CLossClassification): suitable_for : 'classification' """ - __class_type = 'square' + + __class_type = "square" def loss(self, y_true, score, pos_label=1): """Computes the value of the squared epsilon-insensitive loss function. @@ -104,7 +106,8 @@ class CLossQuadratic(CLossRegression): suitable_for : 'regression' """ - __class_type = 'quadratic' + + __class_type = "quadratic" def loss(self, y_true, score): """Computes the value of the quadratic loss function. diff --git a/src/secml/ml/classifiers/loss/c_softmax.py b/src/secml/ml/classifiers/loss/c_softmax.py index f403a717..56ae6818 100644 --- a/src/secml/ml/classifiers/loss/c_softmax.py +++ b/src/secml/ml/classifiers/loss/c_softmax.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.core import CCreator from secml.array import CArray @@ -87,8 +88,7 @@ def gradient(self, s, y): """ if not s.is_vector_like: - raise ValueError( - "gradient can be computed for a single point only") + raise ValueError("gradient can be computed for a single point only") sigma_s = self.softmax(s) diff --git a/src/secml/ml/classifiers/loss/tests/test_c_loss_classification.py b/src/secml/ml/classifiers/loss/tests/test_c_loss_classification.py index f4e42970..dfe402d5 100644 --- a/src/secml/ml/classifiers/loss/tests/test_c_loss_classification.py +++ b/src/secml/ml/classifiers/loss/tests/test_c_loss_classification.py @@ -19,21 +19,22 @@ def setUp(self): self.svm = CClassifierSVM() self.svm.fit(self.ds.X, self.ds.Y) self.labels, self.scores = self.svm.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def test_one_at_zero(self): """Testing that classification loss return 1 for input 0.""" - for loss_id in ('hinge', 'hinge-squared', 'square', 'log'): + for loss_id in ("hinge", "hinge-squared", "square", "log"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) - self.assertEqual( - CArray([1.0]), loss_class.loss(CArray([1]), CArray([0]))) + self.assertEqual(CArray([1.0]), loss_class.loss(CArray([1]), CArray([0]))) def test_in_out(self): """Unittest for input and output to loss classes""" + def _check_loss(l, n_samples): self.assertIsInstance(l, CArray) @@ -42,7 +43,7 @@ def _check_loss(l, n_samples): self.assertEqual(n_samples, l.size) self.assertIsSubDtype(l.dtype, float) - for loss_id in ('hinge', 'hinge-squared', 'square', 'log'): + for loss_id in ("hinge", "hinge-squared", "square", "log"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) @@ -51,14 +52,18 @@ def _check_loss(l, n_samples): loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true, scores).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, self.ds.Y.size) loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel()) loss_mean_pos = loss_pos.mean() self.logger.info( "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n".format( - loss_class.__class__.__name__, loss_mean_pos)) + loss_class.__class__.__name__, loss_mean_pos + ) + ) _check_loss(loss_pos, self.ds.Y.size) self.assertEqual(loss_mean, loss_mean_pos) @@ -67,14 +72,18 @@ def _check_loss(l, n_samples): loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true, scores, pos_label=0).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, self.ds.Y.size) loss_neg = loss_class.loss(self.ds.Y, self.scores[:, 0].ravel()) loss_mean_neg = loss_neg.mean() self.logger.info( "{:}.loss(y_true, scores[:,0].ravel()).mean():\n".format( - loss_class.__class__.__name__, loss_mean_neg)) + loss_class.__class__.__name__, loss_mean_neg + ) + ) _check_loss(loss_neg, self.ds.Y.size) self.assertEqual(loss_mean, loss_mean_neg) @@ -83,7 +92,9 @@ def _check_loss(l, n_samples): loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, 1) def test_draw(self): @@ -95,10 +106,9 @@ def test_draw(self): fig = CFigure() x = CArray.arange(-1, 3.01, 0.01) - fig.sp.plot(x, CArray([1 if i <= 0 else 0 for i in x]), - label='0-1 indicator') + fig.sp.plot(x, CArray([1 if i <= 0 else 0 for i in x]), label="0-1 indicator") - for loss_id in ('hinge', 'hinge-squared', 'square', 'log'): + for loss_id in ("hinge", "hinge-squared", "square", "log"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) @@ -111,13 +121,14 @@ def test_draw(self): def test_grad(self): """Compare analytical gradients with its numerical approximation.""" + def _loss_wrapper(scores, loss, true_labels): return loss.loss(true_labels, scores) def _dloss_wrapper(scores, loss, true_labels): return loss.dloss(true_labels, scores) - for loss_id in ('hinge', 'hinge-squared', 'square', 'log'): + for loss_id in ("hinge", "hinge-squared", "square", "log"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) @@ -125,16 +136,22 @@ def _dloss_wrapper(scores, loss, true_labels): y_true = CArray.randint(0, 2, n_elemes).todense() score = CArray.randn((n_elemes,)) - check_grad_val = CFunction( - _loss_wrapper, _dloss_wrapper).check_grad( - score, 1e-8, loss=loss_class, true_labels=y_true) - self.logger.info("Gradient difference between analytical svm " - "gradient and numerical gradient: %s", - str(check_grad_val)) - self.assertLess(check_grad_val, 1e-4, - "the gradient is wrong {:} for {:} loss".format( - check_grad_val, loss_id)) - - -if __name__ == '__main__': + check_grad_val = CFunction(_loss_wrapper, _dloss_wrapper).check_grad( + score, 1e-8, loss=loss_class, true_labels=y_true + ) + self.logger.info( + "Gradient difference between analytical svm " + "gradient and numerical gradient: %s", + str(check_grad_val), + ) + self.assertLess( + check_grad_val, + 1e-4, + "the gradient is wrong {:} for {:} loss".format( + check_grad_val, loss_id + ), + ) + + +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/classifiers/loss/tests/test_c_loss_cross_entropy.py b/src/secml/ml/classifiers/loss/tests/test_c_loss_cross_entropy.py index da4a0252..c897699d 100644 --- a/src/secml/ml/classifiers/loss/tests/test_c_loss_cross_entropy.py +++ b/src/secml/ml/classifiers/loss/tests/test_c_loss_cross_entropy.py @@ -13,14 +13,16 @@ class TestCLossCrossEntropy(CUnitTest): """Unittests for CLossCrossEntropy and softmax.""" def setUp(self): - self.ds = CDLRandom(n_classes=3, n_samples=50, random_state=0, - n_informative=3).load() + self.ds = CDLRandom( + n_classes=3, n_samples=50, random_state=0, n_informative=3 + ).load() self.logger.info("Fit an SVM and classify dataset...") self.ova = CClassifierMulticlassOVA(CClassifierSVM) self.ova.fit(self.ds.X, self.ds.Y) self.labels, self.scores = self.ova.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def test_in_out(self): """Unittest for input and output to CCrossEntropy""" @@ -38,18 +40,23 @@ def _check_loss(l, n_samples): loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true, scores).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, self.ds.Y.size) loss = loss_class.loss(self.ds.Y[0], self.scores[0, :]) loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, 1) def test_grad(self): """Compare analytical gradients with its numerical approximation.""" + def _loss_wrapper(scores, loss, true_labels): return loss.loss(true_labels, scores) @@ -69,7 +76,8 @@ def _loss_wrapper(scores, loss, true_labels): self.logger.info("GRAD: {:}".format(grad)) approx = CFunction(_loss_wrapper).approx_fprime( - score, eps, loss_class, y_true) + score, eps, loss_class, y_true + ) self.logger.info("APPROX (FULL): {:}".format(approx)) pos_label = pos_label if pos_label is not None else y_true.item() @@ -79,12 +87,15 @@ def _loss_wrapper(scores, loss, true_labels): check_grad_val = (grad - approx).norm() - self.logger.info("Gradient difference between analytical svm " - "gradient and numerical gradient: %s", - str(check_grad_val)) - self.assertLess(check_grad_val, 1e-4, - "the gradient is wrong {:}".format(check_grad_val)) + self.logger.info( + "Gradient difference between analytical svm " + "gradient and numerical gradient: %s", + str(check_grad_val), + ) + self.assertLess( + check_grad_val, 1e-4, "the gradient is wrong {:}".format(check_grad_val) + ) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/classifiers/loss/tests/test_c_loss_regression.py b/src/secml/ml/classifiers/loss/tests/test_c_loss_regression.py index d237a11d..e0dcc0fc 100644 --- a/src/secml/ml/classifiers/loss/tests/test_c_loss_regression.py +++ b/src/secml/ml/classifiers/loss/tests/test_c_loss_regression.py @@ -18,10 +18,12 @@ def setUp(self): self.svm = CClassifierSVM() self.svm.fit(self.ds.X, self.ds.Y) self.labels, self.scores = self.svm.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def test_in_out(self): """Unittest for input and output to loss classes""" + def _check_loss(l, n_samples): self.assertIsInstance(l, CArray) @@ -30,9 +32,7 @@ def _check_loss(l, n_samples): self.assertEqual(n_samples, l.size) self.assertIsSubDtype(l.dtype, float) - for loss_id in ('e-insensitive', - 'e-insensitive-squared', - 'quadratic'): + for loss_id in ("e-insensitive", "e-insensitive-squared", "quadratic"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) @@ -41,14 +41,18 @@ def _check_loss(l, n_samples): loss_mean_pos = loss_pos.mean() self.logger.info( "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n".format( - loss_class.__class__.__name__, loss_mean_pos)) + loss_class.__class__.__name__, loss_mean_pos + ) + ) _check_loss(loss_pos, self.ds.Y.size) loss = loss_class.loss(self.ds.Y[0], self.scores[0, 1].ravel()) loss_mean = loss.mean() self.logger.info( "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format( - loss_class.__class__.__name__, loss_mean)) + loss_class.__class__.__name__, loss_mean + ) + ) _check_loss(loss, 1) with self.assertRaises(ValueError): @@ -63,9 +67,7 @@ def test_draw(self): fig = CFigure() x = CArray.arange(-1, 3.01, 0.01) - for loss_id in ('e-insensitive', - 'e-insensitive-squared', - 'quadratic'): + for loss_id in ("e-insensitive", "e-insensitive-squared", "quadratic"): self.logger.info("Creating loss: {:}".format(loss_id)) loss_class = CLoss.create(loss_id) @@ -77,5 +79,5 @@ def test_draw(self): fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/classifiers/loss/tests/test_c_softmax.py b/src/secml/ml/classifiers/loss/tests/test_c_softmax.py index d7a68f0c..b0c33805 100644 --- a/src/secml/ml/classifiers/loss/tests/test_c_softmax.py +++ b/src/secml/ml/classifiers/loss/tests/test_c_softmax.py @@ -11,14 +11,16 @@ class TestCSoftmax(CUnitTest): """Unittests for CSoftmax.""" def setUp(self): - self.ds = CDLRandom(n_classes=3, n_samples=50, random_state=0, - n_informative=3).load() + self.ds = CDLRandom( + n_classes=3, n_samples=50, random_state=0, n_informative=3 + ).load() self.logger.info("Fit an SVM and classify dataset...") self.ova = CClassifierMulticlassOVA(CClassifierSVM) self.ova.fit(self.ds.X, self.ds.Y) self.labels, self.scores = self.ova.predict( - self.ds.X, return_decision_function=True) + self.ds.X, return_decision_function=True + ) def test_softmax(self): """Unittests for softmax function.""" @@ -44,7 +46,7 @@ def test_softmax(self): def test_softmax_gradient(self): """Unittests for softmax gradient: - Compare analytical gradients with its numerical approximation.""" + Compare analytical gradients with its numerical approximation.""" self.softmax = CSoftmax() @@ -76,19 +78,21 @@ def _sigma_pos_label(s, y): self.logger.info("ANALITICAL GRAD: {:}".format(grad)) - approx = CFunction(_sigma_pos_label).approx_fprime( - score, 1e-5, pos_label) + approx = CFunction(_sigma_pos_label).approx_fprime(score, 1e-5, pos_label) self.logger.info("NUMERICAL GRADIENT: {:}".format(approx)) check_grad_val = (grad - approx).norm() - self.logger.info("The norm of the difference bettween the " - "analytical and the numerical gradient is: %s", - str(check_grad_val)) - self.assertLess(check_grad_val, 1e-4, - "the gradient is wrong {:}".format(check_grad_val)) + self.logger.info( + "The norm of the difference bettween the " + "analytical and the numerical gradient is: %s", + str(check_grad_val), + ) + self.assertLess( + check_grad_val, 1e-4, "the gradient is wrong {:}".format(check_grad_val) + ) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/classifiers/multiclass/c_classifier_multi.py b/src/secml/ml/classifiers/multiclass/c_classifier_multi.py index adf01258..c8a35e86 100644 --- a/src/secml/ml/classifiers/multiclass/c_classifier_multi.py +++ b/src/secml/ml/classifiers/multiclass/c_classifier_multi.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.ml.classifiers import CClassifier @@ -30,16 +31,17 @@ class CClassifierMulticlass(CClassifier, metaclass=ABCMeta): Any other construction parameter for the binary classifiers. """ - __super__ = 'CClassifierMulticlass' + + __super__ = "CClassifierMulticlass" def __init__(self, classifier, preprocess=None, n_jobs=1, **clf_params): # Calling init of CClassifier super(CClassifierMulticlass, self).__init__( - preprocess=preprocess, n_jobs=n_jobs) + preprocess=preprocess, n_jobs=n_jobs + ) # Binary classifier to use if not issubclass(classifier, CClassifier): - raise TypeError( - "Input classifier must be a subclass of CClassifier") + raise TypeError("Input classifier must be a subclass of CClassifier") # List of binary classifiers self._binary_classifiers = [classifier(**clf_params)] @@ -92,13 +94,12 @@ def set(self, param_name, param_value, copy=False): """ # Support for recursive setting, e.g. -> kernel.gamma - sup_param_name = param_name.split('.', 1)[0] + sup_param_name = param_name.split(".", 1)[0] # Check if we are setting a parameter of the multiclass classifier if hasattr(self, sup_param_name): # Call standard set on the multiclass clf object - super(CClassifierMulticlass, self).set( - param_name, param_value, copy=copy) + super(CClassifierMulticlass, self).set(param_name, param_value, copy=copy) return # SET PARAMETERS OF BINARY CLASSIFIERS @@ -107,9 +108,11 @@ def set(self, param_name, param_value, copy=False): if isinstance(param_value, tuple): # Check if enough binary classifiers are available if len(param_value) != self.num_classifiers: - raise ValueError("{0} binary classifier instances needed." - " Use .prepare(num_classes={0}) first" - "".format(len(param_value))) + raise ValueError( + "{0} binary classifier instances needed." + " Use .prepare(num_classes={0}) first" + "".format(len(param_value)) + ) # Update parameter (different value) in each binary classifier for clf_idx, clf in enumerate(self._binary_classifiers): clf.set(param_name, param_value[clf_idx], copy=copy) @@ -119,8 +122,7 @@ def set(self, param_name, param_value, copy=False): clf.set(param_name, param_value, copy=copy) return - raise ValueError( - "cannot set unknown parameter '{:}'".format(param_name)) + raise ValueError("cannot set unknown parameter '{:}'".format(param_name)) def get_state(self, **kwargs): """Returns the object state dictionary. @@ -153,7 +155,7 @@ def get_state(self, **kwargs): # these attributes from the attributes of the main classifier # Finally convert to tuple in order to prevent future modifications for attr in list(clf_ref_state): - new_key = 'binary_classifiers.' + attr + new_key = "binary_classifiers." + attr clf_ref_state[new_key] = tuple(clf_ref_state[attr]) del clf_ref_state[attr] @@ -189,40 +191,43 @@ def set_state(self, state_dict, copy=False): param_value = state_dict[param_name] # Support for recursive setting, e.g. -> kernel.gamma - sup_param_name = param_name.split('.', 1)[0] + sup_param_name = param_name.split(".", 1)[0] # Check if we are setting a parameter of the multiclass classifier if hasattr(self, sup_param_name): # Call standard set on the multiclass clf object super(CClassifierMulticlass, self).set_state( - {param_name: param_value}, copy=copy) + {param_name: param_value}, copy=copy + ) continue # SET PARAMETERS OF BINARY CLASSIFIERS - elif param_name.startswith('binary_classifiers.'): + elif param_name.startswith("binary_classifiers."): # Remove the identifier of the binary classifiers's attributes - sub_param_name = param_name[len('binary_classifiers.'):] + sub_param_name = param_name[len("binary_classifiers.") :] if sub_param_name in self._binary_classifiers[0].get_state(): if not isinstance(param_value, tuple): - raise ValueError("state of attribute '{:}' " - "must specified as a tuple" - "".format(param_name)) + raise ValueError( + "state of attribute '{:}' " + "must specified as a tuple" + "".format(param_name) + ) # Check if enough binary classifiers are available if len(param_value) != self.num_classifiers: self.prepare(len(param_value)) # Update attribute (different value) in each binary clf for clf_idx, clf in enumerate(self._binary_classifiers): - clf.set_state( - {sub_param_name: param_value[clf_idx]}, copy=copy) + clf.set_state({sub_param_name: param_value[clf_idx]}, copy=copy) continue raise AttributeError( - "cannot set unknown attribute '{:}'".format(param_name)) + "cannot set unknown attribute '{:}'".format(param_name) + ) def prepare(self, num_classes): """Creates num_classes copies of the binary classifier. @@ -239,6 +244,7 @@ def prepare(self, num_classes): """ from copy import deepcopy + if num_classes < 1: raise ValueError("number of classes must be higher than 0") clf = self._binary_classifiers[0] # Use the first clf as base @@ -248,8 +254,9 @@ def prepare(self, num_classes): # Delete binary classifiers in excess del self._binary_classifiers[num_classes:] - def estimate_parameters(self, dataset, parameters, splitter, metric, - pick='first', perf_evaluator='xval'): + def estimate_parameters( + self, dataset, parameters, splitter, metric, pick="first", perf_evaluator="xval" + ): """Estimate parameter that give better result respect a chose metric. Parameters @@ -294,7 +301,8 @@ def estimate_parameters(self, dataset, parameters, splitter, metric, splitter=splitter, metric=metric, pick=pick, - perf_evaluator=perf_evaluator) + perf_evaluator=perf_evaluator, + ) @abstractmethod def _fit(self, x, y): diff --git a/src/secml/ml/classifiers/multiclass/c_classifier_multi_ova.py b/src/secml/ml/classifiers/multiclass/c_classifier_multi_ova.py index 8a5e37ef..c09fad7d 100644 --- a/src/secml/ml/classifiers/multiclass/c_classifier_multi_ova.py +++ b/src/secml/ml/classifiers/multiclass/c_classifier_multi_ova.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.ml.classifiers.multiclass import CClassifierMulticlass from secml.ml.classifiers.gradients import CClassifierGradientMixin from secml.array import CArray @@ -12,8 +13,7 @@ from secml.parallel import parfor2 -def _fit_one_ova( - tr_class_idx, multi_ova, dataset, verbose): +def _fit_one_ova(tr_class_idx, multi_ova, dataset, verbose): """Fit a OVA classifier. Parameters @@ -34,8 +34,7 @@ def _fit_one_ova( # level is stored per-object looking to id multi_ova.verbose = verbose - multi_ova.logger.info( - "Training against class: {:}".format(tr_class_idx)) + multi_ova.logger.info("Training against class: {:}".format(tr_class_idx)) # Binarizing dataset train_ds = multi_ova.binarize_dataset(tr_class_idx, dataset) @@ -70,15 +69,13 @@ def _forward_one_ova(tr_class_idx, multi_ova, test_x, verbose): # level is stored per-object looking to id multi_ova.verbose = verbose - multi_ova.logger.info( - "Forward for class: {:}".format(tr_class_idx)) + multi_ova.logger.info("Forward for class: {:}".format(tr_class_idx)) # Perform forward on data for current class classifier return multi_ova._binary_classifiers[tr_class_idx].forward(test_x)[:, 1] -class CClassifierMulticlassOVA(CClassifierMulticlass, - CClassifierGradientMixin): +class CClassifierMulticlassOVA(CClassifierMulticlass, CClassifierGradientMixin): """OVA (One-Vs-All) Multiclass Classifier. Parameters @@ -93,15 +90,13 @@ class CClassifierMulticlassOVA(CClassifierMulticlass, class_type : 'ova' """ - __class_type = 'ova' + + __class_type = "ova" def __init__(self, classifier, preprocess=None, n_jobs=1, **clf_params): super(CClassifierMulticlassOVA, self).__init__( - classifier=classifier, - preprocess=preprocess, - n_jobs=n_jobs, - **clf_params + classifier=classifier, preprocess=preprocess, n_jobs=n_jobs, **clf_params ) def _fit(self, x, y): @@ -127,10 +122,14 @@ def _fit(self, x, y): # Fit a one-vs-all classifier for each class # Use the specified number of workers - self._binary_classifiers = parfor2(_fit_one_ova, - self.classes.size, - self.n_jobs, self, CDataset(x, y), - self.verbose) + self._binary_classifiers = parfor2( + _fit_one_ova, + self.classes.size, + self.n_jobs, + self, + CDataset(x, y), + self.verbose, + ) return self @@ -152,8 +151,10 @@ def binarize_dataset(class_idx, dataset): """ return CDataset( - dataset.X, dataset.get_labels_ovr(dataset.classes[class_idx]), - header=dataset.header) + dataset.X, + dataset.get_labels_ovr(dataset.classes[class_idx]), + header=dataset.header, + ) def _forward(self, x): """Computes the decision function for each pattern in x. @@ -179,10 +180,9 @@ def _forward(self, x): scores = CArray.empty(shape=(x.shape[0], self.n_classes)) # Discriminant function is now called for each different class - res = parfor2(_forward_one_ova, - self.n_classes, - self.n_jobs, self, x, - self.verbose) + res = parfor2( + _forward_one_ova, self.n_classes, self.n_jobs, self, x, self.verbose + ) # Building results array for i in range(self.n_classes): @@ -196,8 +196,7 @@ def _backward(self, w): w = CArray.ones(shape=(self.n_classes,)) # this is where we'll accumulate grads - grad = CArray.zeros( - shape=self._cached_x.shape, sparse=self._cached_x.issparse) + grad = CArray.zeros(shape=self._cached_x.shape, sparse=self._cached_x.issparse) # loop only over non-zero elements in w, to save computations for c in w.nnz_indices[1]: diff --git a/src/secml/ml/classifiers/multiclass/c_classifier_multi_ovo.py b/src/secml/ml/classifiers/multiclass/c_classifier_multi_ovo.py index 4a57942d..f22ed576 100644 --- a/src/secml/ml/classifiers/multiclass/c_classifier_multi_ovo.py +++ b/src/secml/ml/classifiers/multiclass/c_classifier_multi_ovo.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from itertools import combinations from secml.ml.classifiers.multiclass import CClassifierMulticlass @@ -45,8 +46,8 @@ def _fit_one_ovo(bin_clf_idx, multi_ovo, dataset, verbose): vs_class_idx = multi_ovo._clf_pair_idx[bin_clf_idx][1] multi_ovo.logger.info( - "Training class {:} against class: {:}".format( - tr_class_idx, vs_class_idx)) + "Training class {:} against class: {:}".format(tr_class_idx, vs_class_idx) + ) # Create the training dataset train_ds = multi_ovo.binarize_subset(tr_class_idx, vs_class_idx, dataset) @@ -82,14 +83,14 @@ def _forward_one_ovo(clf_idx, multi_ovo, test_x, verbose): multi_ovo.verbose = verbose multi_ovo.logger.info( - "Forward for classes: {:}".format(multi_ovo._clf_pair_idx[clf_idx])) + "Forward for classes: {:}".format(multi_ovo._clf_pair_idx[clf_idx]) + ) # Perform forward on data for current class classifier return multi_ovo._binary_classifiers[clf_idx].forward(test_x) -class CClassifierMulticlassOVO(CClassifierMulticlass, - CClassifierGradientMixin): +class CClassifierMulticlassOVO(CClassifierMulticlass, CClassifierGradientMixin): """OVO (One-Vs-One) Multiclass Classifier. Parameters @@ -104,14 +105,13 @@ class CClassifierMulticlassOVO(CClassifierMulticlass, class_type : 'ovo' """ - __class_type = 'ovo' + + __class_type = "ovo" def __init__(self, classifier, preprocess=None, **clf_params): super(CClassifierMulticlassOVO, self).__init__( - classifier=classifier, - preprocess=preprocess, - **clf_params + classifier=classifier, preprocess=preprocess, **clf_params ) # List with the binary classifiers classes pairs @@ -151,10 +151,14 @@ def _fit(self, x, y): # Fit a one-vs-one classifier # Use the specified number of workers - self._binary_classifiers = parfor2(_fit_one_ovo, - self.num_classifiers, - self.n_jobs, self, CDataset(x, y), - self.verbose) + self._binary_classifiers = parfor2( + _fit_one_ovo, + self.num_classifiers, + self.n_jobs, + self, + CDataset(x, y), + self.verbose, + ) return self @@ -187,7 +191,8 @@ def binarize_subset(tr_class_idx, vs_class_idx, dataset): # Using get_labels_ovr to avoid redundant functions return CDataset( - subset.X, subset.get_labels_ovr(tr_class), header=dataset.header) + subset.X, subset.get_labels_ovr(tr_class), header=dataset.header + ) @staticmethod def binarize_dataset(class_idx, dataset): @@ -232,10 +237,9 @@ def _forward(self, x): scores = CArray.zeros(shape=(x.shape[0], self.n_classes)) # Discriminant function is now called for each different class - res = parfor2(_forward_one_ovo, - self.num_classifiers, - self.n_jobs, self, x, - self.verbose) + res = parfor2( + _forward_one_ovo, self.num_classifiers, self.n_jobs, self, x, self.verbose + ) # Building results array for i in range(self.num_classifiers): @@ -250,7 +254,7 @@ def _forward(self, x): def _backward(self, w): """Implement gradient of decision function wrt x.""" if w is None: - raise ValueError('Pre-multiplying vector w cannot be None.') + raise ValueError("Pre-multiplying vector w cannot be None.") grad = None # To accumulate grads for i in range(self.num_classifiers): # TODO parfor @@ -260,12 +264,14 @@ def _backward(self, w): idx1 = self._clf_pair_idx[i][1] w_pos = CArray([1, 0]) - grad_pos = w[idx0] * \ - self._binary_classifiers[i].gradient(self._cached_x, w_pos) + grad_pos = w[idx0] * self._binary_classifiers[i].gradient( + self._cached_x, w_pos + ) w_neg = CArray([0, 1]) - grad_neg = w[idx1] * \ - self._binary_classifiers[i].gradient(self._cached_x, w_neg) + grad_neg = w[idx1] * self._binary_classifiers[i].gradient( + self._cached_x, w_neg + ) # Adjusting the scores for the OVO scheme grad = grad_pos if grad is None else grad + grad_pos diff --git a/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ova.py b/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ova.py index 493aafa0..bb82d72b 100644 --- a/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ova.py +++ b/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ova.py @@ -21,30 +21,30 @@ def setUp(self): def test_predict_withsvm(self): - svc = SVC(kernel='linear', class_weight='balanced') + svc = SVC(kernel="linear", class_weight="balanced") multiclass_sklearn = OneVsRestClassifier(svc) - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced', - n_jobs=2) + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced", n_jobs=2 + ) multiclass.verbose = 2 multiclass.fit(self.dataset.X, self.dataset.Y) class_pred, score_pred = multiclass.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) self.logger.info("Predicted: \n{:}".format(class_pred)) self.logger.info("Real: \n{:}".format(self.dataset.Y)) - acc = CMetric.create('accuracy').performance_score( - self.dataset.Y, class_pred) + acc = CMetric.create("accuracy").performance_score(self.dataset.Y, class_pred) self.logger.info("Accuracy: {:}".format(acc)) - multiclass_sklearn.fit(self.dataset.X.get_data(), - self.dataset.Y.tondarray()) + multiclass_sklearn.fit(self.dataset.X.get_data(), self.dataset.Y.tondarray()) y_sklearn = multiclass_sklearn.predict(self.dataset.X.get_data()) - acc_sklearn = CMetric.create('accuracy').performance_score( - self.dataset.Y, CArray(y_sklearn)) + acc_sklearn = CMetric.create("accuracy").performance_score( + self.dataset.Y, CArray(y_sklearn) + ) self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn)) self.assertLess(abs(acc - acc_sklearn), 0.01) @@ -52,23 +52,25 @@ def test_predict_withsvm(self): def test_set(self): from secml.ml.kernels import CKernelRBF - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - C=1, kernel=CKernelRBF()) + + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, C=1, kernel=CKernelRBF() + ) # Test set before training - multiclass.set_params({'C': 100, 'kernel.gamma': 20}) + multiclass.set_params({"C": 100, "kernel.gamma": 20}) for clf in multiclass._binary_classifiers: self.assertEqual(clf.C, 100.0) self.assertEqual(clf.kernel.gamma, 20.0) # Restoring gamma - multiclass.set('kernel.gamma', 50) + multiclass.set("kernel.gamma", 50) # Setting different parameter in single trained_classifiers multiclass.prepare(num_classes=4) different_c = (10, 20, 30, 40) - multiclass.set('C', different_c) + multiclass.set("C", different_c) different_gamma = (50, 60, 70, 80) - multiclass.set('kernel.gamma', different_gamma) + multiclass.set("kernel.gamma", different_gamma) # Fit multiclass classifier than test set after training multiclass.fit(self.dataset.X, self.dataset.Y) @@ -78,7 +80,7 @@ def test_set(self): self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx]) # Test set after training - multiclass.set_params({'C': 30, 'kernel.gamma': 200}) + multiclass.set_params({"C": 30, "kernel.gamma": 200}) for clf in multiclass._binary_classifiers: self.assertEqual(clf.C, 30.0) self.assertEqual(clf.kernel.gamma, 200.0) @@ -90,28 +92,27 @@ def test_set(self): # Setting parameter in single trained_classifiers multiclass._binary_classifiers[0].kernel.gamma = 300 for i in range(1, multiclass.num_classifiers): - self.assertNotEqual( - multiclass._binary_classifiers[i].kernel.gamma, 300.0) + self.assertNotEqual(multiclass._binary_classifiers[i].kernel.gamma, 300.0) # Setting different parameter in single trained_classifiers different_c = (100, 200, 300) # ValueError is raised as not enough binary classifiers are available with self.assertRaises(ValueError): - multiclass.set('C', different_c) + multiclass.set("C", different_c) multiclass.prepare(num_classes=3) - multiclass.set('C', different_c) + multiclass.set("C", different_c) for clf_idx, clf in enumerate(multiclass._binary_classifiers): self.assertEqual(clf.C, different_c[clf_idx]) def test_apply_method(self): - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) multiclass.fit(self.dataset.X, self.dataset.Y) - multiclass.apply_method(CClassifierSVM.set, param_name='C', - param_value=150) + multiclass.apply_method(CClassifierSVM.set, param_name="C", param_value=150) for i in range(multiclass.num_classifiers): self.assertEqual(multiclass._binary_classifiers[i].C, 150) @@ -123,79 +124,94 @@ def test_normalization(self): ds_norm_x = CNormalizerMinMax().fit_transform(self.dataset.X) - multi_nonorm = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + multi_nonorm = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) multi_nonorm.fit(ds_norm_x, self.dataset.Y) pred_y_nonorm = multi_nonorm.predict(ds_norm_x) - multi = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced', - preprocess='min-max') + multi = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced", preprocess="min-max" + ) multi.fit(self.dataset.X, self.dataset.Y) pred_y = multi.predict(self.dataset.X) - self.logger.info( - "Predictions with internal norm:\n{:}".format(pred_y)) - self.logger.info( - "Predictions with external norm:\n{:}".format(pred_y_nonorm)) + self.logger.info("Predictions with internal norm:\n{:}".format(pred_y)) + self.logger.info("Predictions with external norm:\n{:}".format(pred_y_nonorm)) self.assertFalse((pred_y_nonorm != pred_y).any()) def test_plot_decision_function(self): """Test plot of multiclass classifier decision function.""" # generate synthetic data - ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0, - n_clusters_per_class=1, class_sep=1, - random_state=0).load() + ds = CDLRandom( + n_classes=3, + n_features=2, + n_redundant=0, + n_clusters_per_class=1, + class_sep=1, + random_state=0, + ).load() multiclass = CClassifierMulticlassOVA( - classifier=CClassifierSVM, - class_weight='balanced', - preprocess='min-max') + classifier=CClassifierSVM, class_weight="balanced", preprocess="min-max" + ) # Training and classification multiclass.fit(ds.X, ds.Y) - y_pred, score_pred = multiclass.predict( - ds.X, return_decision_function=True) + y_pred, score_pred = multiclass.predict(ds.X, return_decision_function=True) def plot_hyperplane(img, clf, min_v, max_v, linestyle, label): """Plot the hyperplane associated to the OVA clf.""" xx = CArray.linspace( - min_v - 5, max_v + 5) # make sure the line is long enough + min_v - 5, max_v + 5 + ) # make sure the line is long enough # get the separating hyperplane yy = -(clf.w[0] * xx + clf.b) / clf.w[1] img.sp.plot(xx, yy.ravel(), linestyle, label=label) fig = CFigure(height=7, width=8) - fig.sp.title('{:} ({:})'.format(multiclass.__class__.__name__, - multiclass.classifier.__name__)) + fig.sp.title( + "{:} ({:})".format( + multiclass.__class__.__name__, multiclass.classifier.__name__ + ) + ) x_bounds, y_bounds = ds.get_bounds() - styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.'] + styles = ["go-", "yp--", "rs-.", "bD--", "c-.", "m-", "y-."] for c_idx, c in enumerate(ds.classes): # Plot boundary and predicted label for each OVA classifier - plot_hyperplane(fig, multiclass._binary_classifiers[c_idx], - x_bounds[0], x_bounds[1], styles[c_idx], - 'Boundary\nfor class {:}'.format(c)) - - fig.sp.scatter(ds.X[ds.Y == c, 0], - ds.X[ds.Y == c, 1], - s=40, c=styles[c_idx][0]) - fig.sp.scatter(ds.X[y_pred == c, 0], ds.X[y_pred == c, 1], s=160, - edgecolors=styles[c_idx][0], - facecolors='none', linewidths=2) + plot_hyperplane( + fig, + multiclass._binary_classifiers[c_idx], + x_bounds[0], + x_bounds[1], + styles[c_idx], + "Boundary\nfor class {:}".format(c), + ) + + fig.sp.scatter( + ds.X[ds.Y == c, 0], ds.X[ds.Y == c, 1], s=40, c=styles[c_idx][0] + ) + fig.sp.scatter( + ds.X[y_pred == c, 0], + ds.X[y_pred == c, 1], + s=160, + edgecolors=styles[c_idx][0], + facecolors="none", + linewidths=2, + ) # Plotting multiclass decision function - fig.sp.plot_decision_regions(multiclass, n_grid_points=100, - grid_limits=ds.get_bounds(offset=5)) + fig.sp.plot_decision_regions( + multiclass, n_grid_points=100, grid_limits=ds.get_bounds(offset=5) + ) - fig.sp.xlim(x_bounds[0] - .5 * x_bounds[1], - x_bounds[1] + .5 * x_bounds[1]) - fig.sp.ylim(y_bounds[0] - .5 * y_bounds[1], - y_bounds[1] + .5 * y_bounds[1]) + fig.sp.xlim(x_bounds[0] - 0.5 * x_bounds[1], x_bounds[1] + 0.5 * x_bounds[1]) + fig.sp.ylim(y_bounds[0] - 0.5 * y_bounds[1], y_bounds[1] + 0.5 * y_bounds[1]) fig.sp.legend(loc=4) # lower, right @@ -203,11 +219,11 @@ def plot_hyperplane(img, clf, min_v, max_v, linestyle, label): def test_fun(self): """Test for decision_function() and predict() methods.""" - self.logger.info( - "Test for decision_function() and predict() methods.") + self.logger.info("Test for decision_function() and predict() methods.") - mc = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + mc = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) scores_d = self._test_fun(mc, self.dataset.todense()) scores_s = self._test_fun(mc, self.dataset.tosparse()) @@ -216,8 +232,9 @@ def test_fun(self): def test_gradient(self): """Unittests for gradient() function.""" - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) i = 5 # Sample to test @@ -236,8 +253,7 @@ def test_gradient(self): ova_grad = multiclass._binary_classifiers[i].grad_f_x(pattern, y=1) gradient = multiclass.grad_f_x(pattern, y=i) - self.logger.info( - "Gradient of {:}^th sub-clf is:\n{:}".format(i, gradient)) + self.logger.info("Gradient of {:}^th sub-clf is:\n{:}".format(i, gradient)) self.assert_array_equal(gradient, ova_grad) @@ -252,8 +268,7 @@ def test_gradient(self): # Compare dense gradients with sparse gradients for grad_i, grad in enumerate(grads_d): - self.assert_array_almost_equal( - grad.atleast_2d(), grads_s[grad_i]) + self.assert_array_almost_equal(grad.atleast_2d(), grads_s[grad_i]) # Test error raise with self.assertRaises(ValueError): @@ -264,8 +279,9 @@ def test_gradient(self): def test_multiclass_gradient(self): """Test if gradient is correct when requesting for all classes with w""" - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) multiclass.fit(self.dataset.X, self.dataset.Y) div = CArray.rand(shape=multiclass.n_classes, random_state=0) @@ -274,68 +290,74 @@ def f_x(x): return CArray((x / div).mean()) def grad_f_x(x): - w = CArray.ones(shape=multiclass.n_classes) / \ - (div * multiclass.n_classes) + w = CArray.ones(shape=multiclass.n_classes) / (div * multiclass.n_classes) return multiclass.gradient(x, w=w) i = 5 # Sample to test x = self.dataset.X[i, :] from secml.optim.function import CFunction + check_grad_val = CFunction(f_x, grad_f_x).check_grad(x, epsilon=1e-1) - self.logger.info( - "norm(grad - num_grad): %s", str(check_grad_val)) + self.logger.info("norm(grad - num_grad): %s", str(check_grad_val)) self.assertLess(check_grad_val, 1e-3) def test_preprocess(self): """Test classifier with preprocessors inside.""" - multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVA( + classifier=CClassifierSVM, class_weight="balanced" + ) # All linear transformations with gradient implemented - self._test_preprocess(self.dataset, multiclass, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(self.dataset, multiclass, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, + multiclass, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) + self._test_preprocess_grad( + self.dataset, + multiclass, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess( - self.dataset, multiclass, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(self.dataset, multiclass, ["pca", "unit-norm"], [{}, {}]) def test_set_get_state(self): """Test for set_state and get_state.""" - pre = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}]) + pre = CPreProcess.create_chain(["pca", "mean-std"], [{}, {}]) multi = CClassifierMulticlassOVA( classifier=CClassifierSVM, - kernel='rbf', - class_weight='balanced', - preprocess=pre) + kernel="rbf", + class_weight="balanced", + preprocess=pre, + ) # Setting different parameter in single trained_classifiers multi.prepare(num_classes=4) different_c = (10, 20, 30, 40) - multi.set('C', different_c) + multi.set("C", different_c) different_gamma = (50, 60, 70, 80) - multi.set('kernel.gamma', different_gamma) + multi.set("kernel.gamma", different_gamma) multi.fit(self.dataset.X, self.dataset.Y) pred_y = multi.predict(self.dataset.X) - self.logger.info( - "Predictions before restoring state:\n{:}".format(pred_y)) + self.logger.info("Predictions before restoring state:\n{:}".format(pred_y)) state = multi.get_state() self.logger.info("State of multiclass:\n{:}".format(state)) # Create an entirely new clf - pre_post = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}]) + pre_post = CPreProcess.create_chain(["pca", "mean-std"], [{}, {}]) multi_post = CClassifierMulticlassOVA( classifier=CClassifierSVM, - kernel='rbf', - class_weight='balanced', - preprocess=pre_post) + kernel="rbf", + class_weight="balanced", + preprocess=pre_post, + ) # Restore state multi_post.set_state(state) @@ -345,11 +367,10 @@ def test_set_get_state(self): self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx]) pred_y_post = multi_post.predict(self.dataset.X) - self.logger.info( - "Predictions after restoring state:\n{:}".format(pred_y_post)) + self.logger.info("Predictions after restoring state:\n{:}".format(pred_y_post)) self.assert_array_equal(pred_y, pred_y_post) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ovo.py b/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ovo.py index fce69661..46dfb1e1 100644 --- a/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ovo.py +++ b/src/secml/ml/classifiers/multiclass/tests/test_c_classifier_multi_ovo.py @@ -20,30 +20,30 @@ def setUp(self): def test_predict_withsvm(self): - svc = SVC(kernel='linear', class_weight='balanced') + svc = SVC(kernel="linear", class_weight="balanced") multiclass_sklearn = OneVsOneClassifier(svc) - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced', - n_jobs=2) + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced", n_jobs=2 + ) multiclass.verbose = 2 multiclass.fit(self.dataset.X, self.dataset.Y) class_pred, score_pred = multiclass.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) self.logger.info("Predicted: \n{:}".format(class_pred)) self.logger.info("Real: \n{:}".format(self.dataset.Y)) - acc = CMetric.create('accuracy').performance_score( - self.dataset.Y, class_pred) + acc = CMetric.create("accuracy").performance_score(self.dataset.Y, class_pred) self.logger.info("Accuracy: {:}".format(acc)) - multiclass_sklearn.fit(self.dataset.X.get_data(), - self.dataset.Y.tondarray()) + multiclass_sklearn.fit(self.dataset.X.get_data(), self.dataset.Y.tondarray()) y_sklearn = multiclass_sklearn.predict(self.dataset.X.get_data()) - acc_sklearn = CMetric.create('accuracy').performance_score( - self.dataset.Y, CArray(y_sklearn)) + acc_sklearn = CMetric.create("accuracy").performance_score( + self.dataset.Y, CArray(y_sklearn) + ) self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn)) self.assertLess(abs(acc - acc_sklearn), 0.21) @@ -51,23 +51,25 @@ def test_predict_withsvm(self): def test_set(self): from secml.ml.kernels import CKernelRBF - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - C=1, kernel=CKernelRBF()) + + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, C=1, kernel=CKernelRBF() + ) # Test set before training - multiclass.set_params({'C': 100, 'kernel.gamma': 20}) + multiclass.set_params({"C": 100, "kernel.gamma": 20}) for clf in multiclass._binary_classifiers: self.assertEqual(clf.C, 100.0) self.assertEqual(clf.kernel.gamma, 20.0) # Restoring kernel - multiclass.set('kernel.gamma', 50) + multiclass.set("kernel.gamma", 50) # Setting different parameter in single trained_classifiers multiclass.prepare(num_classes=6) different_c = (10, 20, 30, 40, 50, 60) - multiclass.set('C', different_c) + multiclass.set("C", different_c) different_gamma = (70, 80, 90, 100, 110, 120) - multiclass.set('kernel.gamma', different_gamma) + multiclass.set("kernel.gamma", different_gamma) # Fit multiclass classifier than test set after training multiclass.fit(self.dataset.X, self.dataset.Y) @@ -77,7 +79,7 @@ def test_set(self): self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx]) # Test set after training - multiclass.set_params({'C': 30, 'kernel.gamma': 200}) + multiclass.set_params({"C": 30, "kernel.gamma": 200}) for clf in multiclass._binary_classifiers: self.assertEqual(clf.C, 30.0) self.assertEqual(clf.kernel.gamma, 200.0) @@ -89,28 +91,27 @@ def test_set(self): # Setting parameter in single trained_classifiers multiclass._binary_classifiers[0].kernel.gamma = 300 for i in range(1, multiclass.num_classifiers): - self.assertNotEqual( - multiclass._binary_classifiers[i].kernel.gamma, 300.0) + self.assertNotEqual(multiclass._binary_classifiers[i].kernel.gamma, 300.0) # Setting different parameter in single trained_classifiers different_c = (100, 200, 300) # ValueError is raised as not enough binary classifiers are available with self.assertRaises(ValueError): - multiclass.set('C', different_c) + multiclass.set("C", different_c) multiclass.prepare(num_classes=3) - multiclass.set('C', different_c) + multiclass.set("C", different_c) for clf_idx, clf in enumerate(multiclass._binary_classifiers): self.assertEqual(clf.C, different_c[clf_idx]) def test_apply_method(self): - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') - multiclass.fit(self.dataset.X,self.dataset.Y) - multiclass.apply_method(CClassifierSVM.set, param_name='C', - param_value=150) + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) + multiclass.fit(self.dataset.X, self.dataset.Y) + multiclass.apply_method(CClassifierSVM.set, param_name="C", param_value=150) for i in range(multiclass.num_classifiers): self.assertEqual(multiclass._binary_classifiers[i].C, 150) @@ -121,79 +122,94 @@ def test_normalization(self): ds_norm_x = CNormalizerMinMax().fit_transform(self.dataset.X) - multi_nonorm = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') + multi_nonorm = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) multi_nonorm.fit(ds_norm_x, self.dataset.Y) pred_y_nonorm = multi_nonorm.predict(ds_norm_x) - multi = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced', - preprocess='min-max') + multi = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced", preprocess="min-max" + ) multi.fit(self.dataset.X, self.dataset.Y) pred_y = multi.predict(self.dataset.X) - self.logger.info( - "Predictions with internal norm:\n{:}".format(pred_y)) - self.logger.info( - "Predictions with external norm:\n{:}".format(pred_y_nonorm)) + self.logger.info("Predictions with internal norm:\n{:}".format(pred_y)) + self.logger.info("Predictions with external norm:\n{:}".format(pred_y_nonorm)) self.assertFalse((pred_y_nonorm != pred_y).any()) def test_plot_decision_function(self): """Test plot of multiclass classifier decision function.""" # generate synthetic data - ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0, - n_clusters_per_class=1, class_sep=1, - random_state=0).load() + ds = CDLRandom( + n_classes=3, + n_features=2, + n_redundant=0, + n_clusters_per_class=1, + class_sep=1, + random_state=0, + ).load() multiclass = CClassifierMulticlassOVO( - classifier=CClassifierSVM, - class_weight='balanced', - preprocess='min-max') + classifier=CClassifierSVM, class_weight="balanced", preprocess="min-max" + ) # Training and classification multiclass.fit(ds.X, ds.Y) - y_pred, score_pred = multiclass.predict( - ds.X, return_decision_function=True) + y_pred, score_pred = multiclass.predict(ds.X, return_decision_function=True) def plot_hyperplane(img, clf, min_v, max_v, linestyle, label): """Plot the hyperplane associated to the OVO clf.""" xx = CArray.linspace( - min_v - 5, max_v + 5) # make sure the line is long enough + min_v - 5, max_v + 5 + ) # make sure the line is long enough # get the separating hyperplane yy = -(clf.w[0] * xx + clf.b) / clf.w[1] img.sp.plot(xx, yy.ravel(), linestyle, label=label) fig = CFigure(height=7, width=8) - fig.sp.title('{:} ({:})'.format(multiclass.__class__.__name__, - multiclass.classifier.__name__)) + fig.sp.title( + "{:} ({:})".format( + multiclass.__class__.__name__, multiclass.classifier.__name__ + ) + ) x_bounds, y_bounds = ds.get_bounds() - styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.'] + styles = ["go-", "yp--", "rs-.", "bD--", "c-.", "m-", "y-."] for c_idx, c in enumerate(ds.classes): # Plot boundary and predicted label for each OVO classifier - plot_hyperplane(fig, multiclass._binary_classifiers[c_idx], - x_bounds[0], x_bounds[1], styles[c_idx], - 'Boundary\nfor class {:}'.format(c)) - - fig.sp.scatter(ds.X[ds.Y == c, 0], - ds.X[ds.Y == c, 1], - s=40, c=styles[c_idx][0]) - fig.sp.scatter(ds.X[y_pred == c, 0], ds.X[y_pred == c, 1], s=160, - edgecolors=styles[c_idx][0], - facecolors='none', linewidths=2) + plot_hyperplane( + fig, + multiclass._binary_classifiers[c_idx], + x_bounds[0], + x_bounds[1], + styles[c_idx], + "Boundary\nfor class {:}".format(c), + ) + + fig.sp.scatter( + ds.X[ds.Y == c, 0], ds.X[ds.Y == c, 1], s=40, c=styles[c_idx][0] + ) + fig.sp.scatter( + ds.X[y_pred == c, 0], + ds.X[y_pred == c, 1], + s=160, + edgecolors=styles[c_idx][0], + facecolors="none", + linewidths=2, + ) # Plotting multiclass decision function - fig.sp.plot_decision_regions(multiclass, n_grid_points=100, - grid_limits=ds.get_bounds(offset=5)) + fig.sp.plot_decision_regions( + multiclass, n_grid_points=100, grid_limits=ds.get_bounds(offset=5) + ) - fig.sp.xlim(x_bounds[0] - .5 * x_bounds[1], - x_bounds[1] + .5 * x_bounds[1]) - fig.sp.ylim(y_bounds[0] - .5 * y_bounds[1], - y_bounds[1] + .5 * y_bounds[1]) + fig.sp.xlim(x_bounds[0] - 0.5 * x_bounds[1], x_bounds[1] + 0.5 * x_bounds[1]) + fig.sp.ylim(y_bounds[0] - 0.5 * y_bounds[1], y_bounds[1] + 0.5 * y_bounds[1]) fig.sp.legend(loc=4) # lower, right @@ -201,11 +217,11 @@ def plot_hyperplane(img, clf, min_v, max_v, linestyle, label): def test_fun(self): """Test for decision_function() and predict() methods.""" - self.logger.info( - "Test for decision_function() and predict() methods.") + self.logger.info("Test for decision_function() and predict() methods.") - mc = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') + mc = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) scores_d = self._test_fun(mc, self.dataset.todense()) scores_s = self._test_fun(mc, self.dataset.tosparse()) @@ -214,8 +230,9 @@ def test_fun(self): def test_gradient(self): """Unittests for gradient() function.""" - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) i = 5 # Sample to test @@ -230,22 +247,26 @@ def test_gradient(self): # Compute the gradient for class i ovo_grad_pos = CArray.zeros( - shape=pattern.shape, dtype=pattern.dtype, sparse=pattern.issparse) + shape=pattern.shape, dtype=pattern.dtype, sparse=pattern.issparse + ) ovo_grad_neg = CArray.zeros( - shape=pattern.shape, dtype=pattern.dtype, sparse=pattern.issparse) + shape=pattern.shape, dtype=pattern.dtype, sparse=pattern.issparse + ) for j in range(multiclass.num_classifiers): idx_pos = multiclass._clf_pair_idx[j][0] idx_neg = multiclass._clf_pair_idx[j][1] if idx_pos == i: w_bin = CArray([1, 0]) - grad_pos = \ - multiclass._binary_classifiers[j].gradient(pattern, w_bin) + grad_pos = multiclass._binary_classifiers[j].gradient( + pattern, w_bin + ) ovo_grad_pos += grad_pos if idx_neg == i: w_bin = CArray([0, 1]) - grad_neg = \ - multiclass._binary_classifiers[j].gradient(pattern, w_bin) + grad_neg = multiclass._binary_classifiers[j].gradient( + pattern, w_bin + ) ovo_grad_neg += grad_neg ovo_grad = (ovo_grad_pos + ovo_grad_neg) / 3 @@ -253,8 +274,7 @@ def test_gradient(self): w = CArray.zeros(shape=multiclass.n_classes) w[i] = 1 # one-hot encoding of y gradient = multiclass.gradient(pattern, w) - self.logger.info( - "Gradient of {:}^th sub-clf is:\n{:}".format(i, gradient)) + self.logger.info("Gradient of {:}^th sub-clf is:\n{:}".format(i, gradient)) self.assert_array_almost_equal(gradient.atleast_2d(), -ovo_grad) @@ -281,8 +301,9 @@ def test_gradient(self): def test_multiclass_gradient(self): """Test if gradient is correct when requesting for all classes with w""" - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) multiclass.fit(self.dataset.X, self.dataset.Y) div = CArray.rand(shape=multiclass.n_classes, random_state=0) @@ -291,32 +312,37 @@ def f_x(z): return CArray((z / div).mean()) def grad_f_x(p): - w = CArray.ones(shape=multiclass.n_classes) / \ - (div * multiclass.n_classes) + w = CArray.ones(shape=multiclass.n_classes) / (div * multiclass.n_classes) return multiclass.gradient(p, w=w) i = 5 # Sample to test x = self.dataset.X[i, :] from secml.optim.function import CFunction + check_grad_val = CFunction(f_x, grad_f_x).check_grad(x, epsilon=1e-1) - self.logger.info( - "norm(grad - num_grad): %s", str(check_grad_val)) + self.logger.info("norm(grad - num_grad): %s", str(check_grad_val)) self.assertLess(check_grad_val, 1e-3) def test_preprocess(self): """Test classifier with preprocessors inside.""" - multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM, - class_weight='balanced') + multiclass = CClassifierMulticlassOVO( + classifier=CClassifierSVM, class_weight="balanced" + ) # All linear transformations with gradient implemented - self._test_preprocess(self.dataset, multiclass, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(self.dataset, multiclass, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, + multiclass, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) + self._test_preprocess_grad( + self.dataset, + multiclass, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess( - self.dataset, multiclass, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(self.dataset, multiclass, ["pca", "unit-norm"], [{}, {}]) diff --git a/src/secml/ml/classifiers/pytorch/c_classifier_pytorch.py b/src/secml/ml/classifiers/pytorch/c_classifier_pytorch.py index 42bb6b39..97e0a4ae 100644 --- a/src/secml/ml/classifiers/pytorch/c_classifier_pytorch.py +++ b/src/secml/ml/classifiers/pytorch/c_classifier_pytorch.py @@ -5,6 +5,7 @@ .. moduleauthor:: Maura Pintor """ + from functools import reduce import torch @@ -74,17 +75,25 @@ class as last layer or last forward function, or even in the class_type : 'pytorch-clf' """ - __class_type = 'pytorch-clf' - - def __init__(self, model, loss=None, - optimizer=None, - optimizer_scheduler=None, - pretrained=False, - pretrained_classes=None, - input_shape=None, - random_state=None, preprocess=None, - softmax_outputs=False, - epochs=10, batch_size=1, n_jobs=1): + + __class_type = "pytorch-clf" + + def __init__( + self, + model, + loss=None, + optimizer=None, + optimizer_scheduler=None, + pretrained=False, + pretrained_classes=None, + input_shape=None, + random_state=None, + preprocess=None, + softmax_outputs=False, + epochs=10, + batch_size=1, + n_jobs=1, + ): self._device = self._set_device() self._random_state = random_state @@ -95,15 +104,17 @@ def __init__(self, model, loss=None, pretrained=pretrained, pretrained_classes=pretrained_classes, input_shape=input_shape, - softmax_outputs=softmax_outputs, n_jobs=n_jobs) + softmax_outputs=softmax_outputs, + n_jobs=n_jobs, + ) self._init_model() self._batch_size = batch_size if self._batch_size is None: self.logger.info( - "No batch size passed. Value will be set to the default " - "value of 1.") + "No batch size passed. Value will be set to the default " "value of 1." + ) self._batch_size = 1 if self._input_shape is None: @@ -114,7 +125,8 @@ def __init__(self, model, loss=None, else: raise ValueError( "Input shape should be specified if the first " - "layer is not a `nn.Linear` module.") + "layer is not a `nn.Linear` module." + ) self._loss = loss self._optimizer = optimizer @@ -128,7 +140,8 @@ def __init__(self, model, loss=None, self._classes = self._pretrained_classes else: self._classes = CArray.arange( - list(self._model.modules())[-1].out_features) + list(self._model.modules())[-1].out_features + ) self._n_features = reduce(lambda a, b: a * b, self._input_shape) # hooks for getting intermediate outputs @@ -196,13 +209,12 @@ def batch_size(self, batch_size): @property def layers(self): - """Returns the layers of the model, if possible. """ + """Returns the layers of the model, if possible.""" if self._model_layers is None: if isinstance(self._model, nn.Module): self._model_layers = list(get_layers(self._model)) else: - raise TypeError( - "The input model must be an instance of `nn.Module`.") + raise TypeError("The input model must be an instance of `nn.Module`.") return self._model_layers @property @@ -216,7 +228,8 @@ def layer_shapes(self): self._model(x) for layer_name, layer in self.layers: self._model_layer_shapes[layer_name] = tuple( - self._intermediate_outputs[layer].shape) + self._intermediate_outputs[layer].shape + ) self._clean_hooks() return self._model_layer_shapes @@ -260,8 +273,7 @@ def hook_layer_output(self, layer_names=None): for name, layer in get_layers(self._model): if name in layer_names: - self._handlers.append( - layer.register_forward_hook(self._hook_forward)) + self._handlers.append(layer.register_forward_hook(self._hook_forward)) else: pass @@ -270,18 +282,24 @@ def _set_device(self): def get_params(self): """Returns the dictionary of class parameters.""" - loss_params = {'loss': self._loss} + loss_params = {"loss": self._loss} optim_params = { - 'optimizer': - self._optimizer.state_dict()['param_groups'][0] - if self._optimizer is not None else None, - 'optimizer_scheduler': + "optimizer": ( + self._optimizer.state_dict()["param_groups"][0] + if self._optimizer is not None + else None + ), + "optimizer_scheduler": ( self._optimizer_scheduler.state_dict() - if self._optimizer_scheduler is not None else None + if self._optimizer_scheduler is not None + else None + ), } return SubLevelsDict( - merge_dicts(super(CClassifierPyTorch, self).get_params(), - loss_params, optim_params)) + merge_dicts( + super(CClassifierPyTorch, self).get_params(), loss_params, optim_params + ) + ) def get_state(self, return_optimizer=True, **kwargs): """Returns the object state dictionary. @@ -306,10 +324,10 @@ def get_state(self, return_optimizer=True, **kwargs): state = super(CClassifierPyTorch, self).get_state(**kwargs) # Map model to CPU before saving - self._model.to(torch.device('cpu')) + self._model.to(torch.device("cpu")) # Use deepcopy as restoring device later will change them - state['model'] = deepcopy(self._model.state_dict()) + state["model"] = deepcopy(self._model.state_dict()) # Restore device for model self._model.to(self._device) @@ -319,18 +337,18 @@ def get_state(self, return_optimizer=True, **kwargs): # is True, `optimizer` and `optimizer_scheduler` should be included, # even if they are None if return_optimizer is False: - state.pop('optimizer') - state.pop('optimizer_scheduler') + state.pop("optimizer") + state.pop("optimizer_scheduler") else: # Unfortunately optimizer does not have a 'to(device)' method if self._optimizer is not None: for opt_state in self._optimizer.state.values(): for k, v in opt_state.items(): if isinstance(v, torch.Tensor): - opt_state[k] = v.to('cpu') + opt_state[k] = v.to("cpu") # Use deepcopy as restoring device later will change them - state['optimizer'] = deepcopy(self._optimizer.state_dict()) + state["optimizer"] = deepcopy(self._optimizer.state_dict()) # Restore optimizer state to proper device for opt_state in self._optimizer.state.values(): @@ -341,8 +359,9 @@ def get_state(self, return_optimizer=True, **kwargs): if self._optimizer_scheduler is not None: # Scheduler will be saved only if also optimizer is defined # No need to map to `cpu`, tensors in state - state['optimizer_scheduler'] = deepcopy( - self._optimizer_scheduler.state_dict()) + state["optimizer_scheduler"] = deepcopy( + self._optimizer_scheduler.state_dict() + ) return state @@ -350,27 +369,30 @@ def set_state(self, state_dict, copy=False): """Sets the object state using input dictionary.""" # TODO: DEEPCOPY FOR torch.load_state_dict? - if 'model' in state_dict: - self._model.load_state_dict(state_dict.pop('model')) + if "model" in state_dict: + self._model.load_state_dict(state_dict.pop("model")) - if 'optimizer' in state_dict: + if "optimizer" in state_dict: if self._optimizer is None: raise ValueError( "optimizer not found in current object but required for " "restoring state." "Save the state using `return_optimizer=False` or " - "add an optimizer to the model first.") - self._optimizer.load_state_dict(state_dict.pop('optimizer')) + "add an optimizer to the model first." + ) + self._optimizer.load_state_dict(state_dict.pop("optimizer")) - if 'optimizer_scheduler' in state_dict: + if "optimizer_scheduler" in state_dict: if self._optimizer_scheduler is None: raise ValueError( "`optimizer_scheduler` not found in current object " "but required for restoring state." "Save the state using `return_optimizer=False` or " - "add an optimizer scheduler to the model first.") + "add an optimizer scheduler to the model first." + ) self._optimizer_scheduler.load_state_dict( - state_dict.pop('optimizer_scheduler')) + state_dict.pop("optimizer_scheduler") + ) super(CClassifierPyTorch, self).set_state(state_dict, copy=copy) @@ -381,22 +403,27 @@ def __getattribute__(self, key): loss and optimizer.""" try: # If we are not getting the model itself - if key not in ['_model', '_optimizer', '_optimizer_scheduler']: - if hasattr(self, '_model') and key in self._model._modules: + if key not in ["_model", "_optimizer", "_optimizer_scheduler"]: + if hasattr(self, "_model") and key in self._model._modules: return self._model[key] - elif hasattr(self, '_optimizer') and \ - self._optimizer is not None and \ - key in self._optimizer.state_dict()['param_groups'][0]: - if len(self._optimizer.state_dict()['param_groups']) == 1: + elif ( + hasattr(self, "_optimizer") + and self._optimizer is not None + and key in self._optimizer.state_dict()["param_groups"][0] + ): + if len(self._optimizer.state_dict()["param_groups"]) == 1: return self._optimizer.param_groups[0][key] else: raise NotImplementedError( "__getattribute__ is not yet supported for " "optimizers with more than one element in " - "param_groups.") - elif hasattr(self, '_optimizer_scheduler') and \ - self._optimizer_scheduler is not None and \ - key in self._optimizer_scheduler.state_dict(): + "param_groups." + ) + elif ( + hasattr(self, "_optimizer_scheduler") + and self._optimizer_scheduler is not None + and key in self._optimizer_scheduler.state_dict() + ): return self._optimizer_scheduler[key] except (KeyError, AttributeError): @@ -412,15 +439,19 @@ def __setattr__(self, key, value): """ if isinstance(value, (torch.Tensor, torch.nn.Module)): value = value.to(self._device) - if hasattr(self, '_model') and key in self._model._modules: + if hasattr(self, "_model") and key in self._model._modules: self._model._modules[key] = value - elif hasattr(self, '_optimizer') and \ - self._optimizer is not None and \ - key in self._optimizer.state_dict()['param_groups'][0]: + elif ( + hasattr(self, "_optimizer") + and self._optimizer is not None + and key in self._optimizer.state_dict()["param_groups"][0] + ): self._optimizer.param_groups[0][key] = value - elif hasattr(self, '_optimizer_scheduler') and \ - self._optimizer_scheduler is not None and \ - key in self._optimizer_scheduler.state_dict(): + elif ( + hasattr(self, "_optimizer_scheduler") + and self._optimizer_scheduler is not None + and key in self._optimizer_scheduler.state_dict() + ): self._optimizer_scheduler.state_dict[key] = value else: # Otherwise, normal python set behavior super(CClassifierPyTorch, self).__setattr__(key, value) @@ -442,25 +473,28 @@ def _init_model(self): def _to_tensor(x): """Convert input CArray to tensor.""" if not isinstance(x, CArray): - raise ValueError("A `CArray` is required as " - "input to the `_to_tensor` method.") + raise ValueError( + "A `CArray` is required as " "input to the `_to_tensor` method." + ) x = x.tondarray() x = torch.from_numpy(x) x = x.type(torch.FloatTensor) if use_cuda is True: - x = x.cuda(device=torch.device('cuda')) + x = x.cuda(device=torch.device("cuda")) return x @staticmethod def _from_tensor(x): """Convert input tensor to CArray""" if not isinstance(x, torch.Tensor): - raise ValueError("A `torch.Tensor` is required as " - "input to the `_from_tensor` method.") + raise ValueError( + "A `torch.Tensor` is required as " "input to the `_from_tensor` method." + ) return CArray(x.cpu().numpy()).astype(float) - def _data_loader(self, data, labels=None, batch_size=10, - shuffle=False, num_workers=0): + def _data_loader( + self, data, labels=None, batch_size=10, shuffle=False, num_workers=0 + ): """Returns `torch.DataLoader` generated from the input CDataset. Parameters @@ -487,10 +521,14 @@ def _data_loader(self, data, labels=None, batch_size=10, """ transform = transforms.Lambda(lambda x: x.reshape(self._input_shape)) - return CDataLoaderPyTorch(data, labels, - batch_size, shuffle=shuffle, - transform=transform, - num_workers=num_workers, ).get_loader() + return CDataLoaderPyTorch( + data, + labels, + batch_size, + shuffle=shuffle, + transform=transform, + num_workers=num_workers, + ).get_loader() def _fit(self, x, y): """Fit PyTorch model. @@ -503,13 +541,15 @@ def _fit(self, x, y): Array of shape (n_samples,) containing the class labels. """ - if any([self._optimizer is None, - self._loss is None]): - raise ValueError("Optimizer and loss should both be defined " - "in order to fit the model.") + if any([self._optimizer is None, self._loss is None]): + raise ValueError( + "Optimizer and loss should both be defined " + "in order to fit the model." + ) train_loader = self._data_loader( - x, y, batch_size=self._batch_size, num_workers=self.n_jobs - 1) + x, y, batch_size=self._batch_size, num_workers=self.n_jobs - 1 + ) for epoch in range(self._epochs): running_loss = 0.0 @@ -526,8 +566,9 @@ def _fit(self, x, y): # print statistics running_loss += loss.item() if i % 2000 == 1999: # print every 2000 mini-batches - self.logger.info('[%d, %5d] loss: %.3f' % - (epoch + 1, i + 1, running_loss / 2000)) + self.logger.info( + "[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000) + ) running_loss = 0.0 if self._optimizer_scheduler is not None: @@ -553,21 +594,27 @@ def _forward(self, x): Transformed input data. """ - data_loader = self._data_loader(x, num_workers=self.n_jobs - 1, - batch_size=self._batch_size) + data_loader = self._data_loader( + x, num_workers=self.n_jobs - 1, batch_size=self._batch_size + ) # Switch to evaluation mode self._model.eval() - out_shape = self.n_classes if self._out_layer is None else \ - reduce((lambda z, v: z * v), self.layer_shapes[self._out_layer]) + out_shape = ( + self.n_classes + if self._out_layer is None + else reduce((lambda z, v: z * v), self.layer_shapes[self._out_layer]) + ) output = torch.empty((len(data_loader.dataset), out_shape)) for batch_idx, (s, _) in enumerate(data_loader): # Log progress self.logger.info( - 'Classification: {batch}/{size}'.format(batch=batch_idx, - size=len(data_loader))) + "Classification: {batch}/{size}".format( + batch=batch_idx, size=len(data_loader) + ) + ) s = s.to(self._device) @@ -584,9 +631,9 @@ def _forward(self, x): self._cached_s = s self._cached_layer_output = ps - output[batch_idx * self.batch_size: - batch_idx * self.batch_size + len(s)] = \ - ps.view(ps.size(0), -1).detach() + output[ + batch_idx * self.batch_size : batch_idx * self.batch_size + len(s) + ] = ps.view(ps.size(0), -1).detach() # Apply softmax-scaling if needed if self._softmax_outputs is True and self._out_layer is None: @@ -627,8 +674,9 @@ def _get_layer_output(self, s, layer_name=None): return list(self._intermediate_outputs.values())[0] else: - raise ValueError("Pass layer names as a list or just None " - "for last layer output.") + raise ValueError( + "Pass layer names as a list or just None " "for last layer output." + ) def _backward(self, w): """Returns the gradient of the DNN - considering the output layer set @@ -646,21 +694,19 @@ def _backward(self, w): Accumulated gradient of the module wrt input data. """ if w is None: - raise ValueError("Function `_backward` needs the `w` array " - "to run backward with.") + raise ValueError( + "Function `_backward` needs the `w` array " "to run backward with." + ) # Apply softmax-scaling if needed (only if last layer is required) if self.softmax_outputs is True and self._out_layer is None: - out_carray = self._from_tensor( - self._cached_layer_output.squeeze(0).data) + out_carray = self._from_tensor(self._cached_layer_output.squeeze(0).data) softmax_grad = CArray.zeros(shape=out_carray.shape[0]) for y in w.nnz_indices[1]: - softmax_grad += w[y] * CSoftmax().gradient( - out_carray, y=y) + softmax_grad += w[y] * CSoftmax().gradient(out_carray, y=y) w = softmax_grad - w = self._to_tensor(w.atleast_2d()).reshape( - self._cached_layer_output.shape) + w = self._to_tensor(w.atleast_2d()).reshape(self._cached_layer_output.shape) w = w.to(self._device) if self._cached_s.grad is not None: @@ -668,8 +714,11 @@ def _backward(self, w): self._cached_layer_output.backward(w) - return self._from_tensor(self._cached_s.grad.data.view( - -1, reduce(lambda a, b: a * b, self.input_shape))) + return self._from_tensor( + self._cached_s.grad.data.view( + -1, reduce(lambda a, b: a * b, self.input_shape) + ) + ) def save_model(self, filename): """ @@ -682,17 +731,16 @@ def save_model(self, filename): """ state = { - 'model_state': self._model.state_dict(), - 'n_features': self.n_features, - 'classes': self.classes, + "model_state": self._model.state_dict(), + "n_features": self.n_features, + "classes": self.classes, } if self.optimizer is not None: - state['optimizer_state'] = self._optimizer.state_dict() + state["optimizer_state"] = self._optimizer.state_dict() if self._optimizer_scheduler is not None: - state['optimizer_scheduler_state'] = \ - self._optimizer_scheduler.state_dict() + state["optimizer_scheduler_state"] = self._optimizer_scheduler.state_dict() torch.save(state, filename) @@ -715,37 +763,41 @@ def load_model(self, filename, classes=None): """ state = torch.load(filename, map_location=self._device) - keys = ['model_state', 'n_features', 'classes'] + keys = ["model_state", "n_features", "classes"] if all(key in state for key in keys): if classes is not None: self.logger.warning( "Model was saved within `secml` framework. " - "The parameter `classes` will be ignored.") + "The parameter `classes` will be ignored." + ) # model was stored with save_model method - self._model.load_state_dict(state['model_state']) + self._model.load_state_dict(state["model_state"]) - if 'optimizer_state' in state \ - and self._optimizer is not None: - self._optimizer.load_state_dict(state['optimizer_state']) + if "optimizer_state" in state and self._optimizer is not None: + self._optimizer.load_state_dict(state["optimizer_state"]) else: self._optimizer = None - if 'optimizer_scheduler_state' in state \ - and self._optimizer_scheduler is not None: + if ( + "optimizer_scheduler_state" in state + and self._optimizer_scheduler is not None + ): self._optimizer_scheduler.load_state_dict( - state['optimizer_scheduler_state']) + state["optimizer_scheduler_state"] + ) else: self._optimizer_scheduler = None - self._n_features = state['n_features'] - self._classes = state['classes'] + self._n_features = state["n_features"] + self._classes = state["classes"] else: # model was stored outside secml framework try: self._model.load_state_dict(state) # This part is important to prevent not fitted if classes is None: self._classes = CArray.arange( - self.layer_shapes[self.layer_names[-1]][1]) + self.layer_shapes[self.layer_names[-1]][1] + ) else: self._classes = CArray(classes) self._n_features = reduce(lambda x, y: x * y, self.input_shape) @@ -753,4 +805,5 @@ def load_model(self, filename, classes=None): except Exception: self.logger.error( "Model's state dict should be stored according to " - "PyTorch docs. Use `torch.save(model.state_dict())`.") + "PyTorch docs. Use `torch.save(model.state_dict())`." + ) diff --git a/src/secml/ml/classifiers/pytorch/tests/c_classifier_pytorch_testcases.py b/src/secml/ml/classifiers/pytorch/tests/c_classifier_pytorch_testcases.py index a1d7b232..919f2e2b 100644 --- a/src/secml/ml/classifiers/pytorch/tests/c_classifier_pytorch_testcases.py +++ b/src/secml/ml/classifiers/pytorch/tests/c_classifier_pytorch_testcases.py @@ -33,8 +33,7 @@ def _test_predict(self, clf, ts): self.assertTrue(clf.is_fitted()) pred = clf.decision_function(ts.X) - label_torch, y_torch = \ - clf.predict(ts.X, return_decision_function=True) + label_torch, y_torch = clf.predict(ts.X, return_decision_function=True) self.logger.info("Decision Function:\n{}".format(pred)) self.logger.info("Classify:\n{}".format(y_torch)) @@ -52,8 +51,7 @@ def _test_accuracy(self, clf, ts): """ self.assertTrue(clf.is_fitted()) - label_torch, y_torch = \ - clf.predict(ts.X, return_decision_function=True) + label_torch, y_torch = clf.predict(ts.X, return_decision_function=True) acc_torch = CMetricAccuracy().performance_score(ts.Y, label_torch) @@ -104,8 +102,7 @@ def _test_out_at_layer(self, clf, x, layer_name): softmax_output = clf.softmax_outputs if softmax_output is True: - self.logger.info( - "Deactivate softmax-scaling to easily compare outputs") + self.logger.info("Deactivate softmax-scaling to easily compare outputs") clf.softmax_outputs = False layer = layer_name @@ -116,12 +113,17 @@ def _test_out_at_layer(self, clf, x, layer_name): if layer is None: self.assertTrue( - (clf.get_layer_output(x, layer=layer) - - clf.decision_function(x)).sum() == 0) + (clf.get_layer_output(x, layer=layer) - clf.decision_function(x)).sum() + == 0 + ) last_layer_name = clf.layer_names[-1] self.assertTrue( - (clf.get_layer_output(x, layer=last_layer_name) - - clf.decision_function(x)).sum() == 0) + ( + clf.get_layer_output(x, layer=last_layer_name) + - clf.decision_function(x) + ).sum() + == 0 + ) # Restore original value of softmax_outputs parameter clf.softmax_outputs = softmax_output @@ -168,16 +170,17 @@ def _test_set_params(self, clf, tr): self.logger.info("Testing assignment on optimizer") clf_copy.lr = 10 self.logger.debug("params: {}".format(clf_copy.get_params())) - self.assertTrue(clf_copy.get_params()['optimizer']['lr'] == 10) + self.assertTrue(clf_copy.get_params()["optimizer"]["lr"] == 10) self.assertTrue(clf_copy.lr == 10) self.logger.info("Testing assignment on model layer") clf_copy.fc2 = torch.nn.Linear( - clf_copy._model.fc2.in_features, - clf_copy._model.fc2.out_features) + clf_copy._model.fc2.in_features, clf_copy._model.fc2.out_features + ) - self.assertEqual(clf_copy.predict(tr[0, :].X).size, - clf.predict(tr[0, :].X).size) + self.assertEqual( + clf_copy.predict(tr[0, :].X).size, clf.predict(tr[0, :].X).size + ) clf_copy.fit(tr.X, tr.Y) self.assertNotEqual(id(clf._optimizer), id(clf_copy._optimizer)) @@ -186,9 +189,11 @@ def _test_set_params(self, clf, tr): self.assertTrue(clf_copy._model.fc2.in_features == 20) self.assertTrue(clf_copy._model.fc2.out_features == 20) self.logger.debug( - "Copy of the model modified. Last layer should have dims 20x20") - self.logger.debug("Last layer of copied model: {}".format( - clf_copy._model._modules['fc2'])) + "Copy of the model modified. Last layer should have dims 20x20" + ) + self.logger.debug( + "Last layer of copied model: {}".format(clf_copy._model._modules["fc2"]) + ) def _test_softmax_outputs(self, clf, x): """Check behavior of `softmax_outputs` parameter. @@ -213,7 +218,7 @@ def _test_softmax_outputs(self, clf, x): self.assert_approx_equal(preds.sum(), 1.0) # test gradient - w_in = CArray.zeros(shape=(clf.n_classes, )) + w_in = CArray.zeros(shape=(clf.n_classes,)) w_in[1] = 1 grad = clf.gradient(x, w=w_in) @@ -238,8 +243,7 @@ def _test_save_load_model(self, clf, clf_new, ts): self.assertTrue(clf.is_fitted()) pred_y = clf.predict(ts.X) - self.logger.info( - "Predictions of the original clf:\n{:}".format(pred_y)) + self.logger.info("Predictions of the original clf:\n{:}".format(pred_y)) state_path = fm.join(tempfile.gettempdir(), "state.tar") @@ -255,8 +259,7 @@ def _test_save_load_model(self, clf, clf_new, ts): del clf_new._optimizer_scheduler pred_y_post = clf_new.predict(ts.X) - self.logger.info( - "Predictions of the restored model:\n{:}".format(pred_y_post)) + self.logger.info("Predictions of the restored model:\n{:}".format(pred_y_post)) self.assert_array_equal(pred_y, pred_y_post) @@ -276,15 +279,13 @@ def _test_get_set_state(self, clf, clf_new, ts): self.assertTrue(clf.is_fitted()) pred_y = clf.predict(ts.X) - self.logger.info( - "Predictions before restoring state:\n{:}".format(pred_y)) + self.logger.info("Predictions before restoring state:\n{:}".format(pred_y)) state = clf.get_state(return_optimizer=False) # Restore state clf_new.set_state(state) pred_y_post = clf_new.predict(ts.X) - self.logger.info( - "Predictions after restoring state:\n{:}".format(pred_y_post)) + self.logger.info("Predictions after restoring state:\n{:}".format(pred_y_post)) self.assert_array_equal(pred_y, pred_y_post) diff --git a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_blobs.py b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_blobs.py index e1404e14..2827778e 100644 --- a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_blobs.py +++ b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_blobs.py @@ -46,14 +46,14 @@ def setUpClass(cls): # Load dataset and split tr/ts cls.tr, cls.ts = cls._create_tr_ts( - cls.n_tr, cls.n_ts, cls.n_classes, cls.n_features) + cls.n_tr, cls.n_ts, cls.n_classes, cls.n_features + ) # Model and classifier parameters cls.batch_size = 20 # Create the PyTorch model and our classifier - cls.clf = cls._create_clf( - cls.n_features, cls.n_classes, cls.batch_size) + cls.clf = cls._create_clf(cls.n_features, cls.n_classes, cls.batch_size) # Train the classifier cls.clf.fit(cls.tr.X, cls.tr.Y) @@ -62,16 +62,18 @@ def setUpClass(cls): def _create_tr_ts(n_tr, n_ts, n_classes, n_features): """Create BLOBS training and test sets.""" # generate synthetic data - ds = CDLRandom(n_samples=n_tr + n_ts, - n_classes=n_classes, - n_features=n_features, - n_redundant=0, n_clusters_per_class=1, - class_sep=1, random_state=0).load() + ds = CDLRandom( + n_samples=n_tr + n_ts, + n_classes=n_classes, + n_features=n_features, + n_redundant=0, + n_clusters_per_class=1, + class_sep=1, + random_state=0, + ).load() # Split in training and test - splitter = CTrainTestSplit(train_size=n_tr, - test_size=n_ts, - random_state=0) + splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts, random_state=0) tr, ts = splitter.split(ds) nmz = CNormalizerMinMax() @@ -95,16 +97,19 @@ def _create_clf(n_features, n_classes, batch_size): net = Net(n_features=n_features, n_classes=n_classes) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9) - optimizer_scheduler = \ - torch.optim.lr_scheduler.MultiStepLR(optimizer, [5, 8], gamma=0.1) - - return CClassifierPyTorch(model=net, - loss=criterion, - optimizer=optimizer, - optimizer_scheduler=optimizer_scheduler, - epochs=10, - batch_size=batch_size, - random_state=0) + optimizer_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, [5, 8], gamma=0.1 + ) + + return CClassifierPyTorch( + model=net, + loss=criterion, + optimizer=optimizer, + optimizer_scheduler=optimizer_scheduler, + epochs=10, + batch_size=batch_size, + random_state=0, + ) def test_classification(self): """Test for `.decision_function` and `.predict` methods.""" @@ -133,30 +138,26 @@ def test_out_at_layer(self): def test_grad(self): """Test for `.gradient` method.""" - self._test_gradient_numerical( - self.clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) + self._test_gradient_numerical(self.clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) self._test_grad_atlayer( - self.clf, self.ts.X[0, :], layer_names=["fc1", 'fc2', None]) + self.clf, self.ts.X[0, :], layer_names=["fc1", "fc2", None] + ) def test_softmax_outputs(self): """Check behavior of `softmax_outputs` parameter.""" - self._test_softmax_outputs( - self.clf, self.ts.X[0, :]) - self._test_gradient_numerical( - self.clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) + self._test_softmax_outputs(self.clf, self.ts.X[0, :]) + self._test_gradient_numerical(self.clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) def test_save_load_model(self): """Test for `.save_model` and `.load_model` methods.""" # Create a second target classifier - clf_new = self._create_clf( - self.n_features, self.n_classes, self.batch_size) + clf_new = self._create_clf(self.n_features, self.n_classes, self.batch_size) self._test_save_load_model(self.clf, clf_new, self.ts) def test_get_set_state(self): """Test for `.get_state` and `.set_state` methods.""" # Create a second target classifier - clf_new = self._create_clf( - self.n_features, self.n_classes, self.batch_size) + clf_new = self._create_clf(self.n_features, self.n_classes, self.batch_size) self._test_get_set_state(self.clf, clf_new, self.ts) def test_preprocess_dnn(self): @@ -174,9 +175,8 @@ def test_preprocess_dnn(self): self._test_predict(new_clf, self.ts) self._test_accuracy(new_clf, self.ts) - self._test_gradient_numerical( - new_clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) + self._test_gradient_numerical(new_clf, self.ts.X[0, :], th=1e-2, epsilon=1e-3) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierPyTorchTestCases.main() diff --git a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_conv.py b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_conv.py index a5195ea0..3404ce25 100644 --- a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_conv.py +++ b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_conv.py @@ -44,12 +44,10 @@ def setUpClass(cls): def _create_tr_ts(n_tr, n_ts): """Create MNIST 3C training and test sets.""" digits = (1, 5, 9) - ds = CDataLoaderMNIST().load('training', digits=digits) + ds = CDataLoaderMNIST().load("training", digits=digits) # Split in training and test - splitter = CTrainTestSplit(train_size=n_tr, - test_size=n_ts, - random_state=0) + splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts, random_state=0) tr, ts = splitter.split(ds) tr.X /= 255 @@ -72,33 +70,37 @@ class Flatten(nn.Module): def forward(self, input): return input.view(input.size(0), -1) - od = OrderedDict([ - ('conv1', nn.Conv2d(1, 10, kernel_size=5)), - ('pool1', nn.MaxPool2d(2)), - ('conv2', nn.Conv2d(10, 20, kernel_size=5)), - ('drop', nn.Dropout2d()), - ('pool2', nn.MaxPool2d(2)), - ('flatten', Flatten()), - ('fc1', nn.Linear(320, 50)), - ('relu', nn.ReLU()), - ('fc2', nn.Linear(50, 3)), - ]) + od = OrderedDict( + [ + ("conv1", nn.Conv2d(1, 10, kernel_size=5)), + ("pool1", nn.MaxPool2d(2)), + ("conv2", nn.Conv2d(10, 20, kernel_size=5)), + ("drop", nn.Dropout2d()), + ("pool2", nn.MaxPool2d(2)), + ("flatten", Flatten()), + ("fc1", nn.Linear(320, 50)), + ("relu", nn.ReLU()), + ("fc2", nn.Linear(50, 3)), + ] + ) net = nn.Sequential(OrderedDict(od)) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9) - scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, - milestones=[1, 5, 8], - gamma=0.1) - - return CClassifierPyTorch(model=net, - loss=criterion, - optimizer=optimizer, - epochs=10, - batch_size=batch_size, - input_shape=(1, 28, 28), - optimizer_scheduler=scheduler, - random_state=0) + scheduler = optim.lr_scheduler.MultiStepLR( + optimizer=optimizer, milestones=[1, 5, 8], gamma=0.1 + ) + + return CClassifierPyTorch( + model=net, + loss=criterion, + optimizer=optimizer, + epochs=10, + batch_size=batch_size, + input_shape=(1, 28, 28), + optimizer_scheduler=scheduler, + random_state=0, + ) def test_classification(self): """Test for `.decision_function` and `.predict` methods.""" @@ -124,8 +126,9 @@ def test_out_at_layer(self): def test_grad(self): """Test for `.gradient` method.""" # TODO: ADD TEST OF GRADIENT METHOD - self._test_grad_atlayer(self.clf, self.ts.X[0, :], - layer_names=['conv1', 'fc1', 'fc2', None]) + self._test_grad_atlayer( + self.clf, self.ts.X[0, :], layer_names=["conv1", "fc1", "fc2", None] + ) def test_softmax_outputs(self): """Check behavior of `softmax_outputs` parameter.""" @@ -144,5 +147,5 @@ def test_get_set_state(self): self._test_get_set_state(self.clf, clf_new, self.ts) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierPyTorchTestCases.main() diff --git a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_dnn.py b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_dnn.py index 88aca22d..10bb0f9e 100644 --- a/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_dnn.py +++ b/src/secml/ml/classifiers/pytorch/tests/test_c_classifier_pytorch_dnn.py @@ -39,9 +39,7 @@ def _create_tr_ts(): ds = CDLRandom(n_samples=30, n_features=3 * 224 * 224).load() # Split in training and test - splitter = CTrainTestSplit(train_size=10, - test_size=20, - random_state=0) + splitter = CTrainTestSplit(train_size=10, test_size=20, random_state=0) tr, ts = splitter.split(ds) nmz = CNormalizerMinMax() @@ -56,17 +54,18 @@ def _create_clf(): torch.manual_seed(0) net = torchvision.models.resnet18(pretrained=True) criterion = nn.CrossEntropyLoss() - optimizer = optim.SGD(net.parameters(), - lr=0.001, momentum=0.9) - - return CClassifierPyTorch(model=net, - loss=criterion, - optimizer=optimizer, - epochs=10, - batch_size=20, - input_shape=(3, 224, 224), - pretrained=True, - random_state=0) + optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) + + return CClassifierPyTorch( + model=net, + loss=criterion, + optimizer=optimizer, + epochs=10, + batch_size=20, + input_shape=(3, 224, 224), + pretrained=True, + random_state=0, + ) def test_accuracy(self): """Compare classification accuracy of original and wrapped models.""" @@ -86,9 +85,11 @@ def test_accuracy(self): # check if the scores are equal self.assert_array_almost_equal( - wrapper_model_scores, pytorch_net_scores, + wrapper_model_scores, + pytorch_net_scores, err_msg="The scores of the pytorch network " - "and the wrapped one not equal") + "and the wrapped one not equal", + ) def test_layer_names(self): """Check behavior of `.layer_names` property.""" @@ -106,14 +107,14 @@ def test_out_at_layer(self): """Test for extracting output at specific layer.""" x = self.ts.X[0, :] self._test_out_at_layer(self.clf, x, "layer4:1:relu") - self._test_out_at_layer(self.clf, x, 'bn1') - self._test_out_at_layer(self.clf, x, 'fc') + self._test_out_at_layer(self.clf, x, "bn1") + self._test_out_at_layer(self.clf, x, "fc") self._test_out_at_layer(self.clf, x, None) def test_grad(self): """Test for `.gradient` method.""" # TODO: ADD TEST OF GRADIENT METHOD - self._test_grad_atlayer(self.clf, self.ts.X[0, :], ['fc', None]) + self._test_grad_atlayer(self.clf, self.ts.X[0, :], ["fc", None]) def test_softmax_outputs(self): """Check behavior of `softmax_outputs` parameter.""" @@ -132,5 +133,5 @@ def test_get_set_state(self): self._test_get_set_state(self.clf, clf_new, self.ts) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierPyTorchTestCases.main() diff --git a/src/secml/ml/classifiers/regularizer/c_regularizer.py b/src/secml/ml/classifiers/regularizer/c_regularizer.py index e54e6278..e8b93af6 100644 --- a/src/secml/ml/classifiers/regularizer/c_regularizer.py +++ b/src/secml/ml/classifiers/regularizer/c_regularizer.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -13,7 +14,8 @@ class CRegularizer(CCreator, metaclass=ABCMeta): """Abstract class that defines basic methods for regularizer functions.""" - __super__ = 'CRegularizer' + + __super__ = "CRegularizer" @abstractmethod def regularizer(self, *args, **kwargs): diff --git a/src/secml/ml/classifiers/regularizer/c_regularizer_elastic_net.py b/src/secml/ml/classifiers/regularizer/c_regularizer_elastic_net.py index d3e647c1..5757f285 100755 --- a/src/secml/ml/classifiers/regularizer/c_regularizer_elastic_net.py +++ b/src/secml/ml/classifiers/regularizer/c_regularizer_elastic_net.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.regularizer import CRegularizer @@ -27,7 +28,8 @@ class CRegularizerElasticNet(CRegularizer): class_type : 'elastic-net' """ - __class_type = 'elastic-net' + + __class_type = "elastic-net" def __init__(self, l1_ratio=0.15): self._l1_ratio = float(l1_ratio) @@ -51,8 +53,9 @@ def regularizer(self, w): Vector-like array. """ - return self.l1_ratio * w.norm(order=1) \ - + (1 - self.l1_ratio) * 0.5 * (w ** 2).sum() + return ( + self.l1_ratio * w.norm(order=1) + (1 - self.l1_ratio) * 0.5 * (w**2).sum() + ) def dregularizer(self, w): """Returns the derivative of the elastic-net regularizer diff --git a/src/secml/ml/classifiers/regularizer/c_regularizer_l1.py b/src/secml/ml/classifiers/regularizer/c_regularizer_l1.py index d9ab6815..160ce12f 100755 --- a/src/secml/ml/classifiers/regularizer/c_regularizer_l1.py +++ b/src/secml/ml/classifiers/regularizer/c_regularizer_l1.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.regularizer import CRegularizer @@ -25,7 +26,8 @@ class CRegularizerL1(CRegularizer): class_type : 'l1' """ - __class_type = 'l1' + + __class_type = "l1" def regularizer(self, w): """Returns Norm-L1. diff --git a/src/secml/ml/classifiers/regularizer/c_regularizer_l2.py b/src/secml/ml/classifiers/regularizer/c_regularizer_l2.py index ddfa7e61..b678c771 100755 --- a/src/secml/ml/classifiers/regularizer/c_regularizer_l2.py +++ b/src/secml/ml/classifiers/regularizer/c_regularizer_l2.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.classifiers.regularizer import CRegularizer from secml.array import CArray @@ -24,7 +25,8 @@ class CRegularizerL2(CRegularizer): class_type : 'l2' """ - __class_type = 'l2' + + __class_type = "l2" def regularizer(self, w): """Returns Norm-L2. @@ -35,7 +37,7 @@ def regularizer(self, w): Vector-like array. """ - return 0.5 * (w ** 2).sum() + return 0.5 * (w**2).sum() def dregularizer(self, w): """Return Norm-L2 derivative. diff --git a/src/secml/ml/classifiers/reject/c_classifier_dnr.py b/src/secml/ml/classifiers/reject/c_classifier_dnr.py index 88298b2b..94ef1174 100644 --- a/src/secml/ml/classifiers/reject/c_classifier_dnr.py +++ b/src/secml/ml/classifiers/reject/c_classifier_dnr.py @@ -5,6 +5,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from secml.array import CArray from secml.ml import CClassifier from secml.ml.classifiers.reject import CClassifierRejectThreshold @@ -49,7 +50,8 @@ class score of the combiner is lower than the threshold, the sample is Number of parallel workers to use for training the classifier. Cannot be higher than processor's number of cores. Default is 1. """ - __class_type = 'dnr' + + __class_type = "dnr" def __init__(self, combiner, layer_clf, dnn, layers, threshold, n_jobs=1): @@ -62,14 +64,17 @@ def __init__(self, combiner, layer_clf, dnn, layers, threshold, n_jobs=1): raise TypeError("`layers` must be a list") if isinstance(layer_clf, dict): if not sorted(layers) == sorted(layer_clf.keys()): - raise ValueError("`layer_clf` dict must contain `layers` " - "values as keys") + raise ValueError( + "`layer_clf` dict must contain `layers` " "values as keys" + ) if not all(isinstance(c, CClassifier) for c in layer_clf.values()): - raise TypeError("`layer_clf` dict must contain `CClassifier` " - "instances as values") + raise TypeError( + "`layer_clf` dict must contain `CClassifier` " "instances as values" + ) elif not isinstance(layer_clf, CClassifier): - raise TypeError("`layer_clf` must be an instance of either" - "`CClassifier` or `dict`") + raise TypeError( + "`layer_clf` must be an instance of either" "`CClassifier` or `dict`" + ) self._layers = layers self._layer_clfs = {} @@ -133,12 +138,11 @@ def _create_scores_dataset(self, x, y): """ n_classes = y.unique().size # array that contains concatenate scores of layer classifiers - concat_scores = CArray.zeros( - shape=(x.shape[0], n_classes * len(self._layers))) + concat_scores = CArray.zeros(shape=(x.shape[0], n_classes * len(self._layers))) for i, layer in enumerate(self._layers): scores = self._layer_clfs[layer].fit_forward(x, y) - concat_scores[:, i * n_classes: n_classes + i * n_classes] = scores + concat_scores[:, i * n_classes : n_classes + i * n_classes] = scores return concat_scores def _get_layer_clfs_scores(self, x): @@ -161,16 +165,15 @@ def _get_layer_clfs_scores(self, x): caching = self._cached_x is not None n_classes = self.n_classes - 1 # array that contains concatenate scores of layer classifiers - concat_scores = CArray.zeros( - shape=(x.shape[0], n_classes * len(self._layers))) + concat_scores = CArray.zeros(shape=(x.shape[0], n_classes * len(self._layers))) for i, l in enumerate(self._layers): scores = self._layer_clfs[l].forward(x, caching=caching) - concat_scores[:, i * n_classes: n_classes + i * n_classes] = scores + concat_scores[:, i * n_classes : n_classes + i * n_classes] = scores return concat_scores def _forward(self, x): - """"Private method that computes the decision function. + """ "Private method that computes the decision function. Parameters ---------- @@ -220,7 +223,8 @@ def _backward(self, w): for i, l in enumerate(self._layers): # backward pass to layer clfs of their respective w grad += self._layer_clfs[l].backward( - w=grad_combiner[i * n_classes: i * n_classes + n_classes]) + w=grad_combiner[i * n_classes : i * n_classes + n_classes] + ) return grad @property diff --git a/src/secml/ml/classifiers/reject/c_classifier_reject.py b/src/secml/ml/classifiers/reject/c_classifier_reject.py index 0af1f72c..28c20a9f 100644 --- a/src/secml/ml/classifiers/reject/c_classifier_reject.py +++ b/src/secml/ml/classifiers/reject/c_classifier_reject.py @@ -5,6 +5,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import abstractmethod, ABCMeta from secml.ml.classifiers import CClassifier @@ -28,7 +29,8 @@ class CClassifierReject(CClassifier, metaclass=ABCMeta): preprocess type. If None, input data is used as is. """ - __super__ = 'CClassifierReject' + + __super__ = "CClassifierReject" @abstractmethod def predict(self, x, return_decision_function=False, n_jobs=1): @@ -74,5 +76,4 @@ class label index. """ if y < -1 or y >= self.n_classes: - raise ValueError( - "class label {:} is out of range".format(y)) + raise ValueError("class label {:} is out of range".format(y)) diff --git a/src/secml/ml/classifiers/reject/c_classifier_reject_threshold.py b/src/secml/ml/classifiers/reject/c_classifier_reject_threshold.py index 88518f6e..044c901f 100644 --- a/src/secml/ml/classifiers/reject/c_classifier_reject_threshold.py +++ b/src/secml/ml/classifiers/reject/c_classifier_reject_threshold.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import math from secml import _NoValue @@ -38,13 +39,15 @@ class CClassifierRejectThreshold(CClassifierReject): desired preprocessor. If None, input data is used as is. """ - __class_type = 'reject-threshold' + + __class_type = "reject-threshold" def __init__(self, clf, threshold, preprocess=None): if not isinstance(clf, CClassifier): raise ValueError( - "the inner classifier should be an instance of CClassifier") + "the inner classifier should be an instance of CClassifier" + ) self._clf = clf self.threshold = threshold @@ -171,8 +174,7 @@ def predict(self, x, return_decision_function=False, n_jobs=_NoValue): if n_jobs is not _NoValue: raise ValueError("`n_jobs` is not supported.") - labels, scores = CClassifier.predict( - self, x, return_decision_function=True) + labels, scores = CClassifier.predict(self, x, return_decision_function=True) # relabel rejection class labels[labels == self.n_classes - 1] = -1 return (labels, scores) if return_decision_function is True else labels @@ -218,7 +220,7 @@ def compute_threshold(self, rej_percent, ds): ------- threshold : float The estimated reject threshold - + """ if not self.is_fitted(): raise NotFittedError("The classifier must be fitted") diff --git a/src/secml/ml/classifiers/reject/tests/test_c_classifier_dnr.py b/src/secml/ml/classifiers/reject/tests/test_c_classifier_dnr.py index 0564ac6b..9d9de2c9 100644 --- a/src/secml/ml/classifiers/reject/tests/test_c_classifier_dnr.py +++ b/src/secml/ml/classifiers/reject/tests/test_c_classifier_dnr.py @@ -45,18 +45,20 @@ def _get_dataset(): # Load only 4 digits digits = (1, 4, 5, 9) - ds = CDataLoaderMNIST().load('training', digits=digits) + ds = CDataLoaderMNIST().load("training", digits=digits) # Extract training set for DNN and for DNR classifier and test set tr_dnn, ds_dnr = CTrainTestSplit( - train_size=300, test_size=350, random_state=0).split(ds) + train_size=300, test_size=350, random_state=0 + ).split(ds) tr_dnr, ts_dnr = CTrainTestSplit( - train_size=300, test_size=50, random_state=0).split(ds_dnr) + train_size=300, test_size=50, random_state=0 + ).split(ds_dnr) # Normalize data in [0, 1] - tr_dnn.X /= 255. - tr_dnr.X /= 255. - ts_dnr.X /= 255. + tr_dnn.X /= 255.0 + tr_dnr.X /= 255.0 + ts_dnr.X /= 255.0 return tr_dnn, tr_dnr, ts_dnr @@ -70,28 +72,37 @@ class Flatten(nn.Module): def forward(self, input): return input.view(input.size(0), -1) - od = OrderedDict([ - ('conv1', nn.Conv2d(1, 10, kernel_size=5)), - ('pool1', nn.MaxPool2d(2)), - ('conv2', nn.Conv2d(10, 20, kernel_size=5)), - ('drop', nn.Dropout2d()), - ('pool2', nn.MaxPool2d(2)), - ('flatten', Flatten()), - ('fc1', nn.Linear(320, 50)), - ('relu', nn.ReLU()), - ('fc2', nn.Linear(50, 4)), - ]) + od = OrderedDict( + [ + ("conv1", nn.Conv2d(1, 10, kernel_size=5)), + ("pool1", nn.MaxPool2d(2)), + ("conv2", nn.Conv2d(10, 20, kernel_size=5)), + ("drop", nn.Dropout2d()), + ("pool2", nn.MaxPool2d(2)), + ("flatten", Flatten()), + ("fc1", nn.Linear(320, 50)), + ("relu", nn.ReLU()), + ("fc2", nn.Linear(50, 4)), + ] + ) net = nn.Sequential(OrderedDict(od)) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9) scheduler = optim.lr_scheduler.MultiStepLR( - optimizer=optimizer, milestones=[1, 5, 8], gamma=0.1) + optimizer=optimizer, milestones=[1, 5, 8], gamma=0.1 + ) dnn = CClassifierPyTorch( - model=net, loss=criterion, optimizer=optimizer, epochs=10, - batch_size=20, input_shape=(1, 28, 28), - optimizer_scheduler=scheduler, random_state=0) + model=net, + loss=criterion, + optimizer=optimizer, + epochs=10, + batch_size=20, + input_shape=(1, 28, 28), + optimizer_scheduler=scheduler, + random_state=0, + ) dnn.fit(tr_dnn.X, tr_dnn.Y) return dnn @@ -99,7 +110,7 @@ def forward(self, input): @staticmethod def _create_clf(dnn): """Initialize the DNR classifier passing a single `layer_clf`""" - layers = ['conv2', 'relu'] + layers = ["conv2", "relu"] combiner = CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1) layer_clf = CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1) @@ -108,17 +119,18 @@ def _create_clf(dnn): @staticmethod def _create_clf_dict(dnn): """Initialize the DNR classifier passing a `layer_clf` dict""" - layers = ['conv2', 'relu'] + layers = ["conv2", "relu"] combiner = CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1) - layer_clf = {'conv2': CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1), - 'relu': CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1)} + layer_clf = { + "conv2": CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1), + "relu": CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1), + } return CClassifierDNR(combiner, layer_clf, dnn, layers, -inf) def test_fun(self): """Test for decision_function() and predict() methods.""" - self.logger.info( - "Test for decision_function() and predict() methods.") + self.logger.info("Test for decision_function() and predict() methods.") scores_d = self._test_fun(self.clf, self.ts.todense()) scores_s = self._test_fun(self.clf, self.ts.tosparse()) @@ -130,13 +142,13 @@ def test_fun(self): self.logger.info("Accuracy: {:}".format(accuracy)) def test_reject(self): - y_pred, score_pred = self.clf.predict( - self.ts.X, return_decision_function=True) + y_pred, score_pred = self.clf.predict(self.ts.X, return_decision_function=True) # set the threshold to have 10% of rejection rate threshold = self.clf.compute_threshold(0.1, self.ts) self.clf.threshold = threshold y_pred_reject, score_pred_reject = self.clf.predict( - self.ts.X, return_decision_function=True) + self.ts.X, return_decision_function=True + ) # Compute the number of rejected samples n_rej = (y_pred_reject == -1).sum() @@ -144,33 +156,34 @@ def test_reject(self): self.logger.info("Real: \n{:}".format(self.ts.Y)) self.logger.info("Predicted: \n{:}".format(y_pred)) - self.logger.info( - "Predicted with reject: \n{:}".format(y_pred_reject)) + self.logger.info("Predicted with reject: \n{:}".format(y_pred_reject)) - acc = CMetric.create('accuracy').performance_score( - y_pred, self.ts.Y) + acc = CMetric.create("accuracy").performance_score(y_pred, self.ts.Y) self.logger.info("Accuracy no rejection: {:}".format(acc)) - rej_acc = CMetric.create('accuracy').performance_score( - y_pred_reject[y_pred_reject != -1], - self.ts.Y[y_pred_reject != -1]) + rej_acc = CMetric.create("accuracy").performance_score( + y_pred_reject[y_pred_reject != -1], self.ts.Y[y_pred_reject != -1] + ) self.logger.info("Accuracy with rejection: {:}".format(rej_acc)) # check that the accuracy using reject is higher that the one # without rejects self.assertGreaterEqual( - rej_acc, acc, "The accuracy of the classifier that is allowed " - "to reject is lower than the one of the " - "classifier that is not allowed to reject") + rej_acc, + acc, + "The accuracy of the classifier that is allowed " + "to reject is lower than the one of the " + "classifier that is not allowed to reject", + ) def test_set_params(self): """Test layer classifiers parameters setting""" - self.clf.set_params({'conv2.C': 10, 'conv2.kernel.gamma': 20}) - self.clf.set('relu.C', 20) + self.clf.set_params({"conv2.C": 10, "conv2.kernel.gamma": 20}) + self.clf.set("relu.C", 20) - self.assertEqual(self.clf._layer_clfs['conv2'].C, 10.0) - self.assertEqual(self.clf._layer_clfs['conv2'].kernel.gamma, 20.0) - self.assertEqual(self.clf._layer_clfs['relu'].C, 20.0) + self.assertEqual(self.clf._layer_clfs["conv2"].C, 10.0) + self.assertEqual(self.clf._layer_clfs["conv2"].kernel.gamma, 20.0) + self.assertEqual(self.clf._layer_clfs["relu"].C, 20.0) def test_create_dict(self): self.logger.info("Testing creation with `layer_clf` dict") @@ -178,5 +191,5 @@ def test_create_dict(self): clf_dict.fit(self.tr.X, self.tr.Y) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierRejectTestCases.main() diff --git a/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject.py b/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject.py index 63b1e378..3b16d10f 100644 --- a/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject.py +++ b/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject.py @@ -3,4 +3,5 @@ class CClassifierRejectTestCases(CClassifierTestCases): """Unittests interface for CClassifierReject.""" + pass diff --git a/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject_threshold.py b/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject_threshold.py index 426d8f0b..557e448e 100644 --- a/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject_threshold.py +++ b/src/secml/ml/classifiers/reject/tests/test_c_classifier_reject_threshold.py @@ -17,12 +17,14 @@ class TestCClassifierRejectThreshold(CClassifierRejectTestCases): def setUp(self): """Test for init and fit methods.""" # generate synthetic data - self.dataset = CDLRandomBlobs(n_features=2, n_samples=100, centers=2, - cluster_std=2.0, random_state=0).load() + self.dataset = CDLRandomBlobs( + n_features=2, n_samples=100, centers=2, cluster_std=2.0, random_state=0 + ).load() self.logger.info("Testing classifier creation ") - self.clf_norej = CClassifierSGD(regularizer=CRegularizerL2(), - loss=CLossHinge(), random_state=0) + self.clf_norej = CClassifierSGD( + regularizer=CRegularizerL2(), loss=CLossHinge(), random_state=0 + ) self.clf = CClassifierRejectThreshold(self.clf_norej, threshold=0.6) self.clf.verbose = 2 # Enabling debug output for each classifier @@ -30,8 +32,7 @@ def setUp(self): def test_fun(self): """Test for decision_function() and predict() methods.""" - self.logger.info( - "Test for decision_function() and predict() methods.") + self.logger.info("Test for decision_function() and predict() methods.") scores_d = self._test_fun(self.clf, self.dataset.todense()) scores_s = self._test_fun(self.clf, self.dataset.tosparse()) @@ -49,9 +50,9 @@ def test_reject(self): # Classification of another dataset y_pred_reject, score_pred_reject = clf_reject.predict( - self.dataset.X, n_jobs=_NoValue, return_decision_function=True) - y_pred, score_pred = clf.predict(self.dataset.X, - return_decision_function=True) + self.dataset.X, n_jobs=_NoValue, return_decision_function=True + ) + y_pred, score_pred = clf.predict(self.dataset.X, return_decision_function=True) # Compute the number of rejected samples n_rej = (y_pred_reject == -1).sum() @@ -59,24 +60,25 @@ def test_reject(self): self.logger.info("Real: \n{:}".format(self.dataset.Y)) self.logger.info("Predicted: \n{:}".format(y_pred)) - self.logger.info( - "Predicted with reject: \n{:}".format(y_pred_reject)) + self.logger.info("Predicted with reject: \n{:}".format(y_pred_reject)) - acc = CMetric.create('accuracy').performance_score( - y_pred, self.dataset.Y) + acc = CMetric.create("accuracy").performance_score(y_pred, self.dataset.Y) self.logger.info("Accuracy no rejection: {:}".format(acc)) - rej_acc = CMetric.create('accuracy').performance_score( - y_pred_reject[y_pred_reject != -1], - self.dataset.Y[y_pred_reject != -1]) + rej_acc = CMetric.create("accuracy").performance_score( + y_pred_reject[y_pred_reject != -1], self.dataset.Y[y_pred_reject != -1] + ) self.logger.info("Accuracy with rejection: {:}".format(rej_acc)) # check that the accuracy using reject is higher that the one # without rejects self.assertGreaterEqual( - rej_acc, acc, "The accuracy of the classifier that is allowed " - "to reject is lower than the one of the " - "classifier that is not allowed to reject") + rej_acc, + acc, + "The accuracy of the classifier that is allowed " + "to reject is lower than the one of the " + "classifier that is not allowed to reject", + ) def test_gradient(self): """Unittest for gradient_f_x method.""" @@ -87,15 +89,13 @@ def test_gradient(self): ds = self.dataset.todense() clf = self.clf.fit(ds.X, ds.Y) - grads_d = self._test_gradient_numerical( - clf, ds.X[i, :], extra_classes=[-1]) + grads_d = self._test_gradient_numerical(clf, ds.X[i, :], extra_classes=[-1]) self.logger.info("Testing with sparse data...") ds = self.dataset.tosparse() clf = self.clf.fit(ds.X, ds.Y) - grads_s = self._test_gradient_numerical( - clf, ds.X[i, :], extra_classes=[-1]) + grads_s = self._test_gradient_numerical(clf, ds.X[i, :], extra_classes=[-1]) # FIXME: WHY THIS TEST IS CRASHING? RANDOM_STATE MAYBE? # Compare dense gradients with sparse gradients @@ -106,20 +106,25 @@ def test_gradient(self): def test_preprocess(self): """Test classifier with preprocessors inside.""" # All linear transformations with gradient implemented - self._test_preprocess(self.dataset, self.clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(self.dataset, self.clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}], - extra_classes=[-1]) + self._test_preprocess( + self.dataset, + self.clf, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) + self._test_preprocess_grad( + self.dataset, + self.clf, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + extra_classes=[-1], + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess( - self.dataset, self.clf, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(self.dataset, self.clf, ["pca", "unit-norm"], [{}, {}]) def test_draw(self): - """ Compare the classifiers graphically""" + """Compare the classifiers graphically""" self.logger.info("Testing classifiers graphically") fig = CFigure(width=10, markersize=8) @@ -127,20 +132,22 @@ def test_draw(self): # mark the rejected samples y = self.clf.predict(self.dataset.X) - fig.sp.plot_ds( - self.dataset[y == -1, :], colors=['k', 'k'], markersize=12) + fig.sp.plot_ds(self.dataset[y == -1, :], colors=["k", "k"], markersize=12) # plot the dataset fig.sp.plot_ds(self.dataset) # Plot objective function - fig.sp.plot_fun(self.clf.decision_function, - grid_limits=self.dataset.get_bounds(), - levels=[0], y=1) - fig.sp.title('Classifier with reject threshold') + fig.sp.plot_fun( + self.clf.decision_function, + grid_limits=self.dataset.get_bounds(), + levels=[0], + y=1, + ) + fig.sp.title("Classifier with reject threshold") fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CClassifierRejectTestCases.main() diff --git a/src/secml/ml/classifiers/secure/c_classifier_sec_svm.py b/src/secml/ml/classifiers/secure/c_classifier_sec_svm.py index 9c3a5641..05cfa084 100644 --- a/src/secml/ml/classifiers/secure/c_classifier_sec_svm.py +++ b/src/secml/ml/classifiers/secure/c_classifier_sec_svm.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.core.constants import inf from secml.ml.classifiers import CClassifierSVM @@ -46,10 +47,21 @@ class CClassifierSecSVM(CClassifierSVM): class_type : 'sec-svm' """ - __class_type = 'sec-svm' - def __init__(self, ub=inf, idx_ub=None, lb=-inf, idx_lb=None, - eta=0.5, max_it=1e4, eps=1e-4, *args, **kwargs): + __class_type = "sec-svm" + + def __init__( + self, + ub=inf, + idx_ub=None, + lb=-inf, + idx_lb=None, + eta=0.5, + max_it=1e4, + eps=1e-4, + *args, + **kwargs + ): # Calling standard CClassifierSVM constructor super(self.__class__, self).__init__(*args, **kwargs) @@ -64,11 +76,9 @@ def __init__(self, ub=inf, idx_ub=None, lb=-inf, idx_lb=None, raise ValueError("Upper bounds should be higher then lower bounds") self._ub = ub - self._idx_ub = idx_ub if idx_ub is not None \ - else slice(None, None, None) + self._idx_ub = idx_ub if idx_ub is not None else slice(None, None, None) self._lb = lb - self._idx_lb = idx_lb if idx_lb is not None \ - else slice(None, None, None) + self._idx_lb = idx_lb if idx_lb is not None else slice(None, None, None) @property def ub(self): @@ -136,7 +146,7 @@ def C_hinge_loss(self, x, y): """ loss = self.C * self.hinge_loss(x, y) - if self.class_weight == 'balanced': + if self.class_weight == "balanced": loss[y == -1] = self.weight[0] * loss[y == -1] loss[y == 1] = self.weight[1] * loss[y == 1] @@ -154,11 +164,9 @@ def gradient_w_b(self, x, y): grad_loss = CArray.zeros(x.shape[1]) if (idx_err_vect * (y < 0)).any(): - grad_loss += x[idx_err_vect * (y <= 0), :].sum( - axis=0, keepdims=False) + grad_loss += x[idx_err_vect * (y <= 0), :].sum(axis=0, keepdims=False) if (idx_err_vect * (y > 0)).any(): - grad_loss -= x[idx_err_vect * (y > 0), :].sum( - axis=0, keepdims=False) + grad_loss -= x[idx_err_vect * (y > 0), :].sum(axis=0, keepdims=False) grad_w = self.w + self.C * grad_loss @@ -186,12 +194,11 @@ def _fit(self, x, y): """ if self.n_classes != 2: - raise ValueError( - "Trying to learn an SVM on more/less than two classes.") + raise ValueError("Trying to learn an SVM on more/less than two classes.") y = convert_binary_labels(y) - if self.class_weight == 'balanced': + if self.class_weight == "balanced": n_pos = y[y == 1].shape[0] n_neg = y[y == -1].shape[0] self.weight = CArray.zeros(2) @@ -208,15 +215,15 @@ def _fit(self, x, y): # pick a random sample subset idx = CArray.randsample( - CArray.arange(x.shape[0], dtype=int), x.shape[0], - random_state=i) + CArray.arange(x.shape[0], dtype=int), x.shape[0], random_state=i + ) # compute subgradients grad_w, grad_b = self.gradient_w_b(x[idx, :], y[idx]) for p in range(0, 71, 10): - step = (self.eta ** p) * 2 ** (-0.01 * i) / (x.shape[0] ** 0.5) + step = (self.eta**p) * 2 ** (-0.01 * i) / (x.shape[0] ** 0.5) self._w -= step * grad_w self._b -= step * grad_b @@ -246,7 +253,6 @@ def _fit(self, x, y): if i % 10 == 0: loss = self.hinge_loss(x, y).sum() - self.logger.info( - "i {:}: {:.4f}, L {:.4f}".format(i, obj, loss)) + self.logger.info("i {:}: {:.4f}, L {:.4f}".format(i, obj, loss)) # Sparse weights if input is sparse (like in CClassifierSVM) self._w = self.w.tosparse() if x.issparse else self.w diff --git a/src/secml/ml/classifiers/secure/tests/test_c_classifier_sec_svm.py b/src/secml/ml/classifiers/secure/tests/test_c_classifier_sec_svm.py index c9c62dda..e5d0a2be 100644 --- a/src/secml/ml/classifiers/secure/tests/test_c_classifier_sec_svm.py +++ b/src/secml/ml/classifiers/secure/tests/test_c_classifier_sec_svm.py @@ -15,8 +15,10 @@ def setUp(self): def _compute_alignment(self, ds, secsvm, svm): self.logger.info( - "Sec-SVM, Avg. Hinge loss: \n{:}".format(secsvm.hinge_loss( - ds.X, 2 * ds.Y - 1).mean())) + "Sec-SVM, Avg. Hinge loss: \n{:}".format( + secsvm.hinge_loss(ds.X, 2 * ds.Y - 1).mean() + ) + ) self.logger.info("SVM, b: {:}".format(svm.b)) self.logger.info("SVM, w: \n{:}".format(svm.w)) @@ -29,18 +31,22 @@ def _compute_alignment(self, ds, secsvm, svm): self.assertGreater(angle, 0.7) self.logger.info( - "Objective Function: \n{:}".format(secsvm.objective(ds.X, ds.Y))) + "Objective Function: \n{:}".format(secsvm.objective(ds.X, ds.Y)) + ) self.logger.info( - "Gradient w vs b: \n{:}".format(secsvm.gradient_w_b(ds.X, ds.Y))) + "Gradient w vs b: \n{:}".format(secsvm.gradient_w_b(ds.X, ds.Y)) + ) def test_alignment(self): - ds = CDLRandom(n_samples=100, - n_features=500, - n_redundant=0, - n_informative=10, - n_clusters_per_class=1, - random_state=0).load() + ds = CDLRandom( + n_samples=100, + n_features=500, + n_redundant=0, + n_informative=10, + n_clusters_per_class=1, + random_state=0, + ).load() self.logger.info("Train Sec SVM") sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5) @@ -63,10 +69,9 @@ def test_alignment(self): def test_plot(self): - ds = CDLRandom(n_samples=100, - n_features=2, - n_redundant=0, - random_state=100).load() + ds = CDLRandom( + n_samples=100, n_features=2, n_redundant=0, random_state=100 + ).load() self.logger.info("Train Sec SVM") sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5) @@ -84,28 +89,32 @@ def test_plot(self): # Plot dataset points fig.sp.plot_ds(ds) # Plot objective function - fig.sp.plot_fun(svm.predict, - multipoint=True, - plot_background=True, - plot_levels=False, - n_grid_points=100, - grid_limits=ds.get_bounds()) + fig.sp.plot_fun( + svm.predict, + multipoint=True, + plot_background=True, + plot_levels=False, + n_grid_points=100, + grid_limits=ds.get_bounds(), + ) fig.sp.title("SVM") fig.subplot(1, 2, 2) # Plot dataset points fig.sp.plot_ds(ds) # Plot objective function - fig.sp.plot_fun(sec_svm.predict, - multipoint=True, - plot_background=True, - plot_levels=False, - n_grid_points=100, - grid_limits=ds.get_bounds()) + fig.sp.plot_fun( + sec_svm.predict, + multipoint=True, + plot_background=True, + plot_levels=False, + n_grid_points=100, + grid_limits=ds.get_bounds(), + ) fig.sp.title("Sec-SVM") fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_decision_tree.py b/src/secml/ml/classifiers/sklearn/c_classifier_decision_tree.py index 49d7b028..c2214b53 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_decision_tree.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_decision_tree.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn import tree from secml.ml.classifiers import CClassifierSkLearn @@ -50,18 +51,25 @@ class CClassifierDecisionTree(CClassifierSkLearn): class_type : 'dec-tree' """ - __class_type = 'dec-tree' - def __init__(self, criterion='gini', splitter='best', - max_depth=None, min_samples_split=2, - random_state=None, preprocess=None): + __class_type = "dec-tree" + + def __init__( + self, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + random_state=None, + preprocess=None, + ): dt = tree.DecisionTreeClassifier( criterion=criterion, splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split, - random_state=random_state) + random_state=random_state, + ) - CClassifierSkLearn.__init__(self, sklearn_model=dt, - preprocess=preprocess) + CClassifierSkLearn.__init__(self, sklearn_model=dt, preprocess=preprocess) diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_knn.py b/src/secml/ml/classifiers/sklearn/c_classifier_knn.py index d83b4bd6..80e03ff4 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_knn.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_knn.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis .. moduleauthor:: Ambra Demontis """ + from sklearn import neighbors from secml.array import CArray @@ -55,21 +56,34 @@ class CClassifierKNN(CClassifierSkLearn): class_type : 'knn' """ - __class_type = 'knn' - def __init__(self, n_neighbors=5, weights='uniform', - algorithm='auto', leaf_size=30, p=2, - metric='minkowski', metric_params=None, - preprocess=None): + __class_type = "knn" + + def __init__( + self, + n_neighbors=5, + weights="uniform", + algorithm="auto", + leaf_size=30, + p=2, + metric="minkowski", + metric_params=None, + preprocess=None, + ): self._tr = None knn = neighbors.KNeighborsClassifier( - n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, p=p, - leaf_size=leaf_size, metric=metric, metric_params=metric_params) + n_neighbors=n_neighbors, + weights=weights, + algorithm=algorithm, + p=p, + leaf_size=leaf_size, + metric=metric, + metric_params=metric_params, + ) - CClassifierSkLearn.__init__(self, sklearn_model=knn, - preprocess=preprocess) + CClassifierSkLearn.__init__(self, sklearn_model=knn, preprocess=preprocess) @property def tr(self): @@ -100,11 +114,11 @@ def _fit(self, x, y): def kneighbors(self, x, num_samples=None): """ Find the training samples nearest to x - + Parameters ---------- x : CArray - The query point or points. + The query point or points. num_samples: int or None Number of neighbors to get. if None, use n_neighbors @@ -121,7 +135,8 @@ def kneighbors(self, x, num_samples=None): num_samples = self._sklearn_model.n_neighbors dist, index_point = self._sklearn_model.kneighbors( - x.get_data(), num_samples, return_distance=True) + x.get_data(), num_samples, return_distance=True + ) index_point = CArray(index_point, dtype=int).ravel() diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_logistic.py b/src/secml/ml/classifiers/sklearn/c_classifier_logistic.py index e397fb39..c8273a6d 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_logistic.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_logistic.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from sklearn.linear_model import LogisticRegression from secml.array import CArray @@ -13,13 +14,12 @@ from secml.ml.classifiers.loss import CLossLogistic from secml.ml.classifiers.regularizer import CRegularizerL2 -from secml.ml.classifiers.gradients import \ - CClassifierGradientLogisticMixin +from secml.ml.classifiers.gradients import CClassifierGradientLogisticMixin -class CClassifierLogistic(CClassifierLinearMixin, - CClassifierSkLearn, - CClassifierGradientLogisticMixin): +class CClassifierLogistic( + CClassifierLinearMixin, CClassifierSkLearn, CClassifierGradientLogisticMixin +): """Logistic Regression (aka logit, MaxEnt) classifier. Parameters @@ -45,27 +45,28 @@ class CClassifierLogistic(CClassifierLinearMixin, class_type : 'logistic' """ - __class_type = 'logistic' + + __class_type = "logistic" _loss = CLossLogistic() _reg = CRegularizerL2() - def __init__(self, C=1.0, max_iter=100, - random_state=None, preprocess=None): + def __init__(self, C=1.0, max_iter=100, random_state=None, preprocess=None): sklearn_model = LogisticRegression( - penalty='l2', + penalty="l2", dual=False, tol=0.0001, C=C, fit_intercept=True, intercept_scaling=1.0, class_weight=None, - solver='liblinear', + solver="liblinear", random_state=random_state, max_iter=max_iter, - multi_class='ovr', + multi_class="ovr", verbose=0, - warm_start=False) + warm_start=False, + ) CClassifierSkLearn.__init__(self, sklearn_model, preprocess=preprocess) diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_nearest_centroid.py b/src/secml/ml/classifiers/sklearn/c_classifier_nearest_centroid.py index 8bb9b76a..2e25a1db 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_nearest_centroid.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_nearest_centroid.py @@ -7,6 +7,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn.neighbors import NearestCentroid from secml.array import CArray @@ -35,15 +36,16 @@ class CClassifierNearestCentroid(CClassifierSkLearn): class_type : 'nrst-centroid' """ - __class_type = 'nrst-centroid' - def __init__(self, metric='euclidean', - shrink_threshold=None, preprocess=None): + __class_type = "nrst-centroid" + + def __init__(self, metric="euclidean", shrink_threshold=None, preprocess=None): nc = NearestCentroid(metric=metric, shrink_threshold=shrink_threshold) super(CClassifierNearestCentroid, self).__init__( - sklearn_model=nc, preprocess=preprocess) + sklearn_model=nc, preprocess=preprocess + ) @property def metric(self): @@ -54,7 +56,7 @@ def centroids(self): return CArray(self._sklearn_model.centroids_) def _forward(self, x): - """ This sklearn classifier only supports predict. + """This sklearn classifier only supports predict. So we also implement a simple decision function based on pairwise distances. @@ -70,8 +72,12 @@ def _forward(self, x): (i.e., similarity w/ centroid). """ - dist = CArray(pairwise_distances( - x.get_data(), self._sklearn_model.centroids_, - metric=self._sklearn_model.metric)).atleast_2d() + dist = CArray( + pairwise_distances( + x.get_data(), + self._sklearn_model.centroids_, + metric=self._sklearn_model.metric, + ) + ).atleast_2d() return -dist diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_random_forest.py b/src/secml/ml/classifiers/sklearn/c_classifier_random_forest.py index a878066e..b051a596 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_random_forest.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_random_forest.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.ml.classifiers import CClassifierSkLearn from sklearn.ensemble import RandomForestClassifier @@ -48,19 +49,25 @@ class CClassifierRandomForest(CClassifierSkLearn): class_type : 'random-forest' """ - __class_type = 'random-forest' - def __init__(self, n_estimators=10, criterion='gini', - max_depth=None, min_samples_split=2, - random_state=None, preprocess=None): + __class_type = "random-forest" + + def __init__( + self, + n_estimators=10, + criterion="gini", + max_depth=None, + min_samples_split=2, + random_state=None, + preprocess=None, + ): rf = RandomForestClassifier( n_estimators=n_estimators, criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split, - random_state=random_state + random_state=random_state, ) - CClassifierSkLearn.__init__(self, sklearn_model=rf, - preprocess=preprocess) + CClassifierSkLearn.__init__(self, sklearn_model=rf, preprocess=preprocess) diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_ridge.py b/src/secml/ml/classifiers/sklearn/c_classifier_ridge.py index 10e5c316..54696b9d 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_ridge.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_ridge.py @@ -7,6 +7,7 @@ .. moduleauthor:: Battista Biggio """ + from sklearn.linear_model import RidgeClassifier from secml.array import CArray @@ -16,8 +17,9 @@ from secml.ml.classifiers.regularizer import CRegularizerL2 -class CClassifierRidge(CClassifierLinearMixin, CClassifierSkLearn, - CClassifierGradientRidgeMixin): +class CClassifierRidge( + CClassifierLinearMixin, CClassifierSkLearn, CClassifierGradientRidgeMixin +): """Ridge Classifier. Parameters @@ -51,24 +53,35 @@ class frequencies as `n_samples / (n_classes * np.bincount(y))`. class_type : 'ridge' """ - __class_type = 'ridge' + + __class_type = "ridge" _loss = CLossSquare() _reg = CRegularizerL2() - def __init__(self, alpha=1.0, max_iter=int(1e5), class_weight=None, tol=1e-4, - fit_intercept=True, preprocess=None): + def __init__( + self, + alpha=1.0, + max_iter=int(1e5), + class_weight=None, + tol=1e-4, + fit_intercept=True, + preprocess=None, + ): # create instance of sklearn model - sklearn_model = RidgeClassifier(alpha=alpha, - fit_intercept=fit_intercept, - tol=tol, - max_iter=max_iter, - class_weight=class_weight, - solver='auto') + sklearn_model = RidgeClassifier( + alpha=alpha, + fit_intercept=fit_intercept, + tol=tol, + max_iter=max_iter, + class_weight=class_weight, + solver="auto", + ) # Calling the superclass init - CClassifierSkLearn.__init__(self, sklearn_model=sklearn_model, - preprocess=preprocess) + CClassifierSkLearn.__init__( + self, sklearn_model=sklearn_model, preprocess=preprocess + ) @property def C(self): @@ -89,8 +102,11 @@ def w(self): @property def b(self): if self.is_fitted(): - return CArray(self._sklearn_model.intercept_[0])[0] if \ - self.fit_intercept else 0 + return ( + CArray(self._sklearn_model.intercept_[0])[0] + if self.fit_intercept + else 0 + ) else: return None diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_sgd.py b/src/secml/ml/classifiers/sklearn/c_classifier_sgd.py index 6d6216fb..7044f753 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_sgd.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_sgd.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from sklearn import linear_model from secml.array import CArray @@ -15,9 +16,9 @@ from secml.ml.classifiers.gradients import CClassifierGradientSGDMixin -class CClassifierSGD(CClassifierLinearMixin, - CClassifierSkLearn, - CClassifierGradientSGDMixin): +class CClassifierSGD( + CClassifierLinearMixin, CClassifierSkLearn, CClassifierGradientSGDMixin +): """Stochastic Gradient Descent Classifier. Parameters @@ -90,14 +91,27 @@ class frequencies as `n_samples / (n_classes * np.bincount(y))`. class_type : 'sgd' """ - __class_type = 'sgd' - def __init__(self, loss, regularizer, alpha=0.01, - fit_intercept=True, max_iter=1000, tol=None, - shuffle=True, learning_rate='optimal', - eta0=10.0, power_t=0.5, class_weight=None, - warm_start=False, average=False, random_state=None, - preprocess=None): + __class_type = "sgd" + + def __init__( + self, + loss, + regularizer, + alpha=0.01, + fit_intercept=True, + max_iter=1000, + tol=None, + shuffle=True, + learning_rate="optimal", + eta0=10.0, + power_t=0.5, + class_weight=None, + warm_start=False, + average=False, + random_state=None, + preprocess=None, + ): # Keep private (not an sklearn sgd parameter) self._loss = CLoss.create(loss) @@ -118,7 +132,8 @@ def __init__(self, loss, regularizer, alpha=0.01, class_weight=class_weight, average=average, warm_start=warm_start, - random_state=random_state) + random_state=random_state, + ) # Pass loss function parameters to classifier sklearn_model.set_params(**self.loss.get_params()) @@ -126,8 +141,9 @@ def __init__(self, loss, regularizer, alpha=0.01, sklearn_model.set_params(**self.regularizer.get_params()) # Calling the superclass init - CClassifierSkLearn.__init__(self, sklearn_model=sklearn_model, - preprocess=preprocess) + CClassifierSkLearn.__init__( + self, sklearn_model=sklearn_model, preprocess=preprocess + ) @property def loss(self): @@ -158,8 +174,11 @@ def w(self): @property def b(self): if self.is_fitted(): - return CArray(self._sklearn_model.intercept_[0])[0] if \ - self.fit_intercept else 0 + return ( + CArray(self._sklearn_model.intercept_[0])[0] + if self.fit_intercept + else 0 + ) else: return None diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_sklearn.py b/src/secml/ml/classifiers/sklearn/c_classifier_sklearn.py index 5c162a4b..990bcd94 100644 --- a/src/secml/ml/classifiers/sklearn/c_classifier_sklearn.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_sklearn.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.ml.classifiers import CClassifier from secml.array import CArray from secml.utils.dict_utils import merge_dicts, SubLevelsDict @@ -39,8 +40,11 @@ def get_params(self): # as keys the attributes names without the accessibility prefix # We merge our dict with the sklearn `.get_params()` dict return SubLevelsDict( - merge_dicts(super(CWrapperSkLearnMixin, self).get_params(), - self._sklearn_model.get_params())) + merge_dicts( + super(CWrapperSkLearnMixin, self).get_params(), + self._sklearn_model.get_params(), + ) + ) def __getattribute__(self, key): """Get an attribute. @@ -50,7 +54,7 @@ def __getattribute__(self, key): """ try: # If we are not getting the sklearn model itself - if key != '_sklearn_model' and hasattr(self, '_sklearn_model'): + if key != "_sklearn_model" and hasattr(self, "_sklearn_model"): return self._sklearn_model.get_params()[key] except KeyError: pass # Parameter not found in sklearn model @@ -63,8 +67,7 @@ def __setattr__(self, key, value): This allow setting also the attributes of the internal sklearn model. """ - if hasattr(self, '_sklearn_model') and \ - key in self._sklearn_model.get_params(): + if hasattr(self, "_sklearn_model") and key in self._sklearn_model.get_params(): self._sklearn_model.set_params(**{key: value}) else: # Otherwise, normal python set behavior super(CWrapperSkLearnMixin, self).__setattr__(key, value) @@ -89,14 +92,14 @@ class CClassifierSkLearn(CWrapperSkLearnMixin, CClassifier): """ - __class_type = 'sklearn-clf' + __class_type = "sklearn-clf" def __init__(self, sklearn_model, preprocess=None): CWrapperSkLearnMixin.__init__(self, sklearn_model) CClassifier.__init__(self, preprocess=preprocess) - if hasattr(sklearn_model, 'classes_'): # Model is pretrained + if hasattr(sklearn_model, "classes_"): # Model is pretrained self._classes = CArray(sklearn_model.classes_) # FIXME: how to obtain this from pretrained models? self._n_features = 0 @@ -131,7 +134,8 @@ def _forward(self, x): probs = True else: raise AttributeError( - "This model has neither decision_function nor predict_proba.") + "This model has neither decision_function nor predict_proba." + ) scores = CArray(scores) @@ -143,12 +147,12 @@ def _forward(self, x): scores = outputs if scores.shape[1] != self.n_classes: # this happens in one-vs-one - raise ValueError( - "Number of columns is not equal to number of classes!") + raise ValueError("Number of columns is not equal to number of classes!") scores.atleast_2d() return scores def _backward(self, w): raise NotImplementedError( - "`_backward` is not implemented for this generic sklearn wrapper.") + "`_backward` is not implemented for this generic sklearn wrapper." + ) diff --git a/src/secml/ml/classifiers/sklearn/c_classifier_svm.py b/src/secml/ml/classifiers/sklearn/c_classifier_svm.py index c3f94d95..45cb5aed 100755 --- a/src/secml/ml/classifiers/sklearn/c_classifier_svm.py +++ b/src/secml/ml/classifiers/sklearn/c_classifier_svm.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from sklearn.svm import SVC from secml.array import CArray @@ -37,8 +38,7 @@ def _fit_one_ova(tr_class_idx, svm, x, y, svc_kernel, verbose): # level is stored per-object looking to id svm.verbose = verbose - svm.logger.info( - "Training against class: {:}".format(tr_class_idx)) + svm.logger.info("Training against class: {:}".format(tr_class_idx)) # Binarize labels y_ova = CArray(y == svm.classes[tr_class_idx]) @@ -98,12 +98,14 @@ class frequencies as `n_samples / (n_classes * np.bincount(y))`. CKernel : Pairwise kernels and metrics. """ - __class_type = 'svm' + + __class_type = "svm" _loss = CLossHinge() - def __init__(self, C=1.0, kernel=None, - class_weight=None, preprocess=None, n_jobs=1): + def __init__( + self, C=1.0, kernel=None, class_weight=None, preprocess=None, n_jobs=1 + ): # calling the superclass init CClassifier.__init__(self, preprocess=preprocess, n_jobs=n_jobs) @@ -156,8 +158,10 @@ def class_weight(self, value): """ # TODO we can have one weight per class but only for OVO if isinstance(value, dict) and len(value) != 2: - raise ValueError("weight of positive (+1) and negative (0) " - "classes only must be specified.") + raise ValueError( + "weight of positive (+1) and negative (0) " + "classes only must be specified." + ) self._class_weight = value @property @@ -207,8 +211,7 @@ def _fit(self, x, y): Trained classifier. """ - self.logger.info( - "Training SVM with parameters: {:}".format(self.get_params())) + self.logger.info("Training SVM with parameters: {:}".format(self.get_params())) # reset training self._w = None @@ -223,11 +226,11 @@ def _fit(self, x, y): # initialize params if self.kernel is None: # no kernel pre-processing, training in the primal - svc_kernel = 'linear' + svc_kernel = "linear" self._w = CArray.zeros(shape=(n_rows, n_cols)) else: # inputs are kernel values, training in the dual - svc_kernel = 'precomputed' + svc_kernel = "precomputed" self._alpha = CArray.zeros(shape=(n_rows, n_cols), sparse=True) self._b = CArray.zeros(shape=(self.n_classes,)) @@ -248,9 +251,16 @@ def _fit(self, x, y): def _fit_one_vs_all(self, x, y, svc_kernel): # ova (but we can also implement ovo - let's do separate functions) - out = parfor2(_fit_one_ova, - self.n_classes, self.n_jobs, - self, x, y, svc_kernel, self.verbose) + out = parfor2( + _fit_one_ova, + self.n_classes, + self.n_jobs, + self, + x, + y, + svc_kernel, + self.verbose, + ) # Building results for i in range(self.n_classes): @@ -263,7 +273,7 @@ def _fit_one_vs_all(self, x, y, svc_kernel): def _fit_binary(self, x, y, svc_kernel): svc = SVC(C=self.C, kernel=svc_kernel, class_weight=self.class_weight) - if svc_kernel == 'precomputed': + if svc_kernel == "precomputed": # training on sparse precomputed kernels is not supported svc.fit(x.tondarray(), y.get_data()) else: @@ -321,12 +331,10 @@ def _sv_margin(self, tol=1e-6): if self.n_classes > 2: raise ValueError("SVM is not binary!") - assert (self.kernel.rv.shape[0] == self.alpha.shape[1]) + assert self.kernel.rv.shape[0] == self.alpha.shape[1] alpha = self.alpha.todense() - s = alpha.find( - (abs(alpha) >= tol) * - (abs(alpha) <= self.C - tol)) + s = alpha.find((abs(alpha) >= tol) * (abs(alpha) <= self.C - tol)) if len(s) > 0: return self.kernel.rv[s, :], CArray(s) else: # no margin SVs @@ -336,9 +344,9 @@ def _kernel_function(self, x, z=None): """Compute kernel matrix between x and z, without pre-processing.""" # clone kernel removing rv and pre-processing kernel_params = self.kernel.get_params() - kernel_params.pop('preprocess') # detach preprocess and rv - kernel_params.pop('rv') - kernel_params.pop('n_jobs') # TODO: not accepted by kernel constructor + kernel_params.pop("preprocess") # detach preprocess and rv + kernel_params.pop("rv") + kernel_params.pop("n_jobs") # TODO: not accepted by kernel constructor kernel = CKernel.create(self.kernel.class_type, **kernel_params) z = z if z is not None else x return kernel.k(x, z) @@ -370,8 +378,9 @@ def grad_f_params(self, x, y=1): xs, _ = self._sv_margin() # these points are already preprocessed if xs is None: - self.logger.debug("Warning: sv_margin is empty " - "(all points are error vectors).") + self.logger.debug( + "Warning: sv_margin is empty " "(all points are error vectors)." + ) return None s = xs.shape[0] # margin support vector diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_decision_tree.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_decision_tree.py index 0fd5dc53..896474d7 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_decision_tree.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_decision_tree.py @@ -13,33 +13,32 @@ def setUp(self): self.dec_tree = CClassifierDecisionTree(random_state=0) def test_classify(self): - """Test for predict method. """ + """Test for predict method.""" self.logger.info("Testing decision tree classifier training ") self.dec_tree.fit(self.dataset.X, self.dataset.Y) self.logger.info("Testing classification with trees") - self.logger.info( - "Number of classes: {:}".format(self.dec_tree.n_classes)) + self.logger.info("Number of classes: {:}".format(self.dec_tree.n_classes)) y, result = self.dec_tree.predict( - self.dataset.X[0, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[0, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[0], y, "Wrong classification") y, result = self.dec_tree.predict( - self.dataset.X[50, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[50, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[50], y, "Wrong classification") y, result = self.dec_tree.predict( - self.dataset.X[120, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[120, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[120], y, "Wrong classification") @@ -53,21 +52,27 @@ def test_fun(self): def test_preprocess(self): """Test classifier with preprocessors inside.""" # All linear transformations - self._test_preprocess(self.dataset, self.dec_tree, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, + self.dec_tree, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) # Mixed linear/nonlinear transformations - self._test_preprocess(self.dataset, self.dec_tree, - ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess( + self.dataset, self.dec_tree, ["pca", "unit-norm"], [{}, {}] + ) def test_plot(self): - ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, - random_state=1).load() + ds = CDLRandomBlobs( + n_samples=100, centers=3, n_features=2, random_state=1 + ).load() fig = self._test_plot(self.dec_tree, ds, levels=[0.5]) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_decision_tree.pdf')) + fig.savefig( + fm.join(fm.abspath(__file__), "figs", "test_c_classifier_decision_tree.pdf") + ) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_knn.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_knn.py index 9eb45469..54a8fd62 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_knn.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_knn.py @@ -11,9 +11,15 @@ class TestCClassifierKNN(CClassifierTestCases): def setUp(self): - ds = CDLRandom(n_samples=100, n_classes=3, n_features=2, - n_redundant=0, n_informative=2, n_clusters_per_class=1, - random_state=10000).load() + ds = CDLRandom( + n_samples=100, + n_classes=3, + n_features=2, + n_redundant=0, + n_informative=2, + n_clusters_per_class=1, + random_state=10000, + ).load() self.dataset = ds[:50, :] self.test = ds[50:, :] @@ -23,17 +29,16 @@ def setUp(self): self.knn.fit(self.dataset.X, self.dataset.Y) def test_plot(self): - ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, - random_state=1).load() + ds = CDLRandomBlobs( + n_samples=100, centers=3, n_features=2, random_state=1 + ).load() fig = self._test_plot(self.knn, ds, levels=[0.5]) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_knn.pdf')) + fig.savefig(fm.join(fm.abspath(__file__), "figs", "test_c_classifier_knn.pdf")) def test_classification(self): self.logger.info("Check the classification method... ") - lab_cl, score = self.knn.predict( - self.test.X, return_decision_function=True) + lab_cl, score = self.knn.predict(self.test.X, return_decision_function=True) acc = CMetricAccuracy().performance_score(self.test.Y, lab_cl) @@ -53,20 +58,21 @@ def test_kneighbors(self): dist, index_n, corresp = self.knn.kneighbors(single_sample) self.logger.info("Sample to evaluate: {:}".format(single_sample)) self.logger.info("") - self.logger.info("Closest: {:}, index {:}, distance {:}" - "".format(corresp[dist.argmin(), :], - index_n[dist.argmin()], - dist.min())) + self.logger.info( + "Closest: {:}, index {:}, distance {:}" + "".format(corresp[dist.argmin(), :], index_n[dist.argmin()], dist.min()) + ) self.logger.info("Checking KNN classifier on multiple samples...") num_samp = 2 with self.timer(): - dist, index_n, corresp = self.knn.kneighbors( - array_samples, num_samp) + dist, index_n, corresp = self.knn.kneighbors(array_samples, num_samp) for i in range(10): self.logger.info("Sample to evaluate: {:}".format(single_sample)) - self.logger.info("Closest: {:}, index {:}, distance {:}" - "".format(corresp[i, :], index_n[i], dist[i, :])) + self.logger.info( + "Closest: {:}, index {:}, distance {:}" + "".format(corresp[i, :], index_n[i], dist[i, :]) + ) def test_fun(self): """Test for decision_function() and predict() methods.""" @@ -79,14 +85,13 @@ def test_preprocess(self): """Test classifier with preprocessors inside.""" knn = CClassifierKNN(n_neighbors=3) # All linear transformations - self._test_preprocess(self.dataset, knn, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, knn, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) # Mixed linear/nonlinear transformations - self._test_preprocess(self.dataset, knn, - ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(self.dataset, knn, ["pca", "unit-norm"], [{}, {}]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_logistic.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_logistic.py index 881cda0a..2d6d943d 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_logistic.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_logistic.py @@ -12,9 +12,13 @@ class TestCClassifierLogistic(CClassifierTestCases): def setUp(self): """Test for init and fit methods.""" # generate synthetic data - self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1, - n_clusters_per_class=1, - random_state=99).load() + self.dataset = CDLRandom( + n_features=2, + n_redundant=0, + n_informative=1, + n_clusters_per_class=1, + random_state=99, + ).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) @@ -23,10 +27,11 @@ def setUp(self): self.log = CClassifierLogistic(random_state=99) def test_plot(self): - """ Compare the classifiers graphically""" + """Compare the classifiers graphically""" fig = self._test_plot(self.log, self.dataset) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_logistic.pdf')) + fig.savefig( + fm.join(fm.abspath(__file__), "figs", "test_c_classifier_logistic.pdf") + ) def test_fun(self): """Test for decision_function() and predict() methods.""" @@ -73,17 +78,17 @@ def test_preprocess(self): ds = CDLRandom().load() # All linear transformations with gradient implemented - self._test_preprocess(ds, self.log, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(ds, self.log, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + ds, self.log, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) + self._test_preprocess_grad( + ds, self.log, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) self.logger.info("The following case will skip the gradient test") # Mixed linear/nonlinear transformations without gradient - self._test_preprocess(ds, self.log, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(ds, self.log, ["pca", "unit-norm"], [{}, {}]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_nearest_centroid.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_nearest_centroid.py index f5a02985..dc59c6b5 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_nearest_centroid.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_nearest_centroid.py @@ -12,20 +12,25 @@ class TestCClassifierNearestCentroid(CClassifierTestCases): def setUp(self): """Test for init and fit methods.""" - self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1, - n_clusters_per_class=1).load() + self.dataset = CDLRandom( + n_features=2, n_redundant=0, n_informative=1, n_clusters_per_class=1 + ).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) self.nc = CClassifierNearestCentroid() def test_plot(self): - """ Compare the classifiers graphically""" - ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, - random_state=1).load() + """Compare the classifiers graphically""" + ds = CDLRandomBlobs( + n_samples=100, centers=3, n_features=2, random_state=1 + ).load() fig = self._test_plot(self.nc, ds, [-10]) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_nearest_centroid.pdf')) + fig.savefig( + fm.join( + fm.abspath(__file__), "figs", "test_c_classifier_nearest_centroid.pdf" + ) + ) def test_fun(self): """Test for decision_function() and predict() methods.""" @@ -39,14 +44,13 @@ def test_preprocess(self): ds = CDLRandom().load() # All linear transformations - self._test_preprocess(ds, self.nc, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + ds, self.nc, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) # Mixed linear/nonlinear transformations - self._test_preprocess(ds, self.nc, - ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(ds, self.nc, ["pca", "unit-norm"], [{}, {}]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_random_forest.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_random_forest.py index dd81f19a..3b921e4e 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_random_forest.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_random_forest.py @@ -19,27 +19,26 @@ def test_classify(self): self.logger.info("Testing classification with trees") - self.logger.info( - "Number of classes: {:}".format(self.rnd_forest.n_classes)) + self.logger.info("Number of classes: {:}".format(self.rnd_forest.n_classes)) y, result = self.rnd_forest.predict( - self.dataset.X[0, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[0, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[0], y, "Wrong classification") y, result = self.rnd_forest.predict( - self.dataset.X[50, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[50, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[50], y, "Wrong classification") y, result = self.rnd_forest.predict( - self.dataset.X[120, :], return_decision_function=True) - self.logger.info( - "Probability of affinity to each class: {:}".format(result)) + self.dataset.X[120, :], return_decision_function=True + ) + self.logger.info("Probability of affinity to each class: {:}".format(result)) self.logger.info("Class of affinity: {:}".format(y)) self.assertEqual(self.dataset.Y[120], y, "Wrong classification") @@ -53,22 +52,28 @@ def test_fun(self): def test_preprocess(self): """Test classifier with preprocessors inside.""" # All linear transformations - self._test_preprocess(self.dataset, self.rnd_forest, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, + self.rnd_forest, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) # Mixed linear/nonlinear transformations - self._test_preprocess(self.dataset, self.rnd_forest, - ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess( + self.dataset, self.rnd_forest, ["pca", "unit-norm"], [{}, {}] + ) def test_plot(self): - """ Compare the classifiers graphically""" - ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, - random_state=1).load() + """Compare the classifiers graphically""" + ds = CDLRandomBlobs( + n_samples=100, centers=3, n_features=2, random_state=1 + ).load() fig = self._test_plot(self.rnd_forest, ds, levels=[0.5]) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_random_forest.pdf')) + fig.savefig( + fm.join(fm.abspath(__file__), "figs", "test_c_classifier_random_forest.pdf") + ) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_ridge.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_ridge.py index 354e215f..9eb26e64 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_ridge.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_ridge.py @@ -15,26 +15,29 @@ def setUp(self): """Test for init and fit methods.""" # generate synthetic data - self.dataset = CDLRandom(n_features=100, n_redundant=20, - n_informative=25, - n_clusters_per_class=2, - random_state=0).load() + self.dataset = CDLRandom( + n_features=100, + n_redundant=20, + n_informative=25, + n_clusters_per_class=2, + random_state=0, + ).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly) - self.ridges = [CClassifierRidge( - preprocess=kernel() if kernel is not None else None) - for kernel in kernel_types] - self.logger.info( - "Testing RIDGE with kernel functions: %s", str(kernel_types)) + self.ridges = [ + CClassifierRidge(preprocess=kernel() if kernel is not None else None) + for kernel in kernel_types + ] + self.logger.info("Testing RIDGE with kernel functions: %s", str(kernel_types)) for ridge in self.ridges: ridge.verbose = 2 # Enabling debug output for each classifier ridge.fit(self.dataset.X, self.dataset.Y) def test_time(self): - """ Compare execution time of ridge and SVM""" + """Compare execution time of ridge and SVM""" self.logger.info("Testing training speed of ridge compared to SVM ") for ridge in self.ridges: @@ -44,26 +47,31 @@ def test_time(self): with self.timer() as t_svm: svm.fit(self.dataset.X, self.dataset.Y) - self.logger.info( - "Execution time of SVM: {:}".format(t_svm.interval)) + self.logger.info("Execution time of SVM: {:}".format(t_svm.interval)) with self.timer() as t_ridge: ridge.fit(self.dataset.X, self.dataset.Y) - self.logger.info( - "Execution time of ridge: {:}".format(t_ridge.interval)) + self.logger.info("Execution time of ridge: {:}".format(t_ridge.interval)) def test_plot(self): - """ Compare the classifiers graphically""" - ds = CDLRandom(n_features=2, n_redundant=0, n_informative=2, - n_clusters_per_class=1, random_state=0).load() + """Compare the classifiers graphically""" + ds = CDLRandom( + n_features=2, + n_redundant=0, + n_informative=2, + n_clusters_per_class=1, + random_state=0, + ).load() ds.X = CNormalizerMinMax().fit_transform(ds.X) fig = self._test_plot(self.ridges[0], ds) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_ridge.pdf')) + fig.savefig( + fm.join(fm.abspath(__file__), "figs", "test_c_classifier_ridge.pdf") + ) def test_performance(self): - """ Compare the classifiers performance""" - self.logger.info("Testing error performance of the " - "classifiers on the training set") + """Compare the classifiers performance""" + self.logger.info( + "Testing error performance of the " "classifiers on the training set" + ) for ridge in self.ridges: self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess)) @@ -77,21 +85,23 @@ def test_performance(self): svm.fit(self.dataset.X, self.dataset.Y) label_svm, y_svm = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) label_ridge, y_ridge = ridge.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) - acc_svm = CMetric.create('f1').performance_score( - self.dataset.Y, label_svm) - acc_ridge = CMetric.create('f1').performance_score( - self.dataset.Y, label_ridge) + acc_svm = CMetric.create("f1").performance_score(self.dataset.Y, label_svm) + acc_ridge = CMetric.create("f1").performance_score( + self.dataset.Y, label_ridge + ) self.logger.info("Accuracy of SVM: {:}".format(acc_svm)) - self.assertGreater(acc_svm, 0.90, - "Accuracy of SVM: {:}".format(acc_svm)) + self.assertGreater(acc_svm, 0.90, "Accuracy of SVM: {:}".format(acc_svm)) self.logger.info("Accuracy of ridge: {:}".format(acc_ridge)) - self.assertGreater(acc_ridge, 0.90, - "Accuracy of ridge: {:}".format(acc_ridge)) + self.assertGreater( + acc_ridge, 0.90, "Accuracy of ridge: {:}".format(acc_ridge) + ) def test_fun(self): """Test for decision_function() and predict() methods.""" @@ -115,13 +125,14 @@ def test_gradient(self): for ridge in self.ridges: self.logger.info( - "Checking grad. for Ridge with kernel: %s", ridge.preprocess) + "Checking grad. for Ridge with kernel: %s", ridge.preprocess + ) # set gamma for poly and rbf - if hasattr(ridge.preprocess, 'gamma'): - ridge.set('gamma', 1e-5) - if hasattr(ridge.preprocess, 'degree'): # set degree for poly - ridge.set('degree', 3) + if hasattr(ridge.preprocess, "gamma"): + ridge.set("gamma", 1e-5) + if hasattr(ridge.preprocess, "degree"): # set degree for poly + ridge.set("degree", 3) self.logger.info("Testing dense data...") ds = self.dataset.todense() @@ -151,16 +162,16 @@ def test_preprocess(self): clf = CClassifierRidge() # All linear transformations with gradient implemented - self._test_preprocess(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) + self._test_preprocess_grad( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(ds, clf, ["pca", "unit-norm"], [{}, {}]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sgd.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sgd.py index 87b55c74..fc270200 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sgd.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sgd.py @@ -21,42 +21,50 @@ def setUp(self): """Test for init and fit methods.""" # generate synthetic data - self.dataset = CDLRandom(n_features=100, n_redundant=20, - n_informative=25, - n_clusters_per_class=2, - random_state=0).load() + self.dataset = CDLRandom( + n_features=100, + n_redundant=20, + n_informative=25, + n_clusters_per_class=2, + random_state=0, + ).load() self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X) self.logger.info("Testing classifier creation ") - self.sgd = CClassifierSGD(regularizer=CRegularizerL2(), - loss=CLossHinge(), - random_state=0) + self.sgd = CClassifierSGD( + regularizer=CRegularizerL2(), loss=CLossHinge(), random_state=0 + ) # this is equivalent to C=1 for SGD alpha = 1 / self.dataset.num_samples - kernel_types = \ - (None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3)) - self.sgds = [CClassifierSGD( - regularizer=CRegularizerL2(), loss=CLossHinge(), - max_iter=1000, random_state=0, alpha=alpha, - preprocess=kernel if kernel is not None else None) - for kernel in kernel_types] - self.logger.info( - "Testing SGD with kernel functions: %s", str(kernel_types)) + kernel_types = (None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3)) + self.sgds = [ + CClassifierSGD( + regularizer=CRegularizerL2(), + loss=CLossHinge(), + max_iter=1000, + random_state=0, + alpha=alpha, + preprocess=kernel if kernel is not None else None, + ) + for kernel in kernel_types + ] + self.logger.info("Testing SGD with kernel functions: %s", str(kernel_types)) for sgd in self.sgds: sgd.verbose = 0 # Enabling debug output for each classifier sgd.fit(self.dataset.X, self.dataset.Y) def test_draw(self): - """ Compare the classifiers graphically""" + """Compare the classifiers graphically""" self.logger.info("Testing classifiers graphically") # generate 2D synthetic data - dataset = CDLRandom(n_features=2, n_redundant=1, n_informative=1, - n_clusters_per_class=1).load() + dataset = CDLRandom( + n_features=2, n_redundant=1, n_informative=1, n_clusters_per_class=1 + ).load() dataset.X = CNormalizerMinMax().fit_transform(dataset.X) self.sgds[0].fit(dataset.X, dataset.Y) @@ -69,25 +77,25 @@ def test_draw(self): # Plot dataset points fig.sp.plot_ds(dataset) # Plot objective function - fig.sp.plot_fun(svm.decision_function, - grid_limits=dataset.get_bounds(), y=1) - fig.sp.title('SVM') + fig.sp.plot_fun(svm.decision_function, grid_limits=dataset.get_bounds(), y=1) + fig.sp.title("SVM") fig.subplot(2, 1, 2) # Plot dataset points fig.sp.plot_ds(dataset) # Plot objective function - fig.sp.plot_fun(self.sgds[0].decision_function, - grid_limits=dataset.get_bounds(), y=1) - fig.sp.title('SGD Classifier') + fig.sp.plot_fun( + self.sgds[0].decision_function, grid_limits=dataset.get_bounds(), y=1 + ) + fig.sp.title("SGD Classifier") - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_sgd1.pdf')) + fig.savefig(fm.join(fm.abspath(__file__), "figs", "test_c_classifier_sgd1.pdf")) def test_performance(self): - """ Compare the classifiers performance""" - self.logger.info("Testing error performance of the " - "classifiers on the training set") + """Compare the classifiers performance""" + self.logger.info( + "Testing error performance of the " "classifiers on the training set" + ) for sgd in self.sgds: @@ -102,33 +110,37 @@ def test_performance(self): svm.fit(self.dataset.X, self.dataset.Y) label_svm, y_svm = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) label_sgd, y_sgd = sgd.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) - acc_svm = CMetric.create('f1').performance_score( - self.dataset.Y, label_svm) - acc_sgd = CMetric.create('f1').performance_score( - self.dataset.Y, label_sgd) + acc_svm = CMetric.create("f1").performance_score(self.dataset.Y, label_svm) + acc_sgd = CMetric.create("f1").performance_score(self.dataset.Y, label_sgd) self.logger.info("Accuracy of SVM: {:}".format(acc_svm)) - self.assertGreater(acc_svm, 0.90, - "Accuracy of SVM: {:}".format(acc_svm)) + self.assertGreater(acc_svm, 0.90, "Accuracy of SVM: {:}".format(acc_svm)) self.logger.info("Accuracy of SGD: {:}".format(acc_sgd)) - self.assertGreater(acc_sgd, 0.90, - "Accuracy of SGD: {:}".format(acc_sgd)) + self.assertGreater(acc_sgd, 0.90, "Accuracy of SGD: {:}".format(acc_sgd)) def test_margin(self): self.logger.info("Testing margin separation of SGD...") # we create 50 separable points - dataset = CDLRandomBlobs(n_samples=50, centers=2, random_state=0, - cluster_std=0.60).load() + dataset = CDLRandomBlobs( + n_samples=50, centers=2, random_state=0, cluster_std=0.60 + ).load() # fit the model - clf = CClassifierSGD(loss=CLossHinge(), regularizer=CRegularizerL2(), - alpha=0.01, max_iter=200, random_state=0) + clf = CClassifierSGD( + loss=CLossHinge(), + regularizer=CRegularizerL2(), + alpha=0.01, + max_iter=200, + random_state=0, + ) clf.fit(dataset.X, dataset.Y) # plot the line, the points, and the nearest vectors to the plane @@ -142,16 +154,15 @@ def test_margin(self): x2 = X2[i, j] Z[i, j] = clf.decision_function(CArray([x1, x2]), y=1) levels = [-1.0, 0.0, 1.0] - linestyles = ['dashed', 'solid', 'dashed'] - colors = 'k' + linestyles = ["dashed", "solid", "dashed"] + colors = "k" fig = CFigure(linewidth=1) fig.sp.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) - fig.sp.scatter(dataset.X[:, 0].ravel(), - dataset.X[:, 1].ravel(), - c=dataset.Y, s=40) + fig.sp.scatter( + dataset.X[:, 0].ravel(), dataset.X[:, 1].ravel(), c=dataset.Y, s=40 + ) - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_sgd2.pdf')) + fig.savefig(fm.join(fm.abspath(__file__), "figs", "test_c_classifier_sgd2.pdf")) def test_fun(self): """Test for decision_function() and predict() methods.""" @@ -175,12 +186,13 @@ def test_gradient(self): for sgd in self.sgds: self.logger.info( - "Checking gradient for SGD with kernel: %s", sgd.preprocess) + "Checking gradient for SGD with kernel: %s", sgd.preprocess + ) - if hasattr(sgd.preprocess, 'gamma'): # set gamma for poly and rbf - sgd.set('gamma', 1e-5) - if hasattr(sgd.preprocess, 'degree'): # set degree for poly - sgd.set('degree', 3) + if hasattr(sgd.preprocess, "gamma"): # set gamma for poly and rbf + sgd.set("gamma", 1e-5) + if hasattr(sgd.preprocess, "degree"): # set degree for poly + sgd.set("degree", 3) self.logger.info("Testing dense data...") ds = self.dataset.todense() @@ -208,19 +220,20 @@ def test_preprocess(self): """Test classifier with preprocessors inside.""" ds = CDLRandom().load() clf = CClassifierSGD( - regularizer=CRegularizerL2(), loss=CLossHinge(), random_state=0) + regularizer=CRegularizerL2(), loss=CLossHinge(), random_state=0 + ) # All linear transformations with gradient implemented - self._test_preprocess(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) + self._test_preprocess_grad( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(ds, clf, ["pca", "unit-norm"], [{}, {}]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sklearn.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sklearn.py index 82a67a48..52d507c6 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sklearn.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_sklearn.py @@ -25,31 +25,37 @@ def setUp(self): # QuadraticDiscriminantAnalysis will raise a warning self.logger.filterwarnings( - "ignore", message="Variables are collinear", category=UserWarning) + "ignore", message="Variables are collinear", category=UserWarning + ) multiclass = True n_classes = 3 if multiclass is True else 2 self.dataset = CDLRandom( - n_features=25, n_redundant=10, n_informative=5, - n_classes=n_classes, n_samples=25, - n_clusters_per_class=2, random_state=0).load() + n_features=25, + n_redundant=10, + n_informative=5, + n_classes=n_classes, + n_samples=25, + n_clusters_per_class=2, + random_state=0, + ).load() self.skclfs = [ KNeighborsClassifier(3), - SVC(kernel="linear", C=0.025, - random_state=0, decision_function_shape='ovr'), + SVC( + kernel="linear", C=0.025, random_state=0, decision_function_shape="ovr" + ), SVC(kernel="rbf", gamma=2, C=1, random_state=0), DecisionTreeClassifier(max_depth=5, random_state=0), - RandomForestClassifier(max_depth=5, n_estimators=5, - random_state=0), + RandomForestClassifier(max_depth=5, n_estimators=5, random_state=0), MLPClassifier(alpha=1, max_iter=1000, random_state=0), AdaBoostClassifier(random_state=0), - OneVsRestClassifier(SVC(kernel='linear')), + OneVsRestClassifier(SVC(kernel="linear")), # These clf below only work on dense data! GaussianProcessClassifier(1.0 * RBF(1.0)), GaussianNB(), - QuadraticDiscriminantAnalysis() + QuadraticDiscriminantAnalysis(), ] self.classifiers = [] @@ -69,8 +75,7 @@ def test_fun(self): def _decision_function(x, y=None): x = x.atleast_2d() try: - scores = CArray( - self.skclfs[i].decision_function(x.get_data())) + scores = CArray(self.skclfs[i].decision_function(x.get_data())) probs = False except AttributeError: scores = CArray(self.skclfs[i].predict_proba(x.get_data())) @@ -96,20 +101,21 @@ def _decision_function(x, y=None): try: self._test_fun(clf, self.dataset.tosparse()) except TypeError: - self.logger.info( - "This sklearn model does not support sparse data!") + self.logger.info("This sklearn model does not support sparse data!") def test_preprocess(self): """Test classifier with preprocessors inside.""" # All linear transformations for clf in self.classifiers: - self._test_preprocess(self.dataset, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + self.dataset, + clf, + ["min-max", "mean-std"], + [{"feature_range": (-1, 1)}, {}], + ) # Mixed linear/nonlinear transformations - self._test_preprocess(self.dataset, clf, - ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(self.dataset, clf, ["pca", "unit-norm"], [{}, {}]) def test_pretrained(self): """Test wrapping of pretrained models.""" @@ -119,42 +125,41 @@ def test_pretrained(self): X = iris.data y = iris.target - clf = svm.SVC(kernel='linear') + clf = svm.SVC(kernel="linear") from secml.core.exceptions import NotFittedError + with self.assertRaises(NotFittedError): secmlclf = CClassifierSkLearn(clf) secmlclf.predict(CArray(X)) clf.fit(X, y) - + y_pred = clf.predict(X) - clf = svm.SVC(kernel='linear') + clf = svm.SVC(kernel="linear") secmlclf = CClassifierSkLearn(clf) secmlclf.fit(X, y) y_pred_secml = secmlclf.predict(CArray(X)) - self.logger.info( - "Predicted labels by pretrained model:\n{:}".format(y_pred)) - self.logger.info( - "Predicted labels by our fit:\n{:}".format(y_pred_secml)) + self.logger.info("Predicted labels by pretrained model:\n{:}".format(y_pred)) + self.logger.info("Predicted labels by our fit:\n{:}".format(y_pred_secml)) self.assert_array_equal(y_pred, y_pred_secml) def test_set_get_state(self): """Test for set_state and get_state.""" - pre = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}]) + pre = CPreProcess.create_chain(["pca", "mean-std"], [{}, {}]) clf = CClassifierSkLearn( sklearn_model=SVC(kernel="rbf", gamma=2, C=1, random_state=0), - preprocess=pre) + preprocess=pre, + ) clf.fit(self.dataset.X, self.dataset.Y) pred_y = clf.predict(self.dataset.X) - self.logger.info( - "Predictions before restoring state:\n{:}".format(pred_y)) + self.logger.info("Predictions before restoring state:\n{:}".format(pred_y)) state = clf.get_state() self.logger.info("State of multiclass:\n{:}".format(state)) @@ -162,27 +167,28 @@ def test_set_get_state(self): # Generate a temp file to test import tempfile from secml.utils import fm + tempdir = tempfile.gettempdir() - tempfile = fm.join(tempdir, 'secml_testgetsetstate') + tempfile = fm.join(tempdir, "secml_testgetsetstate") # Test save state to disk tempfile = clf.save_state(tempfile) # Create an entirely new clf - pre_post = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}]) + pre_post = CPreProcess.create_chain(["pca", "mean-std"], [{}, {}]) clf_post = CClassifierSkLearn( sklearn_model=SVC(kernel="rbf", gamma=2, C=1, random_state=0), - preprocess=pre_post) + preprocess=pre_post, + ) # Restore state from disk clf_post.load_state(tempfile) pred_y_post = clf_post.predict(self.dataset.X) - self.logger.info( - "Predictions after restoring state:\n{:}".format(pred_y_post)) + self.logger.info("Predictions after restoring state:\n{:}".format(pred_y_post)) self.assert_array_equal(pred_y, pred_y_post) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_svm.py b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_svm.py index bccb532e..c4ca454e 100644 --- a/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_svm.py +++ b/src/secml/ml/classifiers/sklearn/tests/test_c_classifier_svm.py @@ -22,24 +22,28 @@ class TestCClassifierSVM(CClassifierTestCases): def setUp(self): # generate synthetic data - self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1, - n_clusters_per_class=1, random_state=1).load() + self.dataset = CDLRandom( + n_features=2, + n_redundant=0, + n_informative=1, + n_clusters_per_class=1, + random_state=1, + ).load() self.dataset_sparse = self.dataset.tosparse() kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly) - self.svms = [CClassifierSVM( - kernel=kernel() if kernel is not None else None) - for kernel in kernel_types] - self.logger.info( - "Testing SVM with kernel functions: %s", str(kernel_types)) + self.svms = [ + CClassifierSVM(kernel=kernel() if kernel is not None else None) + for kernel in kernel_types + ] + self.logger.info("Testing SVM with kernel functions: %s", str(kernel_types)) for svm in self.svms: # Enabling debug output for each classifier svm.verbose = 2 self.logger.info("." * 50) - self.logger.info("Number of Patterns: %s", - str(self.dataset.num_samples)) + self.logger.info("Number of Patterns: %s", str(self.dataset.num_samples)) self.logger.info("Features: %s", str(self.dataset.num_features)) def test_attributes(self): @@ -47,12 +51,12 @@ def test_attributes(self): self.logger.info("Testing SVM attributes setting") for svm in self.svms: - svm.set('C', 10) + svm.set("C", 10) self.assertEqual(svm.C, 10) - svm.set('class_weight', {-1: 1, 1: 50}) + svm.set("class_weight", {-1: 1, 1: 50}) # set gamma for poly and rbf and check if it is set properly - if hasattr(svm.kernel, 'gamma'): - svm.set('gamma', 100) + if hasattr(svm.kernel, "gamma"): + svm.set("gamma", 100) self.assertEqual(svm.kernel.gamma, 100) def test_linear_svm(self): @@ -71,10 +75,11 @@ def test_linear_svm(self): kernel_linear_svm.fit(self.dataset.X, self.dataset.Y) linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict( - self.dataset.X, return_decision_function=True) - kernel_linear_svm_pred_y, \ - kernel_linear_svm_pred_score = kernel_linear_svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) + kernel_linear_svm_pred_y, kernel_linear_svm_pred_score = ( + kernel_linear_svm.predict(self.dataset.X, return_decision_function=True) + ) # check prediction self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y) @@ -83,15 +88,19 @@ def test_linear_svm(self): linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y) kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y) - self.assertTrue(linear_svm.w.issparse, - "Weights vector is not sparse even " - "if training data is sparse") + self.assertTrue( + linear_svm.w.issparse, + "Weights vector is not sparse even " "if training data is sparse", + ) linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict( - self.dataset_sparse.X, return_decision_function=True) - kernel_linear_svm_pred_y, \ - kernel_linear_svm_pred_score = kernel_linear_svm.predict( - self.dataset_sparse.X, return_decision_function=True) + self.dataset_sparse.X, return_decision_function=True + ) + kernel_linear_svm_pred_y, kernel_linear_svm_pred_score = ( + kernel_linear_svm.predict( + self.dataset_sparse.X, return_decision_function=True + ) + ) # check prediction self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y) @@ -101,17 +110,17 @@ def test_predict(self): self.logger.info("Testing SVM predict accuracy") for svm in self.svms: - self.logger.info( - "SVM with kernel: %s", svm.kernel.__class__) + self.logger.info("SVM with kernel: %s", svm.kernel.__class__) # Training and predicting using our SVM svm.fit(self.dataset.X, self.dataset.Y) pred_y, pred_score = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) # Training and predicting an SKlearn SVC - k = svm.kernel.class_type if svm.kernel is not None else 'linear' + k = svm.kernel.class_type if svm.kernel is not None else "linear" sklearn_svm = SVC(kernel=k) # Setting similarity function parameters into SVC too @@ -123,11 +132,11 @@ def test_predict(self): p_dict[p] = svm.kernel.get_params()[p] sklearn_svm.set_params(**p_dict) - sklearn_svm.fit(self.dataset.X.get_data(), - np.ravel(self.dataset.Y.get_data())) + sklearn_svm.fit( + self.dataset.X.get_data(), np.ravel(self.dataset.Y.get_data()) + ) sklearn_pred_y = sklearn_svm.predict(self.dataset.X.get_data()) - sklearn_score = sklearn_svm.decision_function( - self.dataset.X.get_data()) + sklearn_score = sklearn_svm.decision_function(self.dataset.X.get_data()) # Test if sklearn pred_y are equal to our predicted labels self.assert_array_equal(pred_y, sklearn_pred_y) @@ -137,10 +146,12 @@ def test_predict(self): self.assert_allclose(pred_score[:, 1].ravel(), sklearn_score) # EVALUATE PERFORMANCE - accuracy = skm.accuracy_score( - self.dataset.Y.get_data(), sklearn_pred_y) - self.logger.info("Prediction accuracy for kernel %s is %f ", - svm.kernel.__class__, accuracy) + accuracy = skm.accuracy_score(self.dataset.Y.get_data(), sklearn_pred_y) + self.logger.info( + "Prediction accuracy for kernel %s is %f ", + svm.kernel.__class__, + accuracy, + ) def test_shape(self): """Test shape of SVM parameters, scores etc.""" @@ -154,13 +165,13 @@ def _check_flattness(array): for svm in self.svms: - self.logger.info( - "SVM with similarity function: %s", svm.kernel.__class__) + self.logger.info("SVM with similarity function: %s", svm.kernel.__class__) # Training and predicting using our SVM svm.fit(self.dataset.X, self.dataset.Y) pred_y, pred_score = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) # chose random one pattern pattern = CArray(random.choice(self.dataset.X.get_data())) gradient = svm.grad_f_x(pattern, y=1) @@ -178,46 +189,49 @@ def test_sparse(self): self.logger.info("Testing SVM on sparse data") def _check_sparsedata(y, score, y_sparse, score_sparse): - self.assertFalse((y != y_sparse).any(), - "Predicted labels on sparse data are different.") + self.assertFalse( + (y != y_sparse).any(), "Predicted labels on sparse data are different." + ) # Rounding scores to prevent false positives in assert score_rounded = score[:, 1].ravel().round(3) score_sparse_rounded = score_sparse[:, 1].ravel().round(3) - self.assertFalse((score_rounded != score_sparse_rounded).any(), - "Predicted Scores on sparse data are different.") + self.assertFalse( + (score_rounded != score_sparse_rounded).any(), + "Predicted Scores on sparse data are different.", + ) for svm in self.svms: - self.logger.info( - "SVM with similarity function: %s", svm.kernel.__class__) + self.logger.info("SVM with similarity function: %s", svm.kernel.__class__) # Training and predicting on dense data for reference svm.fit(self.dataset.X, self.dataset.Y) pred_y, pred_score = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) # Training and predicting on sparse data svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y) pred_y_sparse, pred_score_sparse = svm.predict( - self.dataset_sparse.X, return_decision_function=True) + self.dataset_sparse.X, return_decision_function=True + ) - _check_sparsedata(pred_y, pred_score, pred_y_sparse, - pred_score_sparse) + _check_sparsedata(pred_y, pred_score, pred_y_sparse, pred_score_sparse) # Training on sparse and predicting on dense svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y) pred_y_sparse, pred_score_sparse = svm.predict( - self.dataset.X, return_decision_function=True) + self.dataset.X, return_decision_function=True + ) - _check_sparsedata(pred_y, pred_score, pred_y_sparse, - pred_score_sparse) + _check_sparsedata(pred_y, pred_score, pred_y_sparse, pred_score_sparse) # Training on dense and predicting on sparse svm.fit(self.dataset.X, self.dataset.Y) pred_y_sparse, pred_score_sparse = svm.predict( - self.dataset_sparse.X, return_decision_function=True) + self.dataset_sparse.X, return_decision_function=True + ) - _check_sparsedata(pred_y, pred_score, pred_y_sparse, - pred_score_sparse) + _check_sparsedata(pred_y, pred_score, pred_y_sparse, pred_score_sparse) def test_margin(self): self.logger.info("Testing margin separation of SVM...") @@ -228,8 +242,9 @@ def test_margin(self): rng = np.random.RandomState(0) n_samples_1 = 1000 n_samples_2 = 100 - X = np.r_[1.5 * rng.randn(n_samples_1, 2), - 0.5 * rng.randn(n_samples_2, 2) + [2, 2]] + X = np.r_[ + 1.5 * rng.randn(n_samples_1, 2), 0.5 * rng.randn(n_samples_2, 2) + [2, 2] + ] y = [0] * (n_samples_1) + [1] * (n_samples_2) dataset = CDataset(X, y) @@ -252,13 +267,12 @@ def test_margin(self): wyy = wa * xx - wclf.b / ww[1] fig = CFigure(linewidth=1) - fig.sp.plot(xx, yy.ravel(), 'k-', label='no weights') - fig.sp.plot(xx, wyy.ravel(), 'k--', label='with weights') + fig.sp.plot(xx, yy.ravel(), "k-", label="no weights") + fig.sp.plot(xx, wyy.ravel(), "k--", label="with weights") fig.sp.scatter(X[:, 0].ravel(), X[:, 1].ravel(), c=y) fig.sp.legend() - fig.savefig(fm.join(fm.abspath(__file__), 'figs', - 'test_c_classifier_svm.pdf')) + fig.savefig(fm.join(fm.abspath(__file__), "figs", "test_c_classifier_svm.pdf")) def test_store_dual_vars(self): """Test of parameters that control storing of dual space variables.""" @@ -270,12 +284,12 @@ def test_store_dual_vars(self): self.assertIsNone(svm.alpha) self.logger.info("Linear SVM in dual space") - svm = CClassifierSVM(kernel='linear') + svm = CClassifierSVM(kernel="linear") svm.fit(self.dataset.X, self.dataset.Y) self.assertIsNotNone(svm.alpha) self.logger.info("Nonlinear SVM in dual space") - svm = CClassifierSVM(kernel='rbf') + svm = CClassifierSVM(kernel="rbf") svm.fit(self.dataset.X, self.dataset.Y) self.assertIsNotNone(svm.alpha) @@ -294,15 +308,15 @@ def test_gradient(self): self.logger.info("Testing SVM.gradient() method") import random + for svm in self.svms: - self.logger.info( - "Computing gradient for SVM with kernel: %s", svm.kernel) + self.logger.info("Computing gradient for SVM with kernel: %s", svm.kernel) - if hasattr(svm.kernel, 'gamma'): # set gamma for poly and rbf - svm.set('gamma', 1e-5) - if hasattr(svm.kernel, 'degree'): # set degree for poly - svm.set('degree', 3) + if hasattr(svm.kernel, "gamma"): # set gamma for poly and rbf + svm.set("gamma", 1e-5) + if hasattr(svm.kernel, "degree"): # set degree for poly + svm.set("degree", 3) samps = random.sample(range(self.dataset.num_samples), 5) @@ -334,8 +348,7 @@ def test_gradient(self): # Compare dense gradients with sparse gradients for grad_i, grad in enumerate(grads_d): - self.assert_array_almost_equal( - grad.atleast_2d(), grads_s[grad_i]) + self.assert_array_almost_equal(grad.atleast_2d(), grads_s[grad_i]) def test_preprocess(self): """Test classifier with preprocessors inside.""" @@ -343,15 +356,15 @@ def test_preprocess(self): clf = CClassifierSVM() # All linear transformations with gradient implemented - self._test_preprocess(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) - self._test_preprocess_grad(ds, clf, - ['min-max', 'mean-std'], - [{'feature_range': (-1, 1)}, {}]) + self._test_preprocess( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) + self._test_preprocess_grad( + ds, clf, ["min-max", "mean-std"], [{"feature_range": (-1, 1)}, {}] + ) # Mixed linear/nonlinear transformations without gradient - self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}]) + self._test_preprocess(ds, clf, ["pca", "unit-norm"], [{}, {}]) def test_multiclass(self): """Test multiclass SVM on MNIST digits.""" @@ -363,18 +376,18 @@ def test_multiclass(self): n_ts = 200 # Number of test set samples loader = CDataLoaderMNIST() - tr = loader.load('training', digits=digits, num_samples=n_tr) - ts = loader.load('testing', digits=digits, num_samples=n_ts) + tr = loader.load("training", digits=digits, num_samples=n_tr) + ts = loader.load("testing", digits=digits, num_samples=n_ts) # Normalize the features in `[0, 1]` tr.X /= 255 ts.X /= 255 svm_params = { - 'kernel': CKernelRBF(gamma=0.1), - 'C': 10, - 'class_weight': {0: 1, 1: 1}, - 'n_jobs': 2 + "kernel": CKernelRBF(gamma=0.1), + "C": 10, + "class_weight": {0: 1, 1: 1}, + "n_jobs": 2, } classifiers = [ CClassifierMulticlassOVA(CClassifierSVM, **svm_params), @@ -398,5 +411,5 @@ def test_multiclass(self): self.assert_array_almost_equal(grads[0], grads[1]) -if __name__ == '__main__': +if __name__ == "__main__": CClassifierTestCases.main() diff --git a/src/secml/ml/classifiers/tests/c_classifier_testcases.py b/src/secml/ml/classifiers/tests/c_classifier_testcases.py index feb5bab2..3b054563 100644 --- a/src/secml/ml/classifiers/tests/c_classifier_testcases.py +++ b/src/secml/ml/classifiers/tests/c_classifier_testcases.py @@ -68,8 +68,7 @@ def _test_fun(self, clf, ds): Classifier scores computed on a single point. """ - self.logger.info( - "Test for decision_function() and predict() methods.") + self.logger.info("Test for decision_function() and predict() methods.") if ds.issparse: self.logger.info("Testing on sparse data...") @@ -93,21 +92,18 @@ def _test_fun(self, clf, ds): for y in range(ds.num_classes): df.append(clf.decision_function(x, y=y)) df_priv.append(clf._forward(x_norm)[:, y].ravel()) + self.logger.info("decision_function(x, y={:}): {:}".format(y, df[y])) self.logger.info( - "decision_function(x, y={:}): {:}".format(y, df[y])) - self.logger.info( - "_decision_function(x_norm, y={:}): {:}".format(y, df_priv[y])) + "_decision_function(x_norm, y={:}): {:}".format(y, df_priv[y]) + ) self._check_df_scores(df_priv[y], ds.num_samples) self._check_df_scores(df[y], ds.num_samples) self.assertFalse((df[y] != df_priv[y]).any()) # Testing predict on multiple points - labels, scores = clf.predict( - x, return_decision_function=True) - self.logger.info( - "predict(x):\nlabels: {:}\nscores: {:}".format(labels, scores)) - self._check_classify_scores( - labels, scores, ds.num_samples, clf.n_classes) + labels, scores = clf.predict(x, return_decision_function=True) + self.logger.info("predict(x):\nlabels: {:}\nscores: {:}".format(labels, scores)) + self._check_classify_scores(labels, scores, ds.num_samples, clf.n_classes) # Comparing output of decision_function and predict for y in range(ds.num_classes): @@ -118,20 +114,18 @@ def _test_fun(self, clf, ds): for y in range(ds.num_classes): df.append(clf.decision_function(p, y=y)) df_priv.append(clf._forward(p_norm)[:, y].ravel()) - self.logger.info( - "decision_function(p, y={:}): {:}".format(y, df[y])) + self.logger.info("decision_function(p, y={:}): {:}".format(y, df[y])) self._check_df_scores(df[y], 1) self.logger.info( - "_decision_function(p_norm, y={:}): {:}".format(y, df_priv[y])) + "_decision_function(p_norm, y={:}): {:}".format(y, df_priv[y]) + ) self._check_df_scores(df_priv[y], 1) self.assertFalse((df[y] != df_priv[y]).any()) self.logger.info("Testing predict on single point") - labels, scores = clf.predict( - p, return_decision_function=True) - self.logger.info( - "predict(p):\nlabels: {:}\nscores: {:}".format(labels, scores)) + labels, scores = clf.predict(p, return_decision_function=True) + self.logger.info("predict(p):\nlabels: {:}\nscores: {:}".format(labels, scores)) self._check_classify_scores(labels, scores, 1, clf.n_classes) # Comparing output of decision_function and predict @@ -149,21 +143,22 @@ def _test_plot(self, clf, ds, levels=None): fig.subplot(1, 2, 1) fig.sp.plot_ds(ds) - fig.sp.plot_decision_regions( - clf, n_grid_points=50, grid_limits=ds.get_bounds()) + fig.sp.plot_decision_regions(clf, n_grid_points=50, grid_limits=ds.get_bounds()) fig.sp.title("Decision regions") fig.subplot(1, 2, 2) fig.sp.plot_ds(ds) - fig.sp.plot_fun(clf.decision_function, grid_limits=ds.get_bounds(), - levels=levels, y=1) + fig.sp.plot_fun( + clf.decision_function, grid_limits=ds.get_bounds(), levels=levels, y=1 + ) fig.sp.title("Discriminant function for y=1") return fig # TODO: consider moving at the CModule level! - def _test_gradient_numerical(self, clf, x, extra_classes=None, - th=1e-3, epsilon=eps, **grad_kwargs): + def _test_gradient_numerical( + self, clf, x, extra_classes=None, th=1e-3, epsilon=eps, **grad_kwargs + ): """Test for clf.grad_f_x comparing to numerical gradient. Parameters @@ -185,7 +180,7 @@ def _test_gradient_numerical(self, clf, x, extra_classes=None, A list with the gradients computed wrt each class. """ - if 'y' in grad_kwargs: + if "y" in grad_kwargs: raise ValueError("`y` cannot be passed to this unittest.") if extra_classes is not None: @@ -195,7 +190,7 @@ def _test_gradient_numerical(self, clf, x, extra_classes=None, grads = [] for c in classes: - grad_kwargs['y'] = c # Appending class to test_f_x + grad_kwargs["y"] = c # Appending class to test_f_x # Analytical gradient gradient = clf.grad_f_x(x, **grad_kwargs) @@ -205,17 +200,17 @@ def _test_gradient_numerical(self, clf, x, extra_classes=None, self.assertEqual(x.size, gradient.size) # Numerical gradient - num_gradient = CFunction( - clf.decision_function).approx_fprime(x.todense(), epsilon, y=c) + num_gradient = CFunction(clf.decision_function).approx_fprime( + x.todense(), epsilon, y=c + ) # Compute the norm of the difference error = (gradient - num_gradient).norm() + self.logger.info("Analytic grad wrt. class {:}:\n{:}".format(c, gradient)) self.logger.info( - "Analytic grad wrt. class {:}:\n{:}".format(c, gradient)) - self.logger.info( - "Numeric gradient wrt. class {:}:\n{:}".format( - c, num_gradient)) + "Numeric gradient wrt. class {:}:\n{:}".format(c, num_gradient) + ) self.logger.info("norm(grad - num_grad): {:}".format(error)) self.assertLess(error, th) @@ -231,8 +226,7 @@ def _create_preprocess_chain(pre_id_list, kwargs_list): chain = None pre_list = [] for i, pre_id in enumerate(pre_id_list): - chain = CPreProcess.create( - pre_id, preprocess=chain, **kwargs_list[i]) + chain = CPreProcess.create(pre_id, preprocess=chain, **kwargs_list[i]) pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) return chain, pre_list @@ -295,11 +289,14 @@ def _test_preprocess(self, ds, clf, pre_id_list, kwargs_list): """ pre, data_pre, clf_pre, clf = self._create_preprocess_test( - ds, clf, pre_id_list, kwargs_list) + ds, clf, pre_id_list, kwargs_list + ) self.logger.info( "Testing {:} with preprocessor inside:\n{:}".format( - clf.__class__.__name__, clf_pre)) + clf.__class__.__name__, clf_pre + ) + ) y1, score1 = clf_pre.predict(ds.X, return_decision_function=True) y2, score2 = clf.predict(data_pre, return_decision_function=True) @@ -311,9 +308,18 @@ def _test_preprocess(self, ds, clf, pre_id_list, kwargs_list): # equal to the number of dataset features (so before preprocessing) self.assertEqual(ds.num_features, clf_pre.n_features) - def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, - extra_classes=None, check_numerical=True, - th=1e-3, epsilon=eps, **grad_kwargs): + def _test_preprocess_grad( + self, + ds, + clf, + pre_id_list, + kwargs_list, + extra_classes=None, + check_numerical=True, + th=1e-3, + epsilon=eps, + **grad_kwargs + ): """Test if clf gradient with preprocessor inside is equal to the gradient of the clf trained on pre-transformed data. Also compare the gradient of the clf with preprocessor @@ -343,12 +349,14 @@ def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, """ pre, data_pre, clf_pre, clf = self._create_preprocess_test( - ds, clf, pre_id_list, kwargs_list) + ds, clf, pre_id_list, kwargs_list + ) - self.logger.info("Testing clf gradient with preprocessor " - "inside:\n{:}".format(clf_pre)) + self.logger.info( + "Testing clf gradient with preprocessor " "inside:\n{:}".format(clf_pre) + ) - if 'y' in grad_kwargs: + if "y" in grad_kwargs: raise ValueError("`y` cannot be passed to this unittest.") if extra_classes is not None: @@ -357,8 +365,7 @@ def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, classes = clf.classes for c in classes: - self.logger.info( - "Testing grad wrt. class {:}".format(c)) + self.logger.info("Testing grad wrt. class {:}".format(c)) # Grad of clf without preprocessor inside (using transformed data) v_pre = data_pre[0, :] @@ -386,8 +393,13 @@ def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, if check_numerical is True: # Comparison with numerical gradient self._test_gradient_numerical( - clf_pre, ds.X[0, :], extra_classes=extra_classes, - th=th, epsilon=epsilon, **grad_kwargs) + clf_pre, + ds.X[0, :], + extra_classes=extra_classes, + th=th, + epsilon=epsilon, + **grad_kwargs + ) def _test_sparse_linear(self, ds, clf): """Test linear classifier operations on sparse data. @@ -401,8 +413,9 @@ def _test_sparse_linear(self, ds, clf): clf : CClassifier """ - self.logger.info("Testing {:} operations on sparse data.".format( - clf.__class__.__name__)) + self.logger.info( + "Testing {:} operations on sparse data.".format(clf.__class__.__name__) + ) ds_sparse = ds.tosparse() @@ -416,10 +429,8 @@ def _test_sparse_linear(self, ds, clf): x = ds.X[0, :] x_sparse = ds_sparse.X[0, :] - y, s = clf.predict( - x, return_decision_function=True) - y_sparse, s_sparse = clf.predict( - x_sparse, return_decision_function=True) + y, s = clf.predict(x, return_decision_function=True) + y_sparse, s_sparse = clf.predict(x_sparse, return_decision_function=True) self.assert_array_equal(y, y_sparse) self.assert_array_equal(s, s_sparse) @@ -432,5 +443,5 @@ def _test_sparse_linear(self, ds, clf): # self.assertTrue(grad.issparse) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/features/c_preprocess.py b/src/secml/ml/features/c_preprocess.py index 8edeecad..12eea61a 100644 --- a/src/secml/ml/features/c_preprocess.py +++ b/src/secml/ml/features/c_preprocess.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod from secml.ml import CModule @@ -21,7 +22,8 @@ class CPreProcess(CModule, metaclass=ABCMeta): desired preprocessor. If None, input data is used as is. """ - __super__ = 'CPreProcess' + + __super__ = "CPreProcess" def __init__(self, preprocess=None): CModule.__init__(self, preprocess=preprocess) @@ -94,8 +96,7 @@ def transform(self, x): return self.forward(x, caching=False) def _inverse_transform(self, x): - raise NotImplementedError( - "inverting this transformation is not supported.") + raise NotImplementedError("inverting this transformation is not supported.") def inverse_transform(self, x): """Revert data to original form. diff --git a/src/secml/ml/features/normalization/__init__.py b/src/secml/ml/features/normalization/__init__.py index ab34bd8a..f24125b8 100644 --- a/src/secml/ml/features/normalization/__init__.py +++ b/src/secml/ml/features/normalization/__init__.py @@ -4,4 +4,4 @@ from .c_normalizer_minmax import CNormalizerMinMax from .c_normalizer_unitnorm import CNormalizerUnitNorm from .c_normalizer_dnn import CNormalizerDNN -from .c_normalizer_tfidf import CNormalizerTFIDF \ No newline at end of file +from .c_normalizer_tfidf import CNormalizerTFIDF diff --git a/src/secml/ml/features/normalization/c_normalizer.py b/src/secml/ml/features/normalization/c_normalizer.py index f5b73b54..3b9544c1 100644 --- a/src/secml/ml/features/normalization/c_normalizer.py +++ b/src/secml/ml/features/normalization/c_normalizer.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta from secml.core.decorators import deprecated @@ -13,4 +14,5 @@ class CNormalizer(CPreProcess, metaclass=ABCMeta): """Common interface for normalization preprocessing algorithms.""" - __super__ = 'CNormalizer' + + __super__ = "CNormalizer" diff --git a/src/secml/ml/features/normalization/c_normalizer_dnn.py b/src/secml/ml/features/normalization/c_normalizer_dnn.py index 2458f135..485ad729 100644 --- a/src/secml/ml/features/normalization/c_normalizer_dnn.py +++ b/src/secml/ml/features/normalization/c_normalizer_dnn.py @@ -6,6 +6,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from secml import _NoValue from secml.array import CArray from secml.ml.features.normalization import CNormalizer @@ -34,7 +35,8 @@ class CNormalizerDNN(CNormalizer): parameter but to the DNN instead. """ - __class_type = 'dnn' + + __class_type = "dnn" def __init__(self, net, out_layer=None, preprocess=_NoValue): @@ -42,12 +44,10 @@ def __init__(self, net, out_layer=None, preprocess=_NoValue): self.out_layer = out_layer if not self.net.is_fitted(): - raise NotFittedError( - "the DNN should be already trained.") + raise NotFittedError("the DNN should be already trained.") if preprocess is not _NoValue: - raise ValueError( - "any additional `preprocess` should be passed to the DNN.") + raise ValueError("any additional `preprocess` should be passed to the DNN.") # No preprocess should be passed to super super(CNormalizerDNN, self).__init__(preprocess=None) @@ -118,5 +118,4 @@ def _forward(self, x): def _backward(self, w=None): # return the gradient at desired layer - return self.net.get_layer_gradient(x=self._cached_x, w=w, - layer=self.out_layer) + return self.net.get_layer_gradient(x=self._cached_x, w=w, layer=self.out_layer) diff --git a/src/secml/ml/features/normalization/c_normalizer_linear.py b/src/secml/ml/features/normalization/c_normalizer_linear.py index 1c2593b8..e3fa9d32 100644 --- a/src/secml/ml/features/normalization/c_normalizer_linear.py +++ b/src/secml/ml/features/normalization/c_normalizer_linear.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import abstractmethod from secml.core.decorators import deprecated @@ -58,7 +59,7 @@ def _check_is_fitted(self): If the preprocessor is not fitted. """ - check_is_fitted(self, ['w', 'b']) + check_is_fitted(self, ["w", "b"]) def _forward(self, x): """Linearly scales features. @@ -76,8 +77,10 @@ def _forward(self, x): """ if x.atleast_2d().shape[1] != self.w.size: - raise ValueError("array to normalize must have {:} " - "features (columns).".format(self.w.size)) + raise ValueError( + "array to normalize must have {:} " + "features (columns).".format(self.w.size) + ) return (x * self.w).todense() + self.b @@ -97,8 +100,10 @@ def _inverse_transform(self, x): """ if x.atleast_2d().shape[1] != self.w.size: - raise ValueError("array to revert must have {:} " - "features (columns).".format(self.w.size)) + raise ValueError( + "array to revert must have {:} " + "features (columns).".format(self.w.size) + ) v = (x - self.b).atleast_2d() diff --git a/src/secml/ml/features/normalization/c_normalizer_mean_std.py b/src/secml/ml/features/normalization/c_normalizer_mean_std.py index 27b9fd32..1022b203 100644 --- a/src/secml/ml/features/normalization/c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/c_normalizer_mean_std.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.array import CArray from secml.core.type_utils import is_scalar from secml.core.decorators import deprecated @@ -57,7 +58,8 @@ class CNormalizerMeanStd(CNormalizerLinear): class_type : 'mean-std' """ - __class_type = 'mean-std' + + __class_type = "mean-std" def __init__(self, mean=None, std=None, with_std=True, preprocess=None): @@ -72,13 +74,20 @@ def __init__(self, mean=None, std=None, with_std=True, preprocess=None): # Input validation if with_std is True: - if (mean is None and std is not None) or \ - (mean is not None and std is None) or \ - (mean is not None and std is not None and - len(self._in_mean) != len(self._in_std)): - raise ValueError("if `with_std` is True, `mean` and `std` " - "should be both None or both scalar or " - "both tuple of the same length") + if ( + (mean is None and std is not None) + or (mean is not None and std is None) + or ( + mean is not None + and std is not None + and len(self._in_mean) != len(self._in_std) + ) + ): + raise ValueError( + "if `with_std` is True, `mean` and `std` " + "should be both None or both scalar or " + "both tuple of the same length" + ) self._mean = None self._std = None @@ -128,13 +137,16 @@ def _expand_mean(self, n_feats): """Expand mean value to all dimensions.""" n_channels = len(self._in_mean) if not n_feats % n_channels == 0: - raise ValueError("input number of features must be " - "divisible by {:}".format(n_channels)) + raise ValueError( + "input number of features must be " + "divisible by {:}".format(n_channels) + ) channel_size = int(n_feats / n_channels) self._mean = CArray.ones(shape=(n_feats,)) for i in range(n_channels): - self._mean[i * channel_size: - i * channel_size + channel_size] *= self._in_mean[i] + self._mean[ + i * channel_size : i * channel_size + channel_size + ] *= self._in_mean[i] return self._mean def _expand_std(self, n_feats): @@ -145,13 +157,16 @@ def _expand_std(self, n_feats): else: n_channels = len(self._in_std) if not n_feats % n_channels == 0: - raise ValueError("input number of features must be " - "divisible by {:}".format(n_channels)) + raise ValueError( + "input number of features must be " + "divisible by {:}".format(n_channels) + ) channel_size = int(n_feats / n_channels) self._std = CArray.ones(shape=(n_feats,)) for i in range(n_channels): - self._std[i * channel_size: - i * channel_size + channel_size] *= self._in_std[i] + self._std[ + i * channel_size : i * channel_size + channel_size + ] *= self._in_std[i] return self._std def _compute_w_and_b(self): diff --git a/src/secml/ml/features/normalization/c_normalizer_minmax.py b/src/secml/ml/features/normalization/c_normalizer_minmax.py index be54cb7f..cf22dceb 100644 --- a/src/secml/ml/features/normalization/c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/c_normalizer_minmax.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.features.normalization import CNormalizerLinear @@ -66,13 +67,14 @@ class CNormalizerMinMax(CNormalizerLinear): [-1. 1. -1. ]]) """ - __class_type = 'min-max' + + __class_type = "min-max" def __init__(self, feature_range=None, preprocess=None): self._feature_range = None # setting desired feature range... the property will check for correct type - self.feature_range = (0., 1.) if feature_range is None else feature_range + self.feature_range = (0.0, 1.0) if feature_range is None else feature_range self._min = None self._max = None diff --git a/src/secml/ml/features/normalization/c_normalizer_tfidf.py b/src/secml/ml/features/normalization/c_normalizer_tfidf.py index 5b81f68d..d4b9adda 100755 --- a/src/secml/ml/features/normalization/c_normalizer_tfidf.py +++ b/src/secml/ml/features/normalization/c_normalizer_tfidf.py @@ -65,15 +65,17 @@ class CNormalizerTFIDF(CNormalizer): normalization of flat vectors, transpose the array first. """ - __class_type = 'tf-idf' - def __init__(self, norm='l2', preprocess=None): + __class_type = "tf-idf" + + def __init__(self, norm="l2", preprocess=None): # init attributes self._norm = None self._cached_x_tfidf = None # cached x after tfidf for gradient comp. self._unitnorm = CNormalizerUnitNorm() self._sklearn_tfidf = TfidfTransformer( - norm=None, use_idf=True, smooth_idf=True, sublinear_tf=False) + norm=None, use_idf=True, smooth_idf=True, sublinear_tf=False + ) super(CNormalizerTFIDF, self).__init__(preprocess=preprocess) # set norm @@ -109,7 +111,7 @@ def _check_is_fitted(self): If the preprocessor is not fitted. """ - if not hasattr(self._sklearn_tfidf, 'idf_'): + if not hasattr(self._sklearn_tfidf, "idf_"): raise ValueError("The normalizer has not been trained.") def _fit(self, x, y=None): diff --git a/src/secml/ml/features/normalization/c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/c_normalizer_unitnorm.py index 64264f33..496306f8 100644 --- a/src/secml/ml/features/normalization/c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/c_normalizer_unitnorm.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.array import CArray from secml.ml.features.normalization import CNormalizer from secml.core.constants import inf @@ -66,7 +67,8 @@ class CNormalizerUnitNorm(CNormalizer): [ 0. 0.5 -0.5 ]]) """ - __class_type = 'unit-norm' + + __class_type = "unit-norm" def __init__(self, norm="l2", preprocess=None): """Class constructor""" @@ -84,9 +86,9 @@ def norm(self, value): """Set the norm that must be used to normalize each row.""" self._norm = value - if self._norm == 'l2': + if self._norm == "l2": self._order = 2 - elif self._norm == 'l1': + elif self._norm == "l1": self._order = 1 elif self._norm == "max": self._order = inf @@ -156,7 +158,7 @@ def _compute_norm_gradient(self, x, x_norm): elif self.norm == "l1": sign = x.sign() grad_norm_x = sign - elif self.norm == 'max': + elif self.norm == "max": grad_norm_x = CArray.zeros(d, sparse=x.issparse) abs_x = x.abs() # take absolute values of x... max_abs_x = abs_x.max() # ... and the maximum absolute value @@ -234,6 +236,6 @@ def _backward(self, w=None): # this is the derivative of the ratio x/||x|| grad = CArray.eye(d, d) * x_norm.item() - grad_norm_x.T.dot(x) - grad /= (x_norm ** 2) + grad /= x_norm**2 return grad if w is None else w.dot(grad) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 4338dd14..3eff0d6e 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -48,10 +48,12 @@ def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): def _test_chain(self, x, class_type_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result.""" x_chain = super(CNormalizerTestCases, self)._test_chain( - x, class_type_list, kwargs_list, y) + x, class_type_list, kwargs_list, y + ) - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) + self.assertEqual( + (self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape + ) return x_chain @@ -59,12 +61,13 @@ def _test_chain_gradient(self, x, class_type_list, kwargs_list, y=None): """Tests if gradient preprocess chain and gradient of manual chaining yield same result.""" grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient( - x, class_type_list, kwargs_list, y) + x, class_type_list, kwargs_list, y + ) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) return grad_chain -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py index 3afa16ec..e1f389ed 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py @@ -12,6 +12,7 @@ import torch from torch import nn, optim from torchvision import transforms + torch.manual_seed(0) from secml.array import CArray @@ -30,20 +31,24 @@ def mlp(input_dims=100, hidden_dims=(50, 50), output_dims=10): # Input layers layers = [ - ('linear1', torch.nn.Linear(input_dims, hidden_dims[0])), - ('relu1', torch.nn.ReLU()), + ("linear1", torch.nn.Linear(input_dims, hidden_dims[0])), + ("relu1", torch.nn.ReLU()), ] # Appending additional hidden layers for hl_i, hl_dims in enumerate(hidden_dims[1:]): prev_hl_dims = hidden_dims[hl_i] # Dims of the previous hl i_str = str(hl_i + 2) layers += [ - ('linear' + i_str, torch.nn.Linear(prev_hl_dims, hl_dims)), - ('relu' + i_str, torch.nn.ReLU())] + ("linear" + i_str, torch.nn.Linear(prev_hl_dims, hl_dims)), + ("relu" + i_str, torch.nn.ReLU()), + ] # Output layers layers += [ - ('linear' + str(len(hidden_dims) + 1), - torch.nn.Linear(hidden_dims[-1], output_dims))] + ( + "linear" + str(len(hidden_dims) + 1), + torch.nn.Linear(hidden_dims[-1], output_dims), + ) + ] # Creating the model with the list of layers return torch.nn.Sequential(OrderedDict(layers)) @@ -53,16 +58,21 @@ class TestCNormalizerPyTorch(CPreProcessTestCases): @classmethod def setUpClass(cls): - cls.ds = CDLRandom(n_samples=40, n_classes=3, - n_features=20, n_informative=15, - random_state=0).load() + cls.ds = CDLRandom( + n_samples=40, n_classes=3, n_features=20, n_informative=15, random_state=0 + ).load() model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) loss = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=1e-1) - cls.net = CClassifierPyTorch(model=model, loss=loss, - optimizer=optimizer, random_state=0, - epochs=10, pretrained=True) + cls.net = CClassifierPyTorch( + model=model, + loss=loss, + optimizer=optimizer, + random_state=0, + epochs=10, + pretrained=True, + ) cls.net.fit(cls.ds.X, cls.ds.Y) cls.norm = CNormalizerDNN(net=cls.net) @@ -84,10 +94,11 @@ def test_normalization(self): self.assert_allclose(out_norm, out_net) - self.norm.out_layer = 'linear1' + self.norm.out_layer = "linear1" self.logger.info( - "Testing normalization at layer {:}".format(self.norm.out_layer)) + "Testing normalization at layer {:}".format(self.norm.out_layer) + ) out_norm = self.norm.transform(x) out_net = self.net.get_layer_output(x, layer=self.norm.out_layer) @@ -101,7 +112,7 @@ def test_chain(self): """Test for preprocessors chain.""" # Inner preprocessors should be passed to the pytorch clf with self.assertRaises(ValueError): - CNormalizerDNN(net=self.net, preprocess='min-max') + CNormalizerDNN(net=self.net, preprocess="min-max") def test_gradient(self): """Test for gradient.""" @@ -120,7 +131,7 @@ def test_gradient(self): self.assertTrue(grad.is_vector_like) self.assertEqual(x.size, grad.size) - layer = 'linear1' + layer = "linear1" self.norm.out_layer = layer self.logger.info("Returning output for layer: {:}".format(layer)) out = self.net.get_layer_output(x, layer=layer) @@ -140,22 +151,25 @@ def test_aspreprocess(self): model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) loss = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=1e-1) - net = CClassifierPyTorch(model=model, loss=loss, - optimizer=optimizer, random_state=0, - epochs=10, preprocess='min-max') + net = CClassifierPyTorch( + model=model, + loss=loss, + optimizer=optimizer, + random_state=0, + epochs=10, + preprocess="min-max", + ) net.fit(self.ds.X, self.ds.Y) norm = CNormalizerDNN(net=net) - clf = CClassifierMulticlassOVA( - classifier=CClassifierSVM, preprocess=norm) + clf = CClassifierMulticlassOVA(classifier=CClassifierSVM, preprocess=norm) self.logger.info("Testing last layer") clf.fit(self.ds.X, self.ds.Y) - y_pred, scores = clf.predict( - self.ds.X, return_decision_function=True) + y_pred, scores = clf.predict(self.ds.X, return_decision_function=True) self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) self.logger.info("Scores:\n{:}".format(scores)) @@ -171,25 +185,23 @@ def test_aspreprocess(self): self.logger.info("Output of grad_f_x:\n{:}".format(grad)) - check_grad_val = CFunction( - clf.decision_function, clf.grad_f_x).check_grad( - x, y=c, epsilon=1e-1) - self.logger.info( - "norm(grad - num_grad): %s", str(check_grad_val)) + check_grad_val = CFunction(clf.decision_function, clf.grad_f_x).check_grad( + x, y=c, epsilon=1e-1 + ) + self.logger.info("norm(grad - num_grad): %s", str(check_grad_val)) self.assertLess(check_grad_val, 1e-3) self.assertTrue(grad.is_vector_like) self.assertEqual(x.size, grad.size) - layer = 'linear1' + layer = "linear1" norm.out_layer = layer self.logger.info("Testing layer {:}".format(norm.out_layer)) clf.fit(self.ds.X, self.ds.Y) - y_pred, scores = clf.predict( - self.ds.X, return_decision_function=True) + y_pred, scores = clf.predict(self.ds.X, return_decision_function=True) self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) self.logger.info("Scores:\n{:}".format(scores)) @@ -208,7 +220,8 @@ def test_get_set_state(self): out_norm = self.norm.transform(x) self.logger.info( - "Normalized sample before restoring state:\n{:}".format(out_norm)) + "Normalized sample before restoring state:\n{:}".format(out_norm) + ) state = self.norm.get_state(return_optimizer=False) model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) @@ -220,10 +233,11 @@ def test_get_set_state(self): post_out_norm = self.norm.transform(x) self.logger.info( - "Normalized sample after restoring state:\n{:}".format(post_out_norm)) + "Normalized sample after restoring state:\n{:}".format(post_out_norm) + ) self.assert_array_equal(out_norm, post_out_norm) -if __name__ == '__main__': +if __name__ == "__main__": CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index a3234cf4..64d22a62 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -14,33 +14,45 @@ def test_transform(self): self.logger.info("Testing using std? {:}".format(with_std)) - self._sklearn_comp(self.array_dense, - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) - self._sklearn_comp(self.array_sparse, - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) - self._sklearn_comp(self.row_dense.atleast_2d(), - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) - self._sklearn_comp(self.row_sparse, - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) - self._sklearn_comp(self.column_dense, - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) - self._sklearn_comp(self.column_sparse, - StandardScaler(with_std=with_std), - CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp( + self.array_dense, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) + self._sklearn_comp( + self.array_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) + self._sklearn_comp( + self.row_dense.atleast_2d(), + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) + self._sklearn_comp( + self.row_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) + self._sklearn_comp( + self.column_dense, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) + self._sklearn_comp( + self.column_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std), + ) def test_mean_std(self): """Test using specific mean/std.""" - for (mean, std) in [(1.5, 0.1), - ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: + for mean, std in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: self.logger.info("Original array is:\n{:}".format(array)) self.logger.info( - "Normalizing using mean: {:} std: {:}".format(mean, std)) + "Normalizing using mean: {:} std: {:}".format(mean, std) + ) n = CNormalizerMeanStd(mean=mean, std=std).fit(array) out = n.transform(array) @@ -59,16 +71,18 @@ def test_mean_std(self): def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['min-max', 'pca', 'mean-std'], - [{'feature_range': (-5, 5)}, {}, {}]) + self._test_chain( + self.array_dense, + ["min-max", "pca", "mean-std"], + [{"feature_range": (-5, 5)}, {}, {}], + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['min-max', 'mean-std'], - [{'feature_range': (-5, 5)}, {}]) + self._test_chain_gradient( + self.array_dense, ["min-max", "mean-std"], [{"feature_range": (-5, 5)}, {}] + ) -if __name__ == '__main__': +if __name__ == "__main__": CNormalizerTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index 2ed9065a..e32792f3 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -29,9 +29,9 @@ def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): Trained normalizer. """ - norm_sklearn, norm = \ - super(TestCNormalizerMinMax, self)._sklearn_comp( - array, norm_sklearn, norm, sparse) + norm_sklearn, norm = super(TestCNormalizerMinMax, self)._sklearn_comp( + array, norm_sklearn, norm, sparse + ) self.logger.info("Testing out of range normalization") @@ -52,33 +52,31 @@ def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): def test_transform(self): """Test for `.transform()` method.""" + self._sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp( - self.array_dense, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp( - self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp( - self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp( - self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp( - self.column_dense, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp( - self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) + self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax() + ) + self._sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['min-max', 'pca', 'min-max'], - [{'feature_range': (-5, 5)}, {}, - {'feature_range': (0, 1)}]) + self._test_chain( + self.array_dense, + ["min-max", "pca", "min-max"], + [{"feature_range": (-5, 5)}, {}, {"feature_range": (0, 1)}], + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['min-max', 'mean-std', 'min-max'], - [{'feature_range': (-5, 5)}, {}, - {'feature_range': (0, 1)}]) + self._test_chain_gradient( + self.array_dense, + ["min-max", "mean-std", "min-max"], + [{"feature_range": (-5, 5)}, {}, {"feature_range": (0, 1)}], + ) -if __name__ == '__main__': +if __name__ == "__main__": CNormalizerTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_tfidf.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_tfidf.py index 807c236f..3a001516 100755 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_tfidf.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_tfidf.py @@ -10,7 +10,7 @@ class TestCNormalizerTFIDF(CPreProcessTestCases): """Unittest for TestCNormalizerTFIDF.""" - norm_type_lst = [None, 'l2', 'l1'] + norm_type_lst = [None, "l2", "l1"] def test_norm_tfidf(self): """Test for TestCNormalizerTFIDF.""" @@ -55,19 +55,16 @@ def sklearn_comp(array, norm): sklearn_comp(self.column_dense, norm_type) sklearn_comp(self.column_sparse, norm_type) - def test_chain(self): """Test a chain of preprocessors.""" x_chain = self._test_chain( - self.array_dense, - ['tf-idf', 'pca', 'tf-idf'], - [{'norm': 'l2'}, {}, {}] + self.array_dense, ["tf-idf", "pca", "tf-idf"], [{"norm": "l2"}, {}, {}] ) # Expected shape is (3, 3), as pca max n_components is 4-1 self.assertEqual( - (self.array_dense.shape[0], self.array_dense.shape[1] - 1), - x_chain.shape) + (self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape + ) @unittest.skip def test_inverse_transform(self): @@ -103,7 +100,7 @@ def _get_transform_component(x, y): norm = CNormalizerTFIDF(norm=norm_type).fit(array) - if norm_type == 'l1': + if norm_type == "l1": # if the norm is one we are computing a sub-gradient decimal = 1 else: @@ -116,8 +113,7 @@ def _get_transform_component(x, y): n_feats = array.size for f in range(n_feats): - self.logger.info( - "Compare the gradient of feature: {:}".format(f)) + self.logger.info("Compare the gradient of feature: {:}".format(f)) # compute analytical gradient w = CArray.zeros(array.size) @@ -127,18 +123,18 @@ def _get_transform_component(x, y): self.logger.info("Analytical gradient is:\n{:}".format(an_grad)) num_grad = CFunction(_get_transform_component).approx_fprime( - array.todense(), epsilon=1e-5, y=f) + array.todense(), epsilon=1e-5, y=f + ) self.logger.info("Numerical gradient is:\n{:}".format(num_grad)) - self.assert_array_almost_equal(an_grad, num_grad, - decimal=decimal) + self.assert_array_almost_equal(an_grad, num_grad, decimal=decimal) for norm_type in self.norm_type_lst: - compare_analytical_and_numerical_grad(self.row_dense.ravel(), - norm_type=norm_type) - compare_analytical_and_numerical_grad(self.row_sparse, - norm_type=norm_type) + compare_analytical_and_numerical_grad( + self.row_dense.ravel(), norm_type=norm_type + ) + compare_analytical_and_numerical_grad(self.row_sparse, norm_type=norm_type) -if __name__ == '__main__': +if __name__ == "__main__": CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index e89df4c6..60669ee5 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -13,30 +13,44 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): def test_transform(self): """Test for `.transform()` method.""" for norm_type in ["l1", "l2", "max"]: - self._sklearn_comp(self.array_dense, - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) - self._sklearn_comp(self.array_sparse, - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) - self._sklearn_comp(self.row_dense.atleast_2d(), - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) - self._sklearn_comp(self.row_sparse, - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) - self._sklearn_comp(self.column_dense, - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) - self._sklearn_comp(self.column_sparse, - Normalizer(norm=norm_type), - CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp( + self.array_dense, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) + self._sklearn_comp( + self.array_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) + self._sklearn_comp( + self.row_dense.atleast_2d(), + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) + self._sklearn_comp( + self.row_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) + self._sklearn_comp( + self.column_dense, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) + self._sklearn_comp( + self.column_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type), + ) def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['min-max', 'pca', 'unit-norm'], - [{'feature_range': (-5, 5)}, {}, {}]) + self._test_chain( + self.array_dense, + ["min-max", "pca", "unit-norm"], + [{"feature_range": (-5, 5)}, {}, {}], + ) def _test_gradient(self): """Check the normalizer gradient.""" @@ -64,35 +78,38 @@ def _get_transform_component(x, y): n_feats = array.size for f in range(n_feats): - self.logger.info( - "Compare the gradient of feature: {:}".format(f)) + self.logger.info("Compare the gradient of feature: {:}".format(f)) # compute analytical gradient w = CArray.zeros(array.size) w[f] = 1 an_grad = norm.gradient(array, w=w) - self.logger.info("Analytical gradient is: {:}".format( - an_grad.todense())) + self.logger.info( + "Analytical gradient is: {:}".format(an_grad.todense()) + ) num_grad = CFunction(_get_transform_component).approx_fprime( - array.todense(), epsilon=1e-5, y=f) - self.logger.info("Numerical gradient is: {:}".format( - num_grad.todense())) + array.todense(), epsilon=1e-5, y=f + ) + self.logger.info( + "Numerical gradient is: {:}".format(num_grad.todense()) + ) - self.assert_array_almost_equal(an_grad, num_grad, - decimal=decimal) + self.assert_array_almost_equal(an_grad, num_grad, decimal=decimal) for norm_type in norm_type_lst: compare_analytical_and_numerical_grad( - self.row_dense.ravel(), norm_type=norm_type) - compare_analytical_and_numerical_grad( - self.row_sparse, norm_type=norm_type) + self.row_dense.ravel(), norm_type=norm_type + ) + compare_analytical_and_numerical_grad(self.row_sparse, norm_type=norm_type) compare_analytical_and_numerical_grad( - (100 * self.row_dense).ravel(), norm_type=norm_type) + (100 * self.row_dense).ravel(), norm_type=norm_type + ) compare_analytical_and_numerical_grad( - (100 * self.row_sparse), norm_type=norm_type) + (100 * self.row_sparse), norm_type=norm_type + ) -if __name__ == '__main__': +if __name__ == "__main__": CNormalizerTestCases.main() diff --git a/src/secml/ml/features/reduction/c_reducer.py b/src/secml/ml/features/reduction/c_reducer.py index 3925c088..7494e328 100644 --- a/src/secml/ml/features/reduction/c_reducer.py +++ b/src/secml/ml/features/reduction/c_reducer.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta from secml.ml.features import CPreProcess @@ -12,4 +13,5 @@ class CReducer(CPreProcess, metaclass=ABCMeta): """Interface for feature dimensionality reduction algorithms.""" - __super__ = 'CReducer' + + __super__ = "CReducer" diff --git a/src/secml/ml/features/reduction/c_reducer_lda.py b/src/secml/ml/features/reduction/c_reducer_lda.py index 1f3093c8..1d5c57b3 100644 --- a/src/secml/ml/features/reduction/c_reducer_lda.py +++ b/src/secml/ml/features/reduction/c_reducer_lda.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from secml.array import CArray @@ -28,7 +29,8 @@ class CLDA(CReducer): class_type : 'lda' """ - __class_type = 'lda' + + __class_type = "lda" def __init__(self, n_components=None, preprocess=None): """Linear Discriminant Analysis (LDA) @@ -73,7 +75,7 @@ def __init__(self, n_components=None, preprocess=None): @property def eigenvec(self): """Eigenvectors estimated from the training data. - Is a matrix of shape: n_eigenvectors * n_features.""" + Is a matrix of shape: n_eigenvectors * n_features.""" return self._eigenvec @property @@ -100,7 +102,7 @@ def _check_is_fitted(self): If the preprocessor is not fitted. """ - check_is_fitted(self, ['_lda', 'mean']) + check_is_fitted(self, ["_lda", "mean"]) def _fit(self, x, y): """Fit the LDA using input data. @@ -137,11 +139,13 @@ def _fit(self, x, y): self._classes = targets.unique() if self.n_components is None: - self.n_components = (self._classes.size - 1) + self.n_components = self._classes.size - 1 else: if self.n_components > (self.classes.size - 1): - raise ValueError("Maximum number of components is {:}" - "".format(self.classes.size - 1)) + raise ValueError( + "Maximum number of components is {:}" + "".format(self.classes.size - 1) + ) self._lda = LinearDiscriminantAnalysis(n_components=self.n_components) self._lda.fit(data_carray.tondarray(), targets.tondarray()) @@ -184,8 +188,10 @@ def _forward(self, x): """ data_carray = CArray(x).todense().atleast_2d() if data_carray.shape[1] != self.mean.size: - raise ValueError("array to transform must have {:} features " - "(columns).".format(self.mean.size)) + raise ValueError( + "array to transform must have {:} features " + "(columns).".format(self.mean.size) + ) out = CArray(self._lda.transform(data_carray.tondarray())) return out.atleast_2d() if x.ndim >= 2 else out diff --git a/src/secml/ml/features/reduction/c_reducer_pca.py b/src/secml/ml/features/reduction/c_reducer_pca.py index 6172bbef..16fbe887 100644 --- a/src/secml/ml/features/reduction/c_reducer_pca.py +++ b/src/secml/ml/features/reduction/c_reducer_pca.py @@ -6,11 +6,12 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.ml.features.reduction import CReducer from secml.utils.mixed_utils import check_is_fitted -__all__ = ['CPCA'] +__all__ = ["CPCA"] class CPCA(CReducer): @@ -28,7 +29,8 @@ class CPCA(CReducer): class_type : 'pca' """ - __class_type = 'pca' + + __class_type = "pca" def __init__(self, n_components=None, preprocess=None): """Principal Component Analysis (PCA) @@ -123,7 +125,7 @@ def _check_is_fitted(self): If the preprocessor is not fitted. """ - check_is_fitted(self, ['components', 'mean']) + check_is_fitted(self, ["components", "mean"]) def _fit(self, x, y=None): """Fit the PCA using input data. @@ -169,7 +171,9 @@ def _fit(self, x, y=None): self.n_components = min(n_samples, n_features) else: if self.n_components > n_samples: - raise ValueError("maximum number of components is {:}".format(n_samples)) + raise ValueError( + "maximum number of components is {:}".format(n_samples) + ) # Centering training data self._mean = data_carray.mean(axis=0, keepdims=False) @@ -184,8 +188,8 @@ def _svd_train(self, data_carray): # Computing SVD reduction from numpy import linalg from sklearn.utils.extmath import svd_flip - u, s, v = linalg.svd(data_carray.atleast_2d().tondarray(), - full_matrices=False) + + u, s, v = linalg.svd(data_carray.atleast_2d().tondarray(), full_matrices=False) # flip eigenvectors' sign to enforce deterministic output u, v = svd_flip(u, v) @@ -199,7 +203,7 @@ def _svd_train(self, data_carray): eigenvec = CArray(eigenvec[:, idx]).atleast_2d() components = CArray(components[idx, :]).atleast_2d() # percentage of variance explained by each component - explained_variance = (eigenval ** 2) / (data_carray.shape[0] - 1) + explained_variance = (eigenval**2) / (data_carray.shape[0] - 1) explained_variance_ratio = explained_variance / explained_variance.sum() if 0 < self.n_components < 1.0: @@ -209,13 +213,13 @@ def _svd_train(self, data_carray): self.n_components = CArray(ratio_cumsum < self.n_components).sum() + 1 # Consider only n_components - self._eigenval = CArray(eigenval[:self.n_components]) - self._eigenvec = CArray(eigenvec[:, :self.n_components]) - self._components = CArray(components[:self.n_components, :]) + self._eigenval = CArray(eigenval[: self.n_components]) + self._eigenvec = CArray(eigenvec[:, : self.n_components]) + self._components = CArray(components[: self.n_components, :]) # storing explained variance of n_components only - self._explained_variance = explained_variance[:self.n_components] - self._explained_variance_ratio = explained_variance_ratio[:self.n_components] + self._explained_variance = explained_variance[: self.n_components] + self._explained_variance_ratio = explained_variance_ratio[: self.n_components] return self @@ -253,8 +257,10 @@ def _forward(self, x): """ data_carray = CArray(x).todense().atleast_2d() if data_carray.shape[1] != self.mean.size: - raise ValueError("array to transform must have {:} " - "features (columns).".format(self.mean.size)) + raise ValueError( + "array to transform must have {:} " + "features (columns).".format(self.mean.size) + ) out = CArray((data_carray - self.mean).dot(self._components.T)) return out.atleast_2d() if x.ndim >= 2 else out @@ -286,8 +292,10 @@ def _inverse_transform(self, x): """ data_carray = CArray(x).atleast_2d() if data_carray.shape[1] != self.n_components: - raise ValueError("array to revert must have {:} " - "features (columns).".format(self.n_components)) + raise ValueError( + "array to revert must have {:} " + "features (columns).".format(self.n_components) + ) out = CArray(data_carray.dot(self._components) + self.mean) diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py b/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py index 239e9d31..4f6369ba 100644 --- a/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py +++ b/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py @@ -26,7 +26,8 @@ def sklearn_comp(array, y): # Sklearn normalizer sklearn_lda = LinearDiscriminantAnalysis().fit( - array.tondarray(), y.tondarray()) + array.tondarray(), y.tondarray() + ) target = CArray(sklearn_lda.transform(array.tondarray())) # Our normalizer lda = CLDA().fit(array, y) @@ -66,18 +67,16 @@ def test_plot(self): red_dts = lda.fit_transform(patterns, labels) fig = CFigure(width=10, markersize=8) - fig.sp.scatter(red_dts[:, 0].ravel(), - red_dts[:, 1].ravel(), - c=labels) + fig.sp.scatter(red_dts[:, 0].ravel(), red_dts[:, 1].ravel(), c=labels) fig.show() def test_chain(self): """Test a chain of preprocessors.""" x_chain = self._test_chain( self.array_dense, - ['min-max', 'mean-std', 'lda'], - [{'feature_range': (-5, 5)}, {}, {}], - y=self.labels # LDA is supervised + ["min-max", "mean-std", "lda"], + [{"feature_range": (-5, 5)}, {}, {}], + y=self.labels, # LDA is supervised ) # Expected shape is (3, 1), as lda max n_components is classes - 1 @@ -85,9 +84,9 @@ def test_chain(self): x_chain = self._test_chain( self.array_dense, - ['mean-std', 'lda', 'min-max'], + ["mean-std", "lda", "min-max"], [{}, {}, {}], - y=self.labels # LDA is supervised + y=self.labels, # LDA is supervised ) # Expected shape is (3, 1), as lda max n_components is classes - 1 @@ -96,5 +95,5 @@ def test_chain(self): # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) -if __name__ == '__main__': +if __name__ == "__main__": CPreProcessTestCases.main() diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py b/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py index adf18ff5..01bde2c7 100644 --- a/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py +++ b/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py @@ -14,15 +14,15 @@ def test_pca(self): # Few test cases involve an all-zero column, # so PCA will trigger a 0/0 warning self.logger.filterwarnings( - action='ignore', - message='invalid value encountered in true_divide', - category=RuntimeWarning - ) + action="ignore", + message="invalid value encountered in true_divide", + category=RuntimeWarning, + ) self.logger.filterwarnings( - action='ignore', - message='invalid value encountered in divide', - category=RuntimeWarning - ) + action="ignore", + message="invalid value encountered in divide", + category=RuntimeWarning, + ) def sklearn_comp(array): self.logger.info("Original array is:\n{:}".format(array)) @@ -54,16 +54,17 @@ def test_chain(self): """Test a chain of preprocessors.""" x_chain = self._test_chain( self.array_dense, - ['min-max', 'unit-norm', 'pca'], - [{'feature_range': (-5, 5)}, {}, {}] + ["min-max", "unit-norm", "pca"], + [{"feature_range": (-5, 5)}, {}, {}], ) # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) + self.assertEqual( + (self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape + ) # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) -if __name__ == '__main__': +if __name__ == "__main__": CPreProcessTestCases.main() diff --git a/src/secml/ml/features/tests/c_preprocess_testcases.py b/src/secml/ml/features/tests/c_preprocess_testcases.py index 27228e04..1215fa14 100644 --- a/src/secml/ml/features/tests/c_preprocess_testcases.py +++ b/src/secml/ml/features/tests/c_preprocess_testcases.py @@ -37,5 +37,5 @@ def _test_chain(self, x, class_type_list, kwargs_list, y=None): return x_chain -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/kernels/c_kernel.py b/src/secml/ml/kernels/c_kernel.py index 3d203723..00bf88a0 100644 --- a/src/secml/ml/kernels/c_kernel.py +++ b/src/secml/ml/kernels/c_kernel.py @@ -6,6 +6,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from abc import ABCMeta from secml.ml import CModule @@ -32,7 +33,8 @@ class CKernel(CModule, metaclass=ABCMeta): Can be a CModule subclass. If None, input data is used as is. """ - __super__ = 'CKernel' + + __super__ = "CKernel" def __init__(self, preprocess=None): self._rv = None diff --git a/src/secml/ml/kernels/c_kernel_chebyshev_distance.py b/src/secml/ml/kernels/c_kernel_chebyshev_distance.py index 7cc900f1..0fbe4752 100644 --- a/src/secml/ml/kernels/c_kernel_chebyshev_distance.py +++ b/src/secml/ml/kernels/c_kernel_chebyshev_distance.py @@ -7,6 +7,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from sklearn import metrics from secml.array import CArray @@ -42,7 +43,8 @@ class CKernelChebyshevDistance(CKernel): [-2. -0.]]) """ - __class_type = 'chebyshev-dist' + + __class_type = "chebyshev-dist" def _forward(self, x): """Compute (negative) Chebyshev distances between x and cached rv. @@ -61,11 +63,14 @@ def _forward(self, x): if x.issparse is True or self._rv.issparse is True: raise TypeError( "Chebyshev Kernel not available for sparse data." - "See `sklearn.metrics.pairwise_distances`.") + "See `sklearn.metrics.pairwise_distances`." + ) - return -CArray(metrics.pairwise.pairwise_distances( - x.get_data(), self._rv.get_data(), - metric='chebyshev')) + return -CArray( + metrics.pairwise.pairwise_distances( + x.get_data(), self._rv.get_data(), metric="chebyshev" + ) + ) def _backward(self, w=None): """Calculate gradients of Chebyshev kernel wrt cached vector 'x'. @@ -91,16 +96,17 @@ def _backward(self, w=None): # checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) if self._rv is None: - raise ValueError( - "Please run forward with caching=True or set `rv` first.") + raise ValueError("Please run forward with caching=True or set `rv` first.") if self._cached_x.issparse is True or self._rv.issparse is True: raise TypeError( "Chebyshev Kernel not available for sparse data." - "See `sklearn.metrics.pairwise_distances`.") + "See `sklearn.metrics.pairwise_distances`." + ) diff = self._rv - self._cached_x m = abs(diff).max(axis=1) # extract m from each row diff --git a/src/secml/ml/kernels/c_kernel_euclidean.py b/src/secml/ml/kernels/c_kernel_euclidean.py index 0ab22d74..0108c84f 100644 --- a/src/secml/ml/kernels/c_kernel_euclidean.py +++ b/src/secml/ml/kernels/c_kernel_euclidean.py @@ -6,6 +6,7 @@ .. moduleauthor:: Angelo Sotgiu """ + from sklearn import metrics from secml.array import CArray @@ -48,7 +49,8 @@ class CKernelEuclidean(CKernel): [-2.828427 0. ]]) """ - __class_type = 'euclidean' + + __class_type = "euclidean" def __init__(self, squared=False, preprocess=None): self._squared = squared @@ -134,12 +136,16 @@ def _forward(self, x): Kernel between x and cached rv, shape (n_x, n_rv). """ - k = -CArray(metrics.pairwise.euclidean_distances( - x.get_data(), self._rv.get_data(), squared=self._squared, - X_norm_squared=self._x_norm_squared, - Y_norm_squared=self._rv_norm_squared)) - self._cached_kernel = None if self._cached_x is None or self._squared \ - else k + k = -CArray( + metrics.pairwise.euclidean_distances( + x.get_data(), + self._rv.get_data(), + squared=self._squared, + X_norm_squared=self._x_norm_squared, + Y_norm_squared=self._rv_norm_squared, + ) + ) + self._cached_kernel = None if self._cached_x is None or self._squared else k return k def _backward(self, w=None): @@ -167,21 +173,22 @@ def _backward(self, w=None): # Checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) - if self._rv is None or (not self._squared - and self._cached_kernel is None): + if self._rv is None or (not self._squared and self._cached_kernel is None): raise ValueError("Please run forward with caching=True first.") # Format of output array should be the same as cached x - self._rv = self._rv.tosparse() if self._cached_x.issparse \ - else self._rv.todense() + self._rv = ( + self._rv.tosparse() if self._cached_x.issparse else self._rv.todense() + ) if self._squared is True: # 2 * (rv - x) - diff = (self._rv - self._cached_x) + diff = self._rv - self._cached_x return 2 * diff if w is None else w.dot(2 * diff) - diff = (self._rv - self._cached_x) + diff = self._rv - self._cached_x k_grad = self._cached_kernel.T k_grad[k_grad == 0] = 1.0 # To avoid nans later diff --git a/src/secml/ml/kernels/c_kernel_histintersect.py b/src/secml/ml/kernels/c_kernel_histintersect.py index fc74939d..da7bd246 100644 --- a/src/secml/ml/kernels/c_kernel_histintersect.py +++ b/src/secml/ml/kernels/c_kernel_histintersect.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + import numpy as np from secml.array import CArray @@ -39,7 +40,8 @@ class CKernelHistIntersect(CKernel): [3. 7.]]) """ - __class_type = 'hist-intersect' + + __class_type = "hist-intersect" def _forward(self, x): """Compute the histogram intersection kernel between x and cached rv. @@ -97,11 +99,13 @@ def _backward(self, w=None): # Checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) if self._rv is None: - raise ValueError("Please run forward with caching=True or set" - "`rv` first.") + raise ValueError( + "Please run forward with caching=True or set" "`rv` first." + ) if self._cached_x.issparse is True: # Broadcasting not supported for sparse arrays @@ -109,7 +113,6 @@ def _backward(self, w=None): else: # Broadcasting is supported by design for dense arrays x_broadcast = self._cached_x - grad = CArray.zeros(shape=self._rv.shape, - sparse=self._cached_x.issparse) + grad = CArray.zeros(shape=self._rv.shape, sparse=self._cached_x.issparse) grad[x_broadcast < self._rv] = 1 # TODO support from CArray still missing return grad if w is None else w.dot(grad) diff --git a/src/secml/ml/kernels/c_kernel_laplacian.py b/src/secml/ml/kernels/c_kernel_laplacian.py index 26ff96de..01442351 100644 --- a/src/secml/ml/kernels/c_kernel_laplacian.py +++ b/src/secml/ml/kernels/c_kernel_laplacian.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from sklearn import metrics from secml.array import CArray @@ -47,7 +48,8 @@ class CKernelLaplacian(CKernel): [0.018316 1. ]]) """ - __class_type = 'laplacian' + + __class_type = "laplacian" def __init__(self, gamma=1.0, preprocess=None): @@ -94,9 +96,11 @@ def _forward(self, x): Kernel between x and cached rv, shape (n_x, n_rv). """ - k = CArray(metrics.pairwise.laplacian_kernel( - CArray(x).get_data(), CArray(self._rv).get_data(), - gamma=self.gamma)) + k = CArray( + metrics.pairwise.laplacian_kernel( + CArray(x).get_data(), CArray(self._rv).get_data(), gamma=self.gamma + ) + ) self._cached_kernel = None if self._cached_x is None else k return k @@ -125,16 +129,16 @@ def _backward(self, w): if not self._cached_x.is_vector_like or self._cached_x.shape[0] > 1: raise ValueError( "kernel gradient can be computed only wrt arrays with shape " - "(1, n_features).") + "(1, n_features)." + ) if self._rv is None or self._cached_kernel is None: raise ValueError("Please run forward with caching=True first.") # Format of output array should be the same as x - rv = self._rv.tosparse() if self._cached_x.issparse \ - else self._rv.todense() + rv = self._rv.tosparse() if self._cached_x.issparse else self._rv.todense() - diff = (rv - self._cached_x) + diff = rv - self._cached_x k_grad = self._cached_kernel.T diff --git a/src/secml/ml/kernels/c_kernel_linear.py b/src/secml/ml/kernels/c_kernel_linear.py index bea0e4e4..9b9c2a95 100644 --- a/src/secml/ml/kernels/c_kernel_linear.py +++ b/src/secml/ml/kernels/c_kernel_linear.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.kernels import CKernel @@ -37,7 +38,8 @@ class CKernelLinear(CKernel): [11. 25.]]) """ - __class_type = 'linear' + + __class_type = "linear" def _forward(self, x): """Compute the linear kernel between x and cached rv. @@ -79,11 +81,13 @@ def _backward(self, w=None): # Checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) if self._rv is None: - raise ValueError("Please run forward with caching=True or set" - "`rv` first.") + raise ValueError( + "Please run forward with caching=True or set" "`rv` first." + ) # Format of output array should be the same as rv grad = self._rv.deepcopy() diff --git a/src/secml/ml/kernels/c_kernel_poly.py b/src/secml/ml/kernels/c_kernel_poly.py index 1f230e32..2d8bccd5 100644 --- a/src/secml/ml/kernels/c_kernel_poly.py +++ b/src/secml/ml/kernels/c_kernel_poly.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn import metrics from secml.array import CArray @@ -52,7 +53,8 @@ class CKernelPoly(CKernel): [144. 676.]]) """ - __class_type = 'poly' + + __class_type = "poly" def __init__(self, degree=2, gamma=1.0, coef0=1.0, preprocess=None): @@ -128,9 +130,15 @@ def _forward(self, x): Kernel between x and rv. Array of shape (n_x, n_rv). """ - return CArray(metrics.pairwise.polynomial_kernel( - CArray(x).get_data(), CArray(self._rv).get_data(), - self.degree, self.gamma, self.coef0)) + return CArray( + metrics.pairwise.polynomial_kernel( + CArray(x).get_data(), + CArray(self._rv).get_data(), + self.degree, + self.gamma, + self.coef0, + ) + ) # TODO: check for high gamma, # we may have uncontrolled behavior (too high values) @@ -158,15 +166,23 @@ def _backward(self, w=None): # Checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) if self._rv is None: - raise ValueError("Please run forward with caching=True or set" - "`rv` first.") - - k = CArray(metrics.pairwise.polynomial_kernel( - self._rv.get_data(), self._cached_x.get_data(), - self.degree - 1, self.gamma, self.coef0)) + raise ValueError( + "Please run forward with caching=True or set" "`rv` first." + ) + + k = CArray( + metrics.pairwise.polynomial_kernel( + self._rv.get_data(), + self._cached_x.get_data(), + self.degree - 1, + self.gamma, + self.coef0, + ) + ) # Format of output array should be the same as cached x if self._cached_x.issparse: diff --git a/src/secml/ml/kernels/c_kernel_rbf.py b/src/secml/ml/kernels/c_kernel_rbf.py index 76049e2a..3ead4c1c 100644 --- a/src/secml/ml/kernels/c_kernel_rbf.py +++ b/src/secml/ml/kernels/c_kernel_rbf.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from sklearn import metrics from secml.array import CArray @@ -49,7 +50,8 @@ class CKernelRBF(CKernel): [3.354626e-04 1.000000e+00]]) """ - __class_type = 'rbf' + + __class_type = "rbf" def __init__(self, gamma=1.0, preprocess=None): @@ -96,8 +98,11 @@ def _forward(self, x): Kernel between x and cached reference_samples, shape (n_x, n_rv). """ - k = CArray(metrics.pairwise.rbf_kernel( - CArray(x).get_data(), CArray(self._rv).get_data(), self.gamma)) + k = CArray( + metrics.pairwise.rbf_kernel( + CArray(x).get_data(), CArray(self._rv).get_data(), self.gamma + ) + ) self._cached_kernel = None if self._cached_x is None else k return k @@ -125,23 +130,26 @@ def _backward(self, w=None): # Checking if cached x is a vector if not self._cached_x.is_vector_like: raise ValueError( - "kernel gradient can be computed only wrt vector-like arrays.") + "kernel gradient can be computed only wrt vector-like arrays." + ) if self._rv is None or self._cached_kernel is None: raise ValueError("Please run forward with caching=True first.") # Format of output array should be the same as cached x - self._rv = self._rv.tosparse() if self._cached_x.issparse \ - else self._rv.todense() + self._rv = ( + self._rv.tosparse() if self._cached_x.issparse else self._rv.todense() + ) k_grad = self._cached_kernel.T if w is not None: c = w.T * k_grad - return CArray(2 * self.gamma * (c.T.dot(self._rv) - - c.sum() * self._cached_x)) + return CArray( + 2 * self.gamma * (c.T.dot(self._rv) - c.sum() * self._cached_x) + ) else: - diff = (self._rv - self._cached_x) + diff = self._rv - self._cached_x # Casting the kernel to sparse if needed for efficient broadcasting if diff.issparse is True: k_grad = k_grad.tosparse() diff --git a/src/secml/ml/kernels/tests/c_kernel_testcases.py b/src/secml/ml/kernels/tests/c_kernel_testcases.py index 51b75041..512fa103 100644 --- a/src/secml/ml/kernels/tests/c_kernel_testcases.py +++ b/src/secml/ml/kernels/tests/c_kernel_testcases.py @@ -10,10 +10,14 @@ class CCKernelTestCases(CUnitTest): def _set_up(self, kernel_name): - self.d_dense = CDLRandom(n_samples=10, n_features=5, - n_redundant=0, n_informative=3, - n_clusters_per_class=1, - random_state=100).load() + self.d_dense = CDLRandom( + n_samples=10, + n_features=5, + n_redundant=0, + n_informative=3, + n_clusters_per_class=1, + random_state=100, + ).load() self.p1_dense = self.d_dense.X[0, :] self.p2_dense = self.d_dense.X[1, :] @@ -35,17 +39,22 @@ def _has_gradient(self): def _cmp_kernel(self, k_fun, a1, a2): k = k_fun(a1, a2) if isinstance(k, CArray): - self.logger.info("k shape with inputs {:} {:} is: {:}" - "".format(a1.shape, a2.shape, k.shape)) - self.assertEqual(k.shape, (CArray(a1).atleast_2d().shape[0], - CArray(a2).atleast_2d().shape[0])) + self.logger.info( + "k shape with inputs {:} {:} is: {:}" + "".format(a1.shape, a2.shape, k.shape) + ) + self.assertEqual( + k.shape, + (CArray(a1).atleast_2d().shape[0], CArray(a2).atleast_2d().shape[0]), + ) else: self.assertTrue(is_scalar(k)) def _test_similarity_shape(self): """Test shape of kernel.""" self.logger.info( - "Testing shape of " + self.kernel.class_type + " kernel output.") + "Testing shape of " + self.kernel.class_type + " kernel output." + ) x_vect = CArray.rand(shape=(1, 10)).ravel() x_mat = CArray.rand(shape=(10, 10)) @@ -64,7 +73,8 @@ def _test_similarity_shape(self): def _test_similarity_shape_sparse(self): """Test shape of kernel.""" self.logger.info( - "Testing shape of " + self.kernel.class_type + " kernel output.") + "Testing shape of " + self.kernel.class_type + " kernel output." + ) x_vect = CArray.rand(shape=(1, 10)).ravel().tosparse() x_mat = CArray.rand(shape=(10, 10)).tosparse() @@ -85,8 +95,9 @@ def _test_gradient(self): if not self._has_gradient(): self.logger.info( - "Gradient is not implemented for %s. " - "Skipping gradient dense tests.", self.kernel.class_type) + "Gradient is not implemented for %s. " "Skipping gradient dense tests.", + self.kernel.class_type, + ) return # we invert the order of input patterns as we compute the kernel @@ -112,11 +123,10 @@ def kern_grad_for_test(p2, p1, kernel_func): self.kernel.rv = self.d_dense.X[i, :] grad = self.kernel.gradient(self.p2_dense) if grad.norm() >= 1e-10: - grad_error = CFunction( - kern_f_for_test, kern_grad_for_test).check_grad( - self.p2_dense, 1e-8, self.d_dense.X[i, :], self.kernel) - self.logger.info("Gradient approx. error: {:}" - "".format(grad_error)) + grad_error = CFunction(kern_f_for_test, kern_grad_for_test).check_grad( + self.p2_dense, 1e-8, self.d_dense.X[i, :], self.kernel + ) + self.logger.info("Gradient approx. error: {:}" "".format(grad_error)) self.assertTrue(grad_error < 1e-4) def _test_gradient_sparse(self): @@ -125,7 +135,9 @@ def _test_gradient_sparse(self): if not self._has_gradient(): self.logger.info( "Gradient is not implemented for %s. " - "Skipping gradient sparse tests.", self.kernel.class_type) + "Skipping gradient sparse tests.", + self.kernel.class_type, + ) return self.logger.info("Testing gradient with sparse data.") @@ -133,20 +145,17 @@ def _test_gradient_sparse(self): self.kernel.rv = self.d_sparse.X k_grad = self.kernel.gradient(self.p2_dense) - self.logger.info( - "sparse/dense ->.isdense: {:}".format(k_grad.isdense)) + self.logger.info("sparse/dense ->.isdense: {:}".format(k_grad.isdense)) self.assertTrue(k_grad.isdense) self.kernel.rv = self.d_dense.X k_grad = self.kernel.gradient(self.p2_sparse) - self.logger.info( - "dense/sparse ->.issparse: {:}".format(k_grad.issparse)) + self.logger.info("dense/sparse ->.issparse: {:}".format(k_grad.issparse)) self.assertTrue(k_grad.issparse) self.kernel.rv = self.d_sparse.X k_grad = self.kernel.gradient(self.p2_sparse) - self.logger.info( - "sparse/sparse ->.issparse: {:}".format(k_grad.issparse)) + self.logger.info("sparse/sparse ->.issparse: {:}".format(k_grad.issparse)) self.assertTrue(k_grad.issparse) def _test_gradient_multiple_points(self): @@ -154,8 +163,9 @@ def _test_gradient_multiple_points(self): if not self._has_gradient(): self.logger.info( - "Gradient is not implemented for %s. " - "Skipping multiple-point tests.", self.kernel.class_type) + "Gradient is not implemented for %s. " "Skipping multiple-point tests.", + self.kernel.class_type, + ) return # check if gradient computed on multiple points is the same as @@ -183,8 +193,9 @@ def _test_gradient_multiple_points_sparse(self): if not self._has_gradient(): self.logger.info( - "Gradient is not implemented for %s. " - "Skipping multiple-point tests.", self.kernel.class_type) + "Gradient is not implemented for %s. " "Skipping multiple-point tests.", + self.kernel.class_type, + ) return # check if gradient computed on multiple points is the same as @@ -212,8 +223,9 @@ def _test_gradient_w(self): if not self._has_gradient(): self.logger.info( - "Gradient is not implemented for %s. " - "Skipping multiple-point tests.", self.kernel.class_type) + "Gradient is not implemented for %s. " "Skipping multiple-point tests.", + self.kernel.class_type, + ) return # check if the gradient computed when passing w is the same as the @@ -239,5 +251,5 @@ def _test_gradient_w(self): self.assert_array_almost_equal(grad_1, grad_2, decimal=10) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_chebyshev_distance.py b/src/secml/ml/kernels/tests/test_c_kernel_chebyshev_distance.py index 45bbb727..ac816690 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_chebyshev_distance.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_chebyshev_distance.py @@ -5,7 +5,7 @@ class TestCKernelChebyshevDistance(CCKernelTestCases): """Unit test for CKernelChebyshevDistance.""" def setUp(self): - self._set_up('chebyshev-dist') + self._set_up("chebyshev-dist") def test_similarity_shape(self): """Test shape of kernel.""" @@ -32,5 +32,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_euclidean.py b/src/secml/ml/kernels/tests/test_c_kernel_euclidean.py index 5e68a525..d34db201 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_euclidean.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_euclidean.py @@ -5,7 +5,7 @@ class TestCKernelEuclidean(CCKernelTestCases): """Unit test for CKernelEuclidean.""" def setUp(self): - self._set_up('euclidean') + self._set_up("euclidean") def test_similarity_shape(self): """Test shape of kernel.""" @@ -31,5 +31,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_histintersect.py b/src/secml/ml/kernels/tests/test_c_kernel_histintersect.py index 904b71d0..e54f3d2b 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_histintersect.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_histintersect.py @@ -5,7 +5,7 @@ class TestCKernelHistIntersect(CCKernelTestCases): """Unit test for CKernelHistIntersect.""" def setUp(self): - self._set_up('hist-intersect') + self._set_up("hist-intersect") def test_similarity_shape(self): """Test shape of kernel.""" @@ -20,5 +20,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_laplacian.py b/src/secml/ml/kernels/tests/test_c_kernel_laplacian.py index a27ff147..93d80c9d 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_laplacian.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_laplacian.py @@ -5,7 +5,7 @@ class TestCKernelLaplacian(CCKernelTestCases): """Unit test for CKernelLaplacian.""" def setUp(self): - self._set_up('laplacian') + self._set_up("laplacian") def test_similarity_shape(self): """Test shape of kernel.""" @@ -20,5 +20,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_linear.py b/src/secml/ml/kernels/tests/test_c_kernel_linear.py index 6e17123a..61c38fdb 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_linear.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_linear.py @@ -5,7 +5,7 @@ class TestCKernelLinear(CCKernelTestCases): """Unit test for CKernelLinear.""" def setUp(self): - self._set_up('linear') + self._set_up("linear") def test_similarity_shape(self): """Test shape of kernel.""" @@ -20,5 +20,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_poly.py b/src/secml/ml/kernels/tests/test_c_kernel_poly.py index 233d6c46..3ab292c5 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_poly.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_poly.py @@ -5,7 +5,7 @@ class TestCKernelPoly(CCKernelTestCases): """Unit test for CKernelPoly.""" def setUp(self): - self._set_up('poly') + self._set_up("poly") def test_similarity_shape(self): """Test shape of kernel.""" @@ -20,5 +20,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/kernels/tests/test_c_kernel_rbf.py b/src/secml/ml/kernels/tests/test_c_kernel_rbf.py index 725df860..6d8fb40b 100644 --- a/src/secml/ml/kernels/tests/test_c_kernel_rbf.py +++ b/src/secml/ml/kernels/tests/test_c_kernel_rbf.py @@ -5,7 +5,7 @@ class TestCKernelRBF(CCKernelTestCases): """Unit test for CKernelRBF.""" def setUp(self): - self._set_up('rbf') + self._set_up("rbf") def test_similarity_shape(self): """Test shape of kernel.""" @@ -20,5 +20,5 @@ def test_gradient(self): self._test_gradient_w() -if __name__ == '__main__': +if __name__ == "__main__": CCKernelTestCases.main() diff --git a/src/secml/ml/peval/c_perfevaluator.py b/src/secml/ml/peval/c_perfevaluator.py index 41864b82..0149004a 100755 --- a/src/secml/ml/peval/c_perfevaluator.py +++ b/src/secml/ml/peval/c_perfevaluator.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from abc import ABCMeta, abstractmethod from collections import OrderedDict from copy import deepcopy @@ -17,8 +18,7 @@ from secml.parallel import parfor2 -def _evaluate_one( - row_id, perf_eval, params, params_matrix, estimator, ds, verbose): +def _evaluate_one(row_id, perf_eval, params, params_matrix, estimator, ds, verbose): """Evaluate performance of estimator for one combination of parameters. Parameters @@ -57,7 +57,8 @@ def _evaluate_one( eval_score = perf_eval.compute_performance(estimator, ds) perf_eval.logger.info( - "Params: {:} - Score: {:}".format(estimator_params, eval_score)) + "Params: {:} - Score: {:}".format(estimator_params, eval_score) + ) return eval_score @@ -73,15 +74,15 @@ class CPerfEvaluator(CCreator, metaclass=ABCMeta): Name of the metric that we want maximize / minimize. """ - __super__ = 'CPerfEvaluator' + + __super__ = "CPerfEvaluator" def __init__(self, splitter, metric): self.splitter = CDataSplitter.create(splitter) self.metric = CMetric.create(metric) - def evaluate_params( - self, estimator, dataset, parameters, pick='first', n_jobs=1): + def evaluate_params(self, estimator, dataset, parameters, pick="first", n_jobs=1): """Evaluate parameters for input estimator on input dataset. Parameters @@ -119,15 +120,16 @@ def evaluate_params( # OrderedDict returns keys always in the same order, # so we are safe when iterating on params_matrix.shape[1] - parameters = OrderedDict( - sorted(parameters.items(), key=lambda t: t[0])) + parameters = OrderedDict(sorted(parameters.items(), key=lambda t: t[0])) params_idx = [] # create a list of list 'param_idx' with index of parameters' values for param_name in parameters: if not isinstance(parameters[param_name], list): - raise TypeError("values for parameter `{:}` must be " - "specified as a list.".format(param_name)) + raise TypeError( + "values for parameter `{:}` must be " + "specified as a list.".format(param_name) + ) # Add an index for each parameter's value params_idx.append(list(range(len(parameters[param_name])))) @@ -136,18 +138,28 @@ def evaluate_params( params_matrix = CArray.comblist(params_idx).astype(int) # Parallelize (if requested) over the rows of params_matrix - res_vect = parfor2(_evaluate_one, params_matrix.shape[0], - n_jobs, self, parameters, params_matrix, - estimator, dataset, self.verbose) + res_vect = parfor2( + _evaluate_one, + params_matrix.shape[0], + n_jobs, + self, + parameters, + params_matrix, + estimator, + dataset, + self.verbose, + ) # Transforming the list to array res_vect = CArray(res_vect) # Retrieve the best parameters best_params_dict, best_value = self._get_best_params( - res_vect, parameters, params_matrix, pick=pick) + res_vect, parameters, params_matrix, pick=pick + ) - self.logger.info("Best params: {:} - Value: {:}".format( - best_params_dict, best_value)) + self.logger.info( + "Best params: {:} - Value: {:}".format(best_params_dict, best_value) + ) # Restore original parameters of classifier for param in original_estimator.__dict__: @@ -163,13 +175,13 @@ def compute_performance(self, estimator, dataset): Parameters ---------- - estimator : CClassifier + estimator : CClassifier The classifier that we want evaluate. dataset : CDataset Dataset that we want use for evaluate the classifier. - + Returns - ------- + ------- score : float Performance score of estimator. @@ -177,7 +189,7 @@ def compute_performance(self, estimator, dataset): raise NotImplementedError() @abstractmethod - def _get_best_params(self, res_vect, params, params_matrix, pick='first'): + def _get_best_params(self, res_vect, params, params_matrix, pick="first"): """Returns the best parameters given input performance data. Parameters diff --git a/src/secml/ml/peval/c_perfevaluator_xval.py b/src/secml/ml/peval/c_perfevaluator_xval.py index ba48a37c..9af0725c 100755 --- a/src/secml/ml/peval/c_perfevaluator_xval.py +++ b/src/secml/ml/peval/c_perfevaluator_xval.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.ml.peval import CPerfEvaluator from secml.array import CArray from secml.core.type_utils import is_scalar @@ -27,20 +28,21 @@ class CPerfEvaluatorXVal(CPerfEvaluator): class_type : 'xval' """ - __class_type = 'xval' + + __class_type = "xval" def compute_performance(self, estimator, dataset): """Split data in folds and return the mean estimator performance. Parameters ---------- - estimator : CClassifier + estimator : CClassifier The Classifier that we want evaluate dataset : CDataset Dataset that we want use for evaluate the classifier Returns - ------- + ------- score : float Mean performance score of estimator computed on the K-Folds. @@ -59,7 +61,8 @@ def compute_performance(self, estimator, dataset): estimator.fit(train_dataset.X, train_dataset.Y) pred_label, pred_score = estimator.predict( - test_dataset.X, return_decision_function=True) + test_dataset.X, return_decision_function=True + ) if dataset.num_classes > 2: pred_score = None # Score cannot be used in multiclass case @@ -68,13 +71,14 @@ def compute_performance(self, estimator, dataset): pred_score = pred_score[:, 1].ravel() this_test_score = self.metric.performance_score( - test_dataset.Y, y_pred=pred_label, score=pred_score) + test_dataset.Y, y_pred=pred_label, score=pred_score + ) splits_score[split_idx] = this_test_score return splits_score.mean() - def _get_best_params(self, res_vect, params, params_matrix, pick='first'): + def _get_best_params(self, res_vect, params, params_matrix, pick="first"): """Returns the best parameters given input performance scores. The best parameters have the closest associated performance score @@ -105,8 +109,7 @@ def _get_best_params(self, res_vect, params, params_matrix, pick='first'): """ if not is_scalar(self.metric.best_value): - raise TypeError( - "XVal only works with metric with the best value as scalar") + raise TypeError("XVal only works with metric with the best value as scalar") # Get the index of the results closest to the best value diff = abs(res_vect - self.metric.best_value) @@ -133,12 +136,13 @@ def _get_best_params(self, res_vect, params, params_matrix, pick='first'): best_params_list.append(best_params_dict) # Chose which candidate parameters assign to classifier - if pick == 'first': # Usually the smallest + if pick == "first": # Usually the smallest best_params_dict = best_params_list[0] - elif pick == 'last': # Usually the biggest + elif pick == "last": # Usually the biggest best_params_dict = best_params_list[-1] - elif pick == 'random': + elif pick == "random": import random + best_params_dict = random.choice(best_params_list) else: raise ValueError("pick strategy '{:}' not known".format(pick)) diff --git a/src/secml/ml/peval/c_perfevaluator_xval_multiclass.py b/src/secml/ml/peval/c_perfevaluator_xval_multiclass.py index cf76ae01..e1d6c6bf 100755 --- a/src/secml/ml/peval/c_perfevaluator_xval_multiclass.py +++ b/src/secml/ml/peval/c_perfevaluator_xval_multiclass.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.ml.peval import CPerfEvaluator from secml.array import CArray from secml.core.type_utils import is_scalar @@ -26,20 +27,21 @@ class CPerfEvaluatorXValMulticlass(CPerfEvaluator): class_type : 'xval-multiclass' """ - __class_type = 'xval-multiclass' + + __class_type = "xval-multiclass" def compute_performance(self, estimator, dataset): """Split data in folds and return the mean estimator performance. Parameters ---------- - estimator : CClassifier + estimator : CClassifier The Classifier that we want evaluate dataset : CDataset Dataset that we want use for evaluate the classifier - + Returns - ------- + ------- scores : list Mean performance score of each binary estimator computed on the K-Folds. @@ -62,27 +64,28 @@ def compute_performance(self, estimator, dataset): split_scores = [] for class_idx in range(dataset.num_classes): # Binarize dataset - test_binary_ds = estimator.binarize_dataset( - class_idx, test_dataset) + test_binary_ds = estimator.binarize_dataset(class_idx, test_dataset) # Extract the target internal binary estimator. # They are all trained on the same data (normalized if needed) binary_clf = estimator._binary_classifiers[class_idx] pred_label, pred_score = binary_clf.predict( - test_binary_ds.X, return_decision_function=True) + test_binary_ds.X, return_decision_function=True + ) # Extracting score of the positive class pred_score = pred_score[:, 1].ravel() this_test_score = self.metric.performance_score( - test_binary_ds.Y, y_pred=pred_label, score=pred_score) + test_binary_ds.Y, y_pred=pred_label, score=pred_score + ) split_scores.append(this_test_score) test_scores[split_idx, :] = CArray(split_scores) return test_scores.mean(axis=0, keepdims=False).tolist() - def _get_best_params(self, res_vect, params, params_matrix, pick='first'): + def _get_best_params(self, res_vect, params, params_matrix, pick="first"): """Returns the best parameters given input performance scores. The best parameters have the closest associated performance score @@ -113,8 +116,7 @@ def _get_best_params(self, res_vect, params, params_matrix, pick='first'): """ if not is_scalar(self.metric.best_value): - raise TypeError( - "XVal only works with metric with the best value as scalar") + raise TypeError("XVal only works with metric with the best value as scalar") # Get the index of the results closest to the best value diff = abs(res_vect - self.metric.best_value) @@ -126,8 +128,7 @@ def _get_best_params(self, res_vect, params, params_matrix, pick='first'): # diff has one row for each parameters combination and # one column for each binary classifier - condidates_idx = diff[:, i].find_2d( - diff[:, i] == diff[:, i].min())[0] + condidates_idx = diff[:, i].find_2d(diff[:, i] == diff[:, i].min())[0] # Get the value of the result closest to the best value best_score.append(res_vect[condidates_idx[0], i]) @@ -147,12 +148,13 @@ def _get_best_params(self, res_vect, params, params_matrix, pick='first'): clf_best_params_list.append(best_params_dict) # Chose which candidate parameters assign to classifier - if pick == 'first': # Usually the smallest + if pick == "first": # Usually the smallest clf_best_params_dict = clf_best_params_list[0] - elif pick == 'last': # Usually the biggest + elif pick == "last": # Usually the biggest clf_best_params_dict = clf_best_params_list[-1] - elif pick == 'random': + elif pick == "random": import random + clf_best_params_dict = random.choice(clf_best_params_list) else: raise ValueError("pick strategy '{:}' not known".format(pick)) diff --git a/src/secml/ml/peval/metrics/__init__.py b/src/secml/ml/peval/metrics/__init__.py index 6075e244..2886bef4 100644 --- a/src/secml/ml/peval/metrics/__init__.py +++ b/src/secml/ml/peval/metrics/__init__.py @@ -1,5 +1,6 @@ from .c_roc import CRoc from .c_metric import CMetric + # Classification Metrics from .c_metric_accuracy import CMetricAccuracy from .c_metric_test_error import CMetricTestError @@ -7,6 +8,7 @@ from .c_metric_recall import CMetricRecall from .c_metric_f1 import CMetricF1 from .c_confusion_matrix import CMetricConfusionMatrix + # ROC-related metrics from .c_metric_auc import CMetricAUC from .c_metric_auc_wmw import CMetricAUCWMW @@ -16,6 +18,7 @@ from .c_metric_th_at_fpr import CMetricTHatFPR from .c_metric_tpr_at_th import CMetricTPRatTH from .c_metric_fnr_at_th import CMetricFNRatTH + # Regression Metrics from .c_metric_mae import CMetricMAE from .c_metric_mse import CMetricMSE diff --git a/src/secml/ml/peval/metrics/c_confusion_matrix.py b/src/secml/ml/peval/metrics/c_confusion_matrix.py index aaa4db96..8bffc387 100644 --- a/src/secml/ml/peval/metrics/c_confusion_matrix.py +++ b/src/secml/ml/peval/metrics/c_confusion_matrix.py @@ -5,6 +5,7 @@ .. moduleauthor:: Ambra Demontis """ + from sklearn.metrics import confusion_matrix from secml.array import CArray from secml.ml.peval.metrics import CMetric @@ -30,5 +31,4 @@ def _performance_score(self, y_true, y_pred): """ y_true = CArray(y_true) y_pred = CArray(y_pred) - return CArray(confusion_matrix(y_true.tondarray(), - y_pred.tondarray())) + return CArray(confusion_matrix(y_true.tondarray(), y_pred.tondarray())) diff --git a/src/secml/ml/peval/metrics/c_metric.py b/src/secml/ml/peval/metrics/c_metric.py index 2dd821dd..bb72f3a5 100755 --- a/src/secml/ml/peval/metrics/c_metric.py +++ b/src/secml/ml/peval/metrics/c_metric.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from abc import ABCMeta, abstractmethod import inspect @@ -41,12 +42,12 @@ class CMetric(CCreator, metaclass=ABCMeta): 0.3 """ - __super__ = 'CMetric' + + __super__ = "CMetric" best_value = None - def performance_score( - self, y_true=None, y_pred=None, score=None, **kwargs): + def performance_score(self, y_true=None, y_pred=None, score=None, **kwargs): """Compute the performance metric. Each metric can use as input either: @@ -72,11 +73,13 @@ def performance_score( # Check if all required parameters have been passed # Do not raise error if a defaulted parameter is not passed for p_idx, p in enumerate(metric_params): - if kwargs.get(p, None) is None and \ - (metric_defaults is None or - len(metric_params) - len(metric_defaults) > p_idx): - raise TypeError("metric '{:}' requires '{:}' parameter".format( - self.class_type, p)) + if kwargs.get(p, None) is None and ( + metric_defaults is None + or len(metric_params) - len(metric_defaults) > p_idx + ): + raise TypeError( + "metric '{:}' requires '{:}' parameter".format(self.class_type, p) + ) # Clean any other kwarg passed and not required by the metric for p in list(kwargs): diff --git a/src/secml/ml/peval/metrics/c_metric_accuracy.py b/src/secml/ml/peval/metrics/c_metric_accuracy.py index 0a2da254..3f89a482 100644 --- a/src/secml/ml/peval/metrics/c_metric_accuracy.py +++ b/src/secml/ml/peval/metrics/c_metric_accuracy.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -35,7 +36,8 @@ class CMetricAccuracy(CMetric): 0.75 """ - __class_type = 'accuracy' + + __class_type = "accuracy" best_value = 1.0 def _performance_score(self, y_true, y_pred): @@ -54,5 +56,4 @@ def _performance_score(self, y_true, y_pred): Returns metric value as float. """ - return float(skm.accuracy_score(y_true.tondarray(), - y_pred.tondarray())) + return float(skm.accuracy_score(y_true.tondarray(), y_pred.tondarray())) diff --git a/src/secml/ml/peval/metrics/c_metric_auc.py b/src/secml/ml/peval/metrics/c_metric_auc.py index 49a76b59..95f97af0 100644 --- a/src/secml/ml/peval/metrics/c_metric_auc.py +++ b/src/secml/ml/peval/metrics/c_metric_auc.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -39,7 +40,8 @@ class CMetricAUC(CMetric): 0.5 """ - __class_type = 'auc' + + __class_type = "auc" best_value = 1.0 def _performance_score(self, y_true, score): diff --git a/src/secml/ml/peval/metrics/c_metric_auc_wmw.py b/src/secml/ml/peval/metrics/c_metric_auc_wmw.py index d8e77f8a..975245fd 100644 --- a/src/secml/ml/peval/metrics/c_metric_auc_wmw.py +++ b/src/secml/ml/peval/metrics/c_metric_auc_wmw.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric @@ -34,7 +35,8 @@ class CMetricAUCWMW(CMetric): 0.5 """ - __class_type = 'auc-wmw' + + __class_type = "auc-wmw" best_value = 1.0 def _performance_score(self, y_true, score): diff --git a/src/secml/ml/peval/metrics/c_metric_f1.py b/src/secml/ml/peval/metrics/c_metric_f1.py index 1bc31177..588c9c1f 100644 --- a/src/secml/ml/peval/metrics/c_metric_f1.py +++ b/src/secml/ml/peval/metrics/c_metric_f1.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -41,7 +42,8 @@ class CMetricF1(CMetric): 0.6666666666666666 """ - __class_type = 'f1' + + __class_type = "f1" best_value = 1.0 def _performance_score(self, y_true, y_pred): @@ -61,9 +63,10 @@ def _performance_score(self, y_true, y_pred): """ if y_true.unique().size > 2: # Multiclass data - average = 'weighted' + average = "weighted" else: # Default case - average = 'binary' + average = "binary" - return float(skm.f1_score( - y_true.tondarray(), y_pred.tondarray(), average=average)) + return float( + skm.f1_score(y_true.tondarray(), y_pred.tondarray(), average=average) + ) diff --git a/src/secml/ml/peval/metrics/c_metric_fnr_at_fpr.py b/src/secml/ml/peval/metrics/c_metric_fnr_at_fpr.py index d75c23d2..aedee806 100644 --- a/src/secml/ml/peval/metrics/c_metric_fnr_at_fpr.py +++ b/src/secml/ml/peval/metrics/c_metric_fnr_at_fpr.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric from secml.ml.peval.metrics import CRoc @@ -40,7 +41,8 @@ class CMetricFNRatFPR(CMetric): 0.5 """ - __class_type = 'fnr-at-fpr' + + __class_type = "fnr-at-fpr" best_value = 1.0 def __init__(self, fpr=0.01): @@ -68,5 +70,4 @@ def _performance_score(self, y_true, score): This implementation is restricted to the binary classification task. """ - return 1 - CArray(self.fpr).interp( - *CRoc().compute(y_true, score)[0:2]).item() + return 1 - CArray(self.fpr).interp(*CRoc().compute(y_true, score)[0:2]).item() diff --git a/src/secml/ml/peval/metrics/c_metric_fnr_at_th.py b/src/secml/ml/peval/metrics/c_metric_fnr_at_th.py index af35893e..d357248d 100644 --- a/src/secml/ml/peval/metrics/c_metric_fnr_at_th.py +++ b/src/secml/ml/peval/metrics/c_metric_fnr_at_th.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric from secml.core.type_utils import is_list @@ -37,7 +38,8 @@ class CMetricFNRatTH(CMetric): 0.5 """ - __class_type = 'fnr-at-th' + + __class_type = "fnr-at-th" best_value = 1.0 def __init__(self, th=0.0): diff --git a/src/secml/ml/peval/metrics/c_metric_mae.py b/src/secml/ml/peval/metrics/c_metric_mae.py index 9859c683..0e692650 100644 --- a/src/secml/ml/peval/metrics/c_metric_mae.py +++ b/src/secml/ml/peval/metrics/c_metric_mae.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -35,7 +36,8 @@ class CMetricMAE(CMetric): 0.25 """ - __class_type = 'mae' + + __class_type = "mae" best_value = 0.0 def _performance_score(self, y_true, score): @@ -54,5 +56,4 @@ def _performance_score(self, y_true, score): Returns metric value as float. """ - return float(skm.mean_absolute_error(y_true.tondarray(), - score.tondarray())) + return float(skm.mean_absolute_error(y_true.tondarray(), score.tondarray())) diff --git a/src/secml/ml/peval/metrics/c_metric_mse.py b/src/secml/ml/peval/metrics/c_metric_mse.py index a9eb6deb..c841b2fb 100644 --- a/src/secml/ml/peval/metrics/c_metric_mse.py +++ b/src/secml/ml/peval/metrics/c_metric_mse.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -35,7 +36,8 @@ class CMetricMSE(CMetric): 0.25 """ - __class_type = 'mse' + + __class_type = "mse" best_value = 0.0 def _performance_score(self, y_true, score): @@ -54,5 +56,4 @@ def _performance_score(self, y_true, score): Returns metric value as float. """ - return float(skm.mean_squared_error(y_true.tondarray(), - score.tondarray())) + return float(skm.mean_squared_error(y_true.tondarray(), score.tondarray())) diff --git a/src/secml/ml/peval/metrics/c_metric_pauc.py b/src/secml/ml/peval/metrics/c_metric_pauc.py index bb3e28de..26765ac5 100644 --- a/src/secml/ml/peval/metrics/c_metric_pauc.py +++ b/src/secml/ml/peval/metrics/c_metric_pauc.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -47,7 +48,8 @@ class CMetricPartialAUC(CMetric): 0.125 """ - __class_type = 'pauc' + + __class_type = "pauc" best_value = 1.0 def __init__(self, fpr=0.01, n_points=1000): diff --git a/src/secml/ml/peval/metrics/c_metric_precision.py b/src/secml/ml/peval/metrics/c_metric_precision.py index 6d696f0a..8c85b939 100644 --- a/src/secml/ml/peval/metrics/c_metric_precision.py +++ b/src/secml/ml/peval/metrics/c_metric_precision.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -37,7 +38,8 @@ class CMetricPrecision(CMetric): 0.625 """ - __class_type = 'precision' + + __class_type = "precision" best_value = 1.0 def _performance_score(self, y_true, y_pred): @@ -57,9 +59,10 @@ def _performance_score(self, y_true, y_pred): """ if y_true.unique().size > 2: # Multiclass data - average = 'weighted' + average = "weighted" else: # Default case - average = 'binary' + average = "binary" - return float(skm.precision_score( - y_true.tondarray(), y_pred.tondarray(), average=average)) + return float( + skm.precision_score(y_true.tondarray(), y_pred.tondarray(), average=average) + ) diff --git a/src/secml/ml/peval/metrics/c_metric_recall.py b/src/secml/ml/peval/metrics/c_metric_recall.py index d6a9ad40..b4930fe6 100644 --- a/src/secml/ml/peval/metrics/c_metric_recall.py +++ b/src/secml/ml/peval/metrics/c_metric_recall.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -38,7 +39,8 @@ class CMetricRecall(CMetric): 0.75 """ - __class_type = 'recall' + + __class_type = "recall" best_value = 1.0 def _performance_score(self, y_true, y_pred): @@ -58,9 +60,10 @@ def _performance_score(self, y_true, y_pred): """ if y_true.unique().size > 2: # Multiclass data - average = 'weighted' + average = "weighted" else: # Default case - average = 'binary' + average = "binary" - return float(skm.recall_score( - y_true.tondarray(), y_pred.tondarray(), average=average)) + return float( + skm.recall_score(y_true.tondarray(), y_pred.tondarray(), average=average) + ) diff --git a/src/secml/ml/peval/metrics/c_metric_test_error.py b/src/secml/ml/peval/metrics/c_metric_test_error.py index d9610b87..fe9e28c5 100644 --- a/src/secml/ml/peval/metrics/c_metric_test_error.py +++ b/src/secml/ml/peval/metrics/c_metric_test_error.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sklearn.metrics as skm from secml.array import CArray @@ -35,7 +36,8 @@ class CMetricTestError(CMetric): 0.25 """ - __class_type = 'test-error' + + __class_type = "test-error" best_value = 0.0 def _performance_score(self, y_true, y_pred): @@ -54,5 +56,4 @@ def _performance_score(self, y_true, y_pred): Returns metric value as float. """ - return 1.0 - float(skm.accuracy_score(y_true.tondarray(), - y_pred.tondarray())) + return 1.0 - float(skm.accuracy_score(y_true.tondarray(), y_pred.tondarray())) diff --git a/src/secml/ml/peval/metrics/c_metric_th_at_fpr.py b/src/secml/ml/peval/metrics/c_metric_th_at_fpr.py index 40731b66..4ca24675 100644 --- a/src/secml/ml/peval/metrics/c_metric_th_at_fpr.py +++ b/src/secml/ml/peval/metrics/c_metric_th_at_fpr.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric from secml.ml.peval.metrics import CRoc @@ -40,7 +41,8 @@ class CMetricTHatFPR(CMetric): 0.0005 """ - __class_type = 'th-at-fpr' + + __class_type = "th-at-fpr" best_value = 1.0 def __init__(self, fpr=0.01): diff --git a/src/secml/ml/peval/metrics/c_metric_tpr_at_fpr.py b/src/secml/ml/peval/metrics/c_metric_tpr_at_fpr.py index 3f23e7ed..e52ddf3e 100644 --- a/src/secml/ml/peval/metrics/c_metric_tpr_at_fpr.py +++ b/src/secml/ml/peval/metrics/c_metric_tpr_at_fpr.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric from secml.ml.peval.metrics import CRoc @@ -40,7 +41,8 @@ class CMetricTPRatFPR(CMetric): 0.5 """ - __class_type = 'tpr-at-fpr' + + __class_type = "tpr-at-fpr" best_value = 1.0 def __init__(self, fpr=0.01): @@ -72,5 +74,4 @@ def _performance_score(self, y_true, score): This implementation is restricted to the binary classification task. """ - return CArray(self.fpr).interp( - *CRoc().compute(y_true, score)[0:2]).item() + return CArray(self.fpr).interp(*CRoc().compute(y_true, score)[0:2]).item() diff --git a/src/secml/ml/peval/metrics/c_metric_tpr_at_th.py b/src/secml/ml/peval/metrics/c_metric_tpr_at_th.py index 3a292761..025f6e8c 100644 --- a/src/secml/ml/peval/metrics/c_metric_tpr_at_th.py +++ b/src/secml/ml/peval/metrics/c_metric_tpr_at_th.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.array import CArray from secml.ml.peval.metrics import CMetric from secml.core.type_utils import is_list @@ -37,7 +38,8 @@ class CMetricTPRatTH(CMetric): 0.5 """ - __class_type = 'tpr-at-th' + + __class_type = "tpr-at-th" best_value = 1.0 def __init__(self, th=0.0): diff --git a/src/secml/ml/peval/metrics/c_roc.py b/src/secml/ml/peval/metrics/c_roc.py index b29a270f..5e65328a 100644 --- a/src/secml/ml/peval/metrics/c_roc.py +++ b/src/secml/ml/peval/metrics/c_roc.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray @@ -69,11 +70,14 @@ def average(fpr, tpr, n_points=1000): # Checking consistency between input data if n_fpr == 0: - raise ValueError("At least 1 array with false/true " - "positives must be specified.") + raise ValueError( + "At least 1 array with false/true " "positives must be specified." + ) if n_fpr != n_tpr: - raise ValueError("Number of True Positive Rates and " - "False Positive Rates must be the same.") + raise ValueError( + "Number of True Positive Rates and " + "False Positive Rates must be the same." + ) # Computing ROC for a single (labels, scores) pair mean_fpr = CArray.linspace(0, 1, n_points) @@ -110,6 +114,7 @@ class CBaseRoc: .CRoc : class that fully supports ROC repetitions. """ + def __init__(self): self._fpr = None self._tpr = None @@ -199,7 +204,7 @@ def compute(self, y_true, score, positive_label=None): def __iter__(self): """Returns `fpr`, `tpr`, `th` always in this order.""" - seq = ('fpr', 'tpr', 'th') # Fixed order for consistency + seq = ("fpr", "tpr", "th") # Fixed order for consistency for e in seq: yield getattr(self, e) @@ -373,11 +378,13 @@ class or confidence values. # Checking consistency between input data if n_score == 0: - raise ValueError("At least 1 array with classification " - "scores must be specified.") + raise ValueError( + "At least 1 array with classification " "scores must be specified." + ) if n_ytrue != n_score and n_ytrue + n_score != n_score + 1: - raise ValueError("Either 1 or {:} labels arrays should " - "be specified.".format(n_score)) + raise ValueError( + "Either 1 or {:} labels arrays should " "be specified.".format(n_score) + ) # Resetting any computed average ROC self._data_average.reset() @@ -385,17 +392,17 @@ class or confidence values. if n_ytrue == 1: # Use the same true labels vs all scores for score_idx in range(n_score): - rep = CBaseRoc().compute(y_true_list[0], - score_list[score_idx], - positive_label) + rep = CBaseRoc().compute( + y_true_list[0], score_list[score_idx], positive_label + ) # Storing result as a new repetition for ROC self._data.append(rep) else: # Use each true labels vs corresponding scores for score_idx in range(n_score): - rep = CBaseRoc().compute(y_true_list[score_idx], - score_list[score_idx], - positive_label) + rep = CBaseRoc().compute( + y_true_list[score_idx], score_list[score_idx], positive_label + ) # Storing result as a new repetition for ROC self._data.append(rep) @@ -434,8 +441,7 @@ def average(self, n_points=1000, return_std=False): Only if return_std is True. """ - mean_fpr, mean_tpr, std_dev_tpr = average( - self.fpr, self.tpr, n_points=n_points) + mean_fpr, mean_tpr, std_dev_tpr = average(self.fpr, self.tpr, n_points=n_points) # Assigning final data self._data_average._fpr = mean_fpr diff --git a/src/secml/ml/peval/metrics/tests/test_metrics.py b/src/secml/ml/peval/metrics/tests/test_metrics.py index e045baae..b19a6068 100644 --- a/src/secml/ml/peval/metrics/tests/test_metrics.py +++ b/src/secml/ml/peval/metrics/tests/test_metrics.py @@ -11,7 +11,7 @@ class TestCMetrics(CUnitTest): def test_accuracy(self): self.logger.info("Testing accuracy metric...") - peval = CMetric.create('accuracy') + peval = CMetric.create("accuracy") y_true = CArray([0, 1, 2, 3]) y_pred = CArray([0, 2, 1, 3]) @@ -29,7 +29,7 @@ def test_accuracy(self): def test_precision(self): self.logger.info("Testing precision metric...") - peval = CMetric.create('precision') + peval = CMetric.create("precision") true = CArray([0, 0, 0, 0, 1, 1, 1, 1]) pred = CArray([1, 0, 0, 0, 1, 1, 0, 0]) @@ -42,7 +42,7 @@ def test_precision(self): def test_recall(self): self.logger.info("Testing recall metric...") - peval = CMetric.create('recall') + peval = CMetric.create("recall") true = CArray([0, 0, 0, 0, 1, 1, 1, 1]) pred = CArray([1, 0, 0, 0, 1, 1, 0, 0]) @@ -55,7 +55,7 @@ def test_recall(self): def test_f1(self): self.logger.info("Testing F1 score metric...") - peval = CMetric.create('f1') + peval = CMetric.create("f1") true = CArray([0, 0, 0, 0, 1, 1, 1, 1]) pred = CArray([1, 0, 0, 0, 1, 1, 0, 0]) @@ -69,7 +69,7 @@ def test_f1(self): def test_mae(self): self.logger.info("Testing MAE metric...") - peval = CMetric.create('mae') + peval = CMetric.create("mae") true = CArray([3, -0.5, 2, 7]) pred = CArray([2.5, 0.0, 2, 8]) @@ -81,7 +81,7 @@ def test_mae(self): def test_mse(self): self.logger.info("Testing MSE metric...") - peval = CMetric.create('mse') + peval = CMetric.create("mse") true = CArray([3, -0.5, 2, 7]) pred = CArray([2.5, 0.0, 2, 8]) @@ -113,7 +113,7 @@ def _test_roc_metric(self, metric): def test_tpratfpr(self): self.logger.info("Testing TPR @ FPR metric...") - metric = CMetric.create('tpr-at-fpr', fpr=0.1) + metric = CMetric.create("tpr-at-fpr", fpr=0.1) res = self._test_roc_metric(metric) @@ -122,7 +122,7 @@ def test_tpratfpr(self): def test_fnratfpr(self): self.logger.info("Testing FNR @ FPR metric...") - metric = CMetric.create('fnr-at-fpr', fpr=0.1) + metric = CMetric.create("fnr-at-fpr", fpr=0.1) res = self._test_roc_metric(metric) @@ -131,7 +131,7 @@ def test_fnratfpr(self): def test_thatfpr(self): self.logger.info("Testing TH @ FPR metric...") - metric = CMetric.create('th-at-fpr', fpr=0.1) + metric = CMetric.create("th-at-fpr", fpr=0.1) res = self._test_roc_metric(metric) @@ -140,7 +140,7 @@ def test_thatfpr(self): def test_tpratth(self): self.logger.info("Testing TPR @ TH metric...") - metric = CMetric.create('tpr-at-th', th=0.76) + metric = CMetric.create("tpr-at-th", th=0.76) res = self._test_roc_metric(metric) @@ -149,7 +149,7 @@ def test_tpratth(self): def test_fnratth(self): self.logger.info("Testing FNR @ TH metric...") - metric = CMetric.create('fnr-at-th', th=0.76) + metric = CMetric.create("fnr-at-th", th=0.76) res = self._test_roc_metric(metric) @@ -158,26 +158,26 @@ def test_fnratth(self): def test_auc(self): self.logger.info("Testing AUC metric...") - metric = CMetric.create('auc') + metric = CMetric.create("auc") res = self._test_roc_metric(metric) self.assertAlmostEqual(0.89, res, places=2) self.logger.info("Testing AUC-WMW metric...") - metric = CMetric.create('auc-wmw') + metric = CMetric.create("auc-wmw") res = self._test_roc_metric(metric) self.assertAlmostEqual(0.89, res, places=2) self.logger.info("Testing pAUC metric...") - metric = CMetric.create('pauc', fpr=1.0, n_points=500) + metric = CMetric.create("pauc", fpr=1.0, n_points=500) res = self._test_roc_metric(metric) self.assertAlmostEqual(0.89, res, places=2) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/peval/metrics/tests/test_roc.py b/src/secml/ml/peval/metrics/tests/test_roc.py index bfb069b7..f1a71f30 100644 --- a/src/secml/ml/peval/metrics/tests/test_roc.py +++ b/src/secml/ml/peval/metrics/tests/test_roc.py @@ -13,21 +13,27 @@ class TestCRoc(CUnitTest): def setUp(self): - self.dl1 = CDLRandom(n_features=1000, n_redundant=200, - n_informative=250, n_clusters_per_class=2, - random_state=0) - self.dl2 = CDLRandom(n_features=1000, n_redundant=200, - n_informative=250, n_clusters_per_class=2, - random_state=1000) + self.dl1 = CDLRandom( + n_features=1000, + n_redundant=200, + n_informative=250, + n_clusters_per_class=2, + random_state=0, + ) + self.dl2 = CDLRandom( + n_features=1000, + n_redundant=200, + n_informative=250, + n_clusters_per_class=2, + random_state=1000, + ) self.ds1 = self.dl1.load() self.ds2 = self.dl2.load() self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y) - self.y1, self.s1 = self.svm.predict( - self.ds1.X, return_decision_function=True) - self.y2, self.s2 = self.svm.predict( - self.ds2.X, return_decision_function=True) + self.y1, self.s1 = self.svm.predict(self.ds1.X, return_decision_function=True) + self.y2, self.s2 = self.svm.predict(self.ds2.X, return_decision_function=True) self.roc = CRoc() @@ -51,12 +57,12 @@ def test_compute(self): def test_mean(self): - self.roc.compute([self.ds1.Y, self.ds2.Y], - [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()]) + self.roc.compute( + [self.ds1.Y, self.ds2.Y], [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()] + ) mean_fp, mean_tp, mean_std = self.roc.average(return_std=True) fig = CFigure(linewidth=2) - fig.sp.errorbar( - self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std) + fig.sp.errorbar(self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std) for rep in range(self.roc.n_reps): fig.sp.semilogx(self.roc.fpr[rep], self.roc.tpr[rep]) fig.sp.semilogx(mean_fp, mean_tp) @@ -64,5 +70,5 @@ def test_mean(self): fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/peval/tests/test_perf_evaluator.py b/src/secml/ml/peval/tests/test_perf_evaluator.py index d312844f..c830c2fd 100755 --- a/src/secml/ml/peval/tests/test_perf_evaluator.py +++ b/src/secml/ml/peval/tests/test_perf_evaluator.py @@ -16,6 +16,7 @@ class CMetricFirstNan(CMetric): """Test metric which returns some nans.""" + best_value = 1.0 def __init__(self): @@ -31,6 +32,7 @@ def _performance_score(self, y_true, score): class CMetricAllNan(CMetric): """Test metric which returns all nans.""" + best_value = 1.0 def __init__(self): @@ -51,7 +53,7 @@ def setUp(self): self.test_dataset = loader.load() # CREATE CLASSIFIERS - kernel = CKernel.create('rbf') + kernel = CKernel.create("rbf") self.svm = CClassifierSVM(kernel=kernel) self.svm.verbose = 1 @@ -60,43 +62,47 @@ def setUp(self): def test_parameters_setting(self): # Changing default parameters to be sure are not used - self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2}) + self.svm.set_params({"C": 25, "kernel.gamma": 1e-1, "n_jobs": 2}) - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]} + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [1, 50]} # DO XVAL FOR CHOOSE BEST PARAMETERS - xval_splitter = CDataSplitter.create( - 'kfold', num_folds=5, random_state=50000) + xval_splitter = CDataSplitter.create("kfold", num_folds=5, random_state=50000) # Set the best parameters inside the classifier - self.svm.estimate_parameters(self.training_dataset, xval_parameters, - xval_splitter, 'accuracy') + self.svm.estimate_parameters( + self.training_dataset, xval_parameters, xval_splitter, "accuracy" + ) self.logger.info( - "SVM has now the following parameters: {:}".format( - self.svm.get_params())) + "SVM has now the following parameters: {:}".format(self.svm.get_params()) + ) - self.assertEqual(self.svm.get_params()['C'], 1) - self.assertEqual(self.svm.get_params()['kernel.gamma'], 50) + self.assertEqual(self.svm.get_params()["C"], 1) + self.assertEqual(self.svm.get_params()["kernel.gamma"], 50) # Now we compare the parameters chosen before with a new evaluator - perf_eval = CPerfEvaluatorXVal( - xval_splitter, CMetric.create('accuracy')) + perf_eval = CPerfEvaluatorXVal(xval_splitter, CMetric.create("accuracy")) perf_eval.verbose = 1 best_params, best_score = perf_eval.evaluate_params( - self.svm, self.training_dataset, xval_parameters) + self.svm, self.training_dataset, xval_parameters + ) for param in xval_parameters: - self.logger.info( - "Best '{:}' is: {:}".format(param, best_params[param])) - self.assertEqual(best_params[param], - self.svm.get_params()[param]) + self.logger.info("Best '{:}' is: {:}".format(param, best_params[param])) + self.assertEqual(best_params[param], self.svm.get_params()[param]) self.svm.verbose = 0 parameters_combination = [ - [1, 1], [1, 50], [10, 1], [10, 50], [100, 1], [100, 50]] + [1, 1], + [1, 50], + [10, 1], + [10, 50], + [100, 1], + [100, 50], + ] par_comb_score = CArray.zeros(len(parameters_combination)) for comb in range(len(parameters_combination)): @@ -109,30 +115,30 @@ def test_parameters_setting(self): self.svm.fit( self.training_dataset[xval_splitter.tr_idx[f], :].X, - self.training_dataset[xval_splitter.tr_idx[f], :].Y) + self.training_dataset[xval_splitter.tr_idx[f], :].Y, + ) this_fold_predicted = self.svm.predict( - self.training_dataset[xval_splitter.ts_idx[f], :].X) + self.training_dataset[xval_splitter.ts_idx[f], :].X + ) this_fold_accuracy = skm.accuracy_score( - self.training_dataset[ - xval_splitter.ts_idx[f], :].Y.get_data(), - this_fold_predicted.get_data()) + self.training_dataset[xval_splitter.ts_idx[f], :].Y.get_data(), + this_fold_predicted.get_data(), + ) this_fold_score.append(this_fold_accuracy) - par_comb_score[comb] = (np.mean(this_fold_score)) - self.logger.info( - "this fold mean: {:}".format(par_comb_score[comb])) + par_comb_score[comb] = np.mean(this_fold_score) + self.logger.info("this fold mean: {:}".format(par_comb_score[comb])) max_combination_score = par_comb_score.max() better_param_comb = parameters_combination[par_comb_score.argmax()] - self.logger.info("max combination score founded here: {:}".format( - max_combination_score)) - self.logger.info("max comb score founded during xval {:}".format( - best_score)) + self.logger.info( + "max combination score founded here: {:}".format(max_combination_score) + ) + self.logger.info("max comb score founded during xval {:}".format(best_score)) - self.assertEqual(max_combination_score, - best_score) + self.assertEqual(max_combination_score, best_score) # set parameters found by xval and check if are the same chosen here self.logger.info("the parameters selected by own xval are:") @@ -147,29 +153,28 @@ def test_parameters_setting(self): def test_nan_metric_value(self): # Changing default parameters to be sure are not used - self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1}) - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]} + self.svm.set_params({"C": 25, "kernel.gamma": 1e-1}) + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [1, 50]} # DO XVAL FOR CHOOSE BEST PARAMETERS - xval_splitter = CDataSplitter.create( - 'kfold', num_folds=5, random_state=50000) + xval_splitter = CDataSplitter.create("kfold", num_folds=5, random_state=50000) self.logger.info("Testing metric with some nan") some_nan_metric = CMetricFirstNan() # Now we compare the parameters chosen before with a new evaluator - perf_eval = CPerfEvaluatorXVal( - xval_splitter, some_nan_metric) + perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric) perf_eval.verbose = 1 best_params, best_score = perf_eval.evaluate_params( - self.svm, self.training_dataset, xval_parameters, pick='last') + self.svm, self.training_dataset, xval_parameters, pick="last" + ) self.logger.info("best score : {:}".format(best_score)) # The xval should select the only one actual value (others are nan) - self.assertEqual(best_score, 1.) + self.assertEqual(best_score, 1.0) self.logger.info("Testing metric with all nan") @@ -177,36 +182,38 @@ def test_nan_metric_value(self): self.logger.filterwarnings( action="ignore", message="All-NaN slice encountered", - category=RuntimeWarning + category=RuntimeWarning, ) all_nan_metric = CMetricAllNan() # Now we compare the parameters chosen before with a new evaluator - perf_eval = CPerfEvaluatorXVal( - xval_splitter, all_nan_metric) + perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric) perf_eval.verbose = 1 with self.assertRaises(ValueError): perf_eval.evaluate_params( - self.svm, self.training_dataset, xval_parameters, pick='last') + self.svm, self.training_dataset, xval_parameters, pick="last" + ) def _run_multiclass(self, tr, multiclass, xval_params, expected_best): - xval_splitter = CDataSplitter.create( - 'kfold', num_folds=3, random_state=50000) + xval_splitter = CDataSplitter.create("kfold", num_folds=3, random_state=50000) # Set the best parameters inside the classifier best_params = multiclass.estimate_parameters( - tr, xval_params, xval_splitter, 'accuracy') + tr, xval_params, xval_splitter, "accuracy" + ) self.logger.info( "Multiclass SVM has now the following parameters: {:}".format( - multiclass.get_params())) + multiclass.get_params() + ) + ) for clf_idx, clf in enumerate(multiclass._binary_classifiers): - self.assertEqual(clf.C, expected_best['C']) - self.assertEqual(clf.kernel.gamma, expected_best['kernel.gamma']) + self.assertEqual(clf.C, expected_best["C"]) + self.assertEqual(clf.kernel.gamma, expected_best["kernel.gamma"]) # Final test: fit using best parameters multiclass.fit(tr.X, tr.Y) @@ -218,33 +225,32 @@ def _run_multiclass(self, tr, multiclass, xval_params, expected_best): def test_params_multiclass(self): """Parameter estimation for multiclass classifiers.""" # Create dummy dataset (we want a test different from train) - tr = CDLRandom(n_classes=4, n_clusters_per_class=1, - random_state=50000).load() + tr = CDLRandom(n_classes=4, n_clusters_per_class=1, random_state=50000).load() - kernel = CKernel.create('rbf') - multiclass = CClassifierMulticlassOVA( - CClassifierSVM, C=1, kernel=kernel) + kernel = CKernel.create("rbf") + multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel) multiclass.verbose = 1 - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [0.1, 1]} - expected = {'C': 10.0, 'kernel.gamma': 0.1} + expected = {"C": 10.0, "kernel.gamma": 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected) self.logger.info("Testing with preprocessor") - kernel = CKernel.create('rbf') + kernel = CKernel.create("rbf") multiclass = CClassifierMulticlassOVA( - CClassifierSVM, C=1, kernel=kernel, preprocess='min-max') + CClassifierSVM, C=1, kernel=kernel, preprocess="min-max" + ) multiclass.verbose = 1 - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [0.1, 1]} - expected = {'C': 10.0, 'kernel.gamma': 0.1} + expected = {"C": 10.0, "kernel.gamma": 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py b/src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py index 294b50e5..efad7f98 100755 --- a/src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py +++ b/src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py @@ -14,67 +14,69 @@ class TestCPerfEvaluatorMulticlass(CUnitTest): def setUp(self): # Create dummy dataset (we want a test different from train) - self.tr = CDLRandom(n_classes=4, n_clusters_per_class=1, - random_state=50000).load() - self.ts = CDLRandom(n_classes=4, n_clusters_per_class=1, - random_state=10000).load() + self.tr = CDLRandom( + n_classes=4, n_clusters_per_class=1, random_state=50000 + ).load() + self.ts = CDLRandom( + n_classes=4, n_clusters_per_class=1, random_state=10000 + ).load() def _run_multiclass(self, multiclass, xval_params, expected_best): - xval_splitter = CDataSplitter.create( - 'kfold', num_folds=3, random_state=50000) + xval_splitter = CDataSplitter.create("kfold", num_folds=3, random_state=50000) # Set the best parameters inside the classifier best_params = multiclass.estimate_parameters( - self.tr, xval_params, xval_splitter, 'accuracy', - perf_evaluator='xval-multiclass') + self.tr, + xval_params, + xval_splitter, + "accuracy", + perf_evaluator="xval-multiclass", + ) self.logger.info( "Multiclass SVM has now the following parameters: {:}".format( - multiclass.get_params())) + multiclass.get_params() + ) + ) for clf_idx, clf in enumerate(multiclass._binary_classifiers): - self.assertEqual( - clf.C, expected_best['C'][clf_idx]) - self.assertEqual( - clf.kernel.gamma, expected_best['kernel.gamma'][clf_idx]) + self.assertEqual(clf.C, expected_best["C"][clf_idx]) + self.assertEqual(clf.kernel.gamma, expected_best["kernel.gamma"][clf_idx]) # Final test: fit using best parameters multiclass.fit(self.tr.X, self.tr.Y) for clf_idx, clf in enumerate(multiclass._binary_classifiers): for param in best_params: - self.assertEqual(clf.get_params()[param], - best_params[param][clf_idx]) + self.assertEqual(clf.get_params()[param], best_params[param][clf_idx]) def test_params_multiclass(self): """Parameter estimation for multiclass classifiers.""" - kernel = CKernel.create('rbf') - multiclass = CClassifierMulticlassOVA( - CClassifierSVM, C=1, kernel=kernel) + kernel = CKernel.create("rbf") + multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel) multiclass.verbose = 1 - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [0.1, 1]} - expected = {'C': [1.0, 1.0, 10.0, 10.0], - 'kernel.gamma': [0.1, 0.1, 0.1, 0.1]} + expected = {"C": [1.0, 1.0, 10.0, 10.0], "kernel.gamma": [0.1, 0.1, 0.1, 0.1]} self._run_multiclass(multiclass, xval_parameters, expected) self.logger.info("Testing with preprocessor") - kernel = CKernel.create('rbf') + kernel = CKernel.create("rbf") multiclass = CClassifierMulticlassOVA( - CClassifierSVM, C=1, kernel=kernel, preprocess='min-max') + CClassifierSVM, C=1, kernel=kernel, preprocess="min-max" + ) multiclass.verbose = 1 - xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} + xval_parameters = {"C": [1, 10, 100], "kernel.gamma": [0.1, 1]} - expected = {'C': [100, 10, 10, 1], - 'kernel.gamma': [0.1, 0.1, 0.1, 0.1]} + expected = {"C": [100, 10, 10, 1], "kernel.gamma": [0.1, 0.1, 0.1, 0.1]} self._run_multiclass(multiclass, xval_parameters, expected) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/secml/ml/scalers/__init__.py b/src/secml/ml/scalers/__init__.py index a04798dd..14027c4b 100644 --- a/src/secml/ml/scalers/__init__.py +++ b/src/secml/ml/scalers/__init__.py @@ -1,7 +1,10 @@ import warnings -warnings.warn("This package is experimental and could change or " - "be removed in the future. " - "`ml.features.normalization` can be used instead.") + +warnings.warn( + "This package is experimental and could change or " + "be removed in the future. " + "`ml.features.normalization` can be used instead." +) from .c_scaler_sklearn import CScalerSkLearn from .c_scaler_norm import CScalerNorm diff --git a/src/secml/ml/scalers/c_scaler_minmax.py b/src/secml/ml/scalers/c_scaler_minmax.py index f6b6c739..4602ed17 100644 --- a/src/secml/ml/scalers/c_scaler_minmax.py +++ b/src/secml/ml/scalers/c_scaler_minmax.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Meloni """ + from sklearn.preprocessing import MinMaxScaler from secml.ml.scalers import CScalerSkLearn @@ -27,13 +28,14 @@ class CScalerMinMax(CScalerSkLearn): """ - __class_type = 'minmax' + __class_type = "minmax" def __init__(self, feature_range=(0, 1), copy=True, preprocess=None): scaler = MinMaxScaler(feature_range=feature_range, copy=copy) super(CScalerMinMax, self).__init__( - sklearn_scaler=scaler, preprocess=preprocess) + sklearn_scaler=scaler, preprocess=preprocess + ) def _check_is_fitted(self): """Check if the scaler is trained (fitted). @@ -44,7 +46,7 @@ def _check_is_fitted(self): If the scaler is not fitted. """ - self._check_is_fitted_scaler(self, ['min_', 'n_samples_seen_']) + self._check_is_fitted_scaler(self, ["min_", "n_samples_seen_"]) def _backward(self, w=None): self._check_is_fitted() diff --git a/src/secml/ml/scalers/c_scaler_norm.py b/src/secml/ml/scalers/c_scaler_norm.py index 00c95e1a..21f074c3 100644 --- a/src/secml/ml/scalers/c_scaler_norm.py +++ b/src/secml/ml/scalers/c_scaler_norm.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Meloni """ + from sklearn.preprocessing import Normalizer from secml.array import CArray @@ -28,7 +29,7 @@ class CScalerNorm(CScalerSkLearn): """ - __class_type = 'norm' + __class_type = "norm" def __init__(self, norm="l2", copy=True, preprocess=None): scaler = Normalizer(norm=norm, copy=copy) @@ -36,8 +37,7 @@ def __init__(self, norm="l2", copy=True, preprocess=None): self._order = None self.norm = norm - super(CScalerNorm, self).__init__( - sklearn_scaler=scaler, preprocess=preprocess) + super(CScalerNorm, self).__init__(sklearn_scaler=scaler, preprocess=preprocess) def _check_is_fitted(self): """This scaler doesn't need fit, so this function doesn't raise any @@ -70,20 +70,22 @@ def _backward(self, w=None): """ x = self._cached_x if x.shape[0] > 1: - raise ValueError("Parameter 'x' passed to the forward() method " - "needs to be a one dimensional vector " - "(passed a {:} dimensional vector)" - .format(x.ndim)) + raise ValueError( + "Parameter 'x' passed to the forward() method " + "needs to be a one dimensional vector " + "(passed a {:} dimensional vector)".format(x.ndim) + ) d = self._cached_x.size # get the number of features if w is not None: if (w.ndim != 1) or (w.size != d): - raise ValueError("Parameter 'w' needs to be a one dimensional " - "vector with the same number of elements " - "of parameter 'x' of the forward method " - "(passed a {:} dimensional vector with {:} " - "elements)" - .format(w.ndim, w.size)) + raise ValueError( + "Parameter 'w' needs to be a one dimensional " + "vector with the same number of elements " + "of parameter 'x' of the forward method " + "(passed a {:} dimensional vector with {:} " + "elements)".format(w.ndim, w.size) + ) # compute the norm of x: ||x|| x_norm = self._compute_x_norm(x) @@ -92,7 +94,7 @@ def _backward(self, w=None): # this is the derivative of the ratio x/||x|| grad = CArray.eye(d, d) * x_norm.item() - grad_norm_x.T.dot(x) - grad /= (x_norm ** 2) + grad /= x_norm**2 return grad if w is None else w.dot(grad) @@ -106,9 +108,9 @@ def norm(self, value): """Set the norm that must be used to normalize each row.""" self._norm = value - if self._norm == 'l2': + if self._norm == "l2": self._order = 2 - elif self._norm == 'l1': + elif self._norm == "l1": self._order = 1 elif self._norm == "max": self._order = inf @@ -143,7 +145,7 @@ def _compute_norm_gradient(self, x, x_norm): elif self.norm == "l1": sign = x.sign() grad_norm_x = sign - elif self.norm == 'max': + elif self.norm == "max": grad_norm_x = CArray.zeros(d, sparse=x.issparse) abs_x = x.abs() # take absolute values of x... max_abs_x = abs_x.max() # ... and the maximum absolute value diff --git a/src/secml/ml/scalers/c_scaler_sklearn.py b/src/secml/ml/scalers/c_scaler_sklearn.py index 2b4335ce..528b690e 100644 --- a/src/secml/ml/scalers/c_scaler_sklearn.py +++ b/src/secml/ml/scalers/c_scaler_sklearn.py @@ -5,10 +5,10 @@ .. moduleauthor:: Marco Meloni """ + from secml.ml import CModule from secml.array import CArray -from secml.ml.classifiers.sklearn.c_classifier_sklearn \ - import CWrapperSkLearnMixin +from secml.ml.classifiers.sklearn.c_classifier_sklearn import CWrapperSkLearnMixin from abc import ABCMeta, abstractmethod @@ -26,7 +26,8 @@ class CScalerSkLearn(CWrapperSkLearnMixin, CModule, metaclass=ABCMeta): desired preprocessor. If None, input data is used as is. """ - __super__ = 'CScalerSkLearn' + + __super__ = "CScalerSkLearn" def __init__(self, sklearn_scaler, preprocess=None): @@ -96,8 +97,8 @@ def _check_is_fitted_scaler(scaler, attributes, msg=None, check_all=True): attributes = [attributes] elif not is_list(attributes): raise TypeError( - "the attribute(s) to check must be a string or a list " - "of strings") + "the attribute(s) to check must be a string or a list " "of strings" + ) obj = scaler.sklearn_scaler @@ -111,4 +112,3 @@ def _forward(self, x): def _backward(self, w): raise NotImplementedError() - diff --git a/src/secml/ml/scalers/c_scaler_std.py b/src/secml/ml/scalers/c_scaler_std.py index 8a0596b0..af11797c 100644 --- a/src/secml/ml/scalers/c_scaler_std.py +++ b/src/secml/ml/scalers/c_scaler_std.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Meloni """ + from sklearn.preprocessing import StandardScaler from secml.ml.scalers import CScalerSkLearn @@ -32,15 +33,12 @@ class CScalerStd(CScalerSkLearn): """ - __class_type = 'std' + __class_type = "std" - def __init__(self, copy=True, with_mean=True, with_std=True, - preprocess=None): - scaler = StandardScaler( - copy=copy, with_mean=with_mean, with_std=with_std) + def __init__(self, copy=True, with_mean=True, with_std=True, preprocess=None): + scaler = StandardScaler(copy=copy, with_mean=with_mean, with_std=with_std) - super(CScalerStd, self).__init__( - sklearn_scaler=scaler, preprocess=preprocess) + super(CScalerStd, self).__init__(sklearn_scaler=scaler, preprocess=preprocess) def _check_is_fitted(self): """Check if the scaler is trained (fitted). @@ -51,7 +49,7 @@ def _check_is_fitted(self): If the scaler is not fitted. """ - self._check_is_fitted_scaler(self, ['n_samples_seen_']) + self._check_is_fitted_scaler(self, ["n_samples_seen_"]) def _backward(self, w=None): self._check_is_fitted() diff --git a/src/secml/ml/scalers/tests/c_scaler_testcases.py b/src/secml/ml/scalers/tests/c_scaler_testcases.py index e926a787..76d31763 100644 --- a/src/secml/ml/scalers/tests/c_scaler_testcases.py +++ b/src/secml/ml/scalers/tests/c_scaler_testcases.py @@ -6,8 +6,7 @@ class CScalerTestCases(CModuleTestCases): """Unittests interface for Normalizers.""" - def _compare_scalers(self, scaler, scaler_sklearn, - array, convert_to_dense=False): + def _compare_scalers(self, scaler, scaler_sklearn, array, convert_to_dense=False): """Compare wrapped scikit-learn scaler to the unwrapped scaler. Parameters @@ -31,8 +30,7 @@ def _compare_scalers(self, scaler, scaler_sklearn, """ self.logger.info("Original array is:\n{:}".format(array)) - array_sk = array.get_data() if convert_to_dense is False \ - else array.tondarray() + array_sk = array.get_data() if convert_to_dense is False else array.tondarray() # Sklearn normalizer scaler_sklearn.fit(array_sk, None) @@ -52,10 +50,12 @@ def _compare_scalers(self, scaler, scaler_sklearn, def _test_chain(self, x, class_type_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result.""" x_chain = super(CScalerTestCases, self)._test_chain( - x, class_type_list, kwargs_list, y) + x, class_type_list, kwargs_list, y + ) - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) + self.assertEqual( + (self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape + ) return x_chain @@ -63,12 +63,13 @@ def _test_chain_gradient(self, x, class_type_list, kwargs_list, y=None): """Tests if gradient preprocess chain and gradient of manual chaining yield same result.""" grad_chain = super(CScalerTestCases, self)._test_chain_gradient( - x, class_type_list, kwargs_list, y) + x, class_type_list, kwargs_list, y + ) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) return grad_chain -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/scalers/tests/test_c_scaler_minmax.py b/src/secml/ml/scalers/tests/test_c_scaler_minmax.py index 2c63777f..afc03365 100644 --- a/src/secml/ml/scalers/tests/test_c_scaler_minmax.py +++ b/src/secml/ml/scalers/tests/test_c_scaler_minmax.py @@ -9,8 +9,7 @@ class TestCScalerMinMax(CScalerTestCases): """Unittests for CScalerMinMax.""" - def _compare_scalers(self, scaler, scaler_sklearn, - array, convert_to_dense=False): + def _compare_scalers(self, scaler, scaler_sklearn, array, convert_to_dense=False): """Compare wrapped scikit-learn scaler to the unwrapped scaler. Parameters @@ -31,16 +30,13 @@ def _compare_scalers(self, scaler, scaler_sklearn, Trained normalizer. """ - scaler, scaler_sklearn = \ - super(TestCScalerMinMax, self)._compare_scalers(scaler, - scaler_sklearn, - array, - convert_to_dense) + scaler, scaler_sklearn = super(TestCScalerMinMax, self)._compare_scalers( + scaler, scaler_sklearn, array, convert_to_dense + ) self.logger.info("Testing out of range normalization") - array_sk = array.get_data() if convert_to_dense is False \ - else array.tondarray() + array_sk = array.get_data() if convert_to_dense is False else array.tondarray() # Sklearn normalizer (requires float dtype input) transform_sklearn = CArray(scaler_sklearn.transform(array_sk * 2)) @@ -57,27 +53,28 @@ def _compare_scalers(self, scaler, scaler_sklearn, def test_forward(self): """Test for `.forward()` method.""" - self._compare_scalers(CScalerMinMax(), MinMaxScaler(), - self.array_dense) - self._compare_scalers(CScalerMinMax(), MinMaxScaler(), - self.row_dense.atleast_2d()) - self._compare_scalers(CScalerMinMax(), MinMaxScaler(), - self.column_dense) + self._compare_scalers(CScalerMinMax(), MinMaxScaler(), self.array_dense) + self._compare_scalers( + CScalerMinMax(), MinMaxScaler(), self.row_dense.atleast_2d() + ) + self._compare_scalers(CScalerMinMax(), MinMaxScaler(), self.column_dense) def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['minmax', 'pca', 'minmax'], - [{'feature_range': (-5, 5)}, {}, - {'feature_range': (0, 1)}]) + self._test_chain( + self.array_dense, + ["minmax", "pca", "minmax"], + [{"feature_range": (-5, 5)}, {}, {"feature_range": (0, 1)}], + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['minmax', 'std', 'minmax'], - [{'feature_range': (-5, 5)}, {}, - {'feature_range': (0, 1)}]) + self._test_chain_gradient( + self.array_dense, + ["minmax", "std", "minmax"], + [{"feature_range": (-5, 5)}, {}, {"feature_range": (0, 1)}], + ) -if __name__ == '__main__': +if __name__ == "__main__": CScalerTestCases.main() diff --git a/src/secml/ml/scalers/tests/test_c_scaler_norm.py b/src/secml/ml/scalers/tests/test_c_scaler_norm.py index 5149c711..baa8729f 100644 --- a/src/secml/ml/scalers/tests/test_c_scaler_norm.py +++ b/src/secml/ml/scalers/tests/test_c_scaler_norm.py @@ -11,37 +11,51 @@ class TestCScalerNorm(CScalerTestCases): def test_forward(self): """Test for `.forward()` method.""" for norm_type in ["l1", "l2", "max"]: - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.array_dense) - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.array_sparse) - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.row_dense.atleast_2d()) - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.row_sparse) - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.column_dense) - self._compare_scalers(CScalerNorm(norm=norm_type), - Normalizer(norm=norm_type), - self.column_sparse) + self._compare_scalers( + CScalerNorm(norm=norm_type), + Normalizer(norm=norm_type), + self.array_dense, + ) + self._compare_scalers( + CScalerNorm(norm=norm_type), + Normalizer(norm=norm_type), + self.array_sparse, + ) + self._compare_scalers( + CScalerNorm(norm=norm_type), + Normalizer(norm=norm_type), + self.row_dense.atleast_2d(), + ) + self._compare_scalers( + CScalerNorm(norm=norm_type), Normalizer(norm=norm_type), self.row_sparse + ) + self._compare_scalers( + CScalerNorm(norm=norm_type), + Normalizer(norm=norm_type), + self.column_dense, + ) + self._compare_scalers( + CScalerNorm(norm=norm_type), + Normalizer(norm=norm_type), + self.column_sparse, + ) def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['minmax', 'pca', 'norm'], - [{'feature_range': (-5, 5)}, {}, {}]) + self._test_chain( + self.array_dense, + ["minmax", "pca", "norm"], + [{"feature_range": (-5, 5)}, {}, {}], + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['minmax', 'std', 'norm'], - [{'feature_range': (-5, 5)}, {}, {}]) + self._test_chain_gradient( + self.array_dense, + ["minmax", "std", "norm"], + [{"feature_range": (-5, 5)}, {}, {}], + ) -if __name__ == '__main__': +if __name__ == "__main__": CScalerTestCases.main() diff --git a/src/secml/ml/scalers/tests/test_c_scaler_std.py b/src/secml/ml/scalers/tests/test_c_scaler_std.py index 57282d7c..749f6667 100644 --- a/src/secml/ml/scalers/tests/test_c_scaler_std.py +++ b/src/secml/ml/scalers/tests/test_c_scaler_std.py @@ -14,39 +14,45 @@ def test_forward(self): for with_std in (True, False): self.logger.info("Testing using std? {:}".format(with_std)) - self._compare_scalers(CScalerStd(with_std=with_std), - StandardScaler(with_std=with_std), - self.array_dense) - self._compare_scalers(CScalerStd(with_std=with_std, - with_mean=False), - StandardScaler(with_std=with_std, - with_mean=False), - self.array_sparse) - self._compare_scalers(CScalerStd(with_std=with_std), - StandardScaler(with_std=with_std), - self.row_dense.atleast_2d()) - self._compare_scalers(CScalerStd(with_std=with_std, - with_mean=False), - StandardScaler(with_std=with_std, - with_mean=False), - self.row_sparse) - self._compare_scalers(CScalerStd(with_std=with_std), - StandardScaler(with_std=with_std), - self.column_dense) - self._compare_scalers(CScalerStd(with_std=with_std, - with_mean=False), - StandardScaler(with_std=with_std, - with_mean=False), - self.column_sparse) + self._compare_scalers( + CScalerStd(with_std=with_std), + StandardScaler(with_std=with_std), + self.array_dense, + ) + self._compare_scalers( + CScalerStd(with_std=with_std, with_mean=False), + StandardScaler(with_std=with_std, with_mean=False), + self.array_sparse, + ) + self._compare_scalers( + CScalerStd(with_std=with_std), + StandardScaler(with_std=with_std), + self.row_dense.atleast_2d(), + ) + self._compare_scalers( + CScalerStd(with_std=with_std, with_mean=False), + StandardScaler(with_std=with_std, with_mean=False), + self.row_sparse, + ) + self._compare_scalers( + CScalerStd(with_std=with_std), + StandardScaler(with_std=with_std), + self.column_dense, + ) + self._compare_scalers( + CScalerStd(with_std=with_std, with_mean=False), + StandardScaler(with_std=with_std, with_mean=False), + self.column_sparse, + ) def test_mean_std(self): """Test using specific mean/std.""" - for (mean, std) in [(1.5, 0.1), - ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: + for mean, std in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: self.logger.info("Original array is:\n{:}".format(array)) self.logger.info( - "Normalizing using mean: {:} std: {:}".format(mean, std)) + "Normalizing using mean: {:} std: {:}".format(mean, std) + ) n = CScalerStd(with_mean=not array.issparse) @@ -68,16 +74,18 @@ def test_mean_std(self): def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['minmax', 'pca', 'std'], - [{'feature_range': (-5, 5)}, {}, {}]) + self._test_chain( + self.array_dense, + ["minmax", "pca", "std"], + [{"feature_range": (-5, 5)}, {}, {}], + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['minmax', 'std'], - [{'feature_range': (-5, 5)}, {}]) + self._test_chain_gradient( + self.array_dense, ["minmax", "std"], [{"feature_range": (-5, 5)}, {}] + ) -if __name__ == '__main__': +if __name__ == "__main__": CScalerTestCases.main() diff --git a/src/secml/ml/stats/c_density_estimation.py b/src/secml/ml/stats/c_density_estimation.py index da3c2eba..1998aba5 100644 --- a/src/secml/ml/stats/c_density_estimation.py +++ b/src/secml/ml/stats/c_density_estimation.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from sklearn.neighbors import KernelDensity from secml.array import CArray @@ -20,7 +21,7 @@ class CDensityEstimation(CCreator): bandwidth : float, optional The bandwidth of the kernel. Default 1. algorithm : str, optional - The tree algorithm to use. + The tree algorithm to use. Valid options are ['kd_tree'|'ball_tree'|'auto']. Default is 'auto'. kernel : str, optional The kernel to use. Valid kernels are @@ -52,9 +53,19 @@ class CDensityEstimation(CCreator): of BallTree or KDTree. """ - def __init__(self, bandwidth=1.0, algorithm='auto', kernel='gaussian', - metric='euclidean', atol=0, rtol=1e-8, breadth_first=True, - leaf_size=40, metric_params=None): + + def __init__( + self, + bandwidth=1.0, + algorithm="auto", + kernel="gaussian", + metric="euclidean", + atol=0, + rtol=1e-8, + breadth_first=True, + leaf_size=40, + metric_params=None, + ): self.bandwidth = bandwidth self.algorithm = algorithm @@ -86,7 +97,8 @@ def estimate_density(self, x, n_points=1000): rtol=self.rtol, breadth_first=self.breadth_first, leaf_size=self.leaf_size, - metric_params=self.metric_params).fit(x.atleast_2d().get_data()) + metric_params=self.metric_params, + ).fit(x.atleast_2d().get_data()) x = CArray.linspace(x.min() * 1.01, x.max() * 1.01, n_points) x = x.atleast_2d().T diff --git a/src/secml/ml/stats/c_distribution_gaussian.py b/src/secml/ml/stats/c_distribution_gaussian.py index 83c6f0f7..4a7b2adf 100644 --- a/src/secml/ml/stats/c_distribution_gaussian.py +++ b/src/secml/ml/stats/c_distribution_gaussian.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + from scipy.stats import multivariate_normal from secml.array import CArray from secml.core import CCreator @@ -45,8 +46,7 @@ def pdf(self, data): cov = self.cov if isinstance(cov, CArray): cov = cov.tondarray() - return CArray(multivariate_normal.pdf(data.tondarray(), - self.mean, cov)) + return CArray(multivariate_normal.pdf(data.tondarray(), self.mean, cov)) def logpdf(self, data): """Log of the probability density function. @@ -65,5 +65,4 @@ def logpdf(self, data): cov = self.cov if isinstance(cov, CArray): cov = cov.tondarray() - return CArray(multivariate_normal.logpdf(data.tondarray(), - self.mean, cov)) + return CArray(multivariate_normal.logpdf(data.tondarray(), self.mean, cov)) diff --git a/src/secml/ml/stats/tests/test_c_density_estimation.py b/src/secml/ml/stats/tests/test_c_density_estimation.py index 649716bf..28dd4da4 100644 --- a/src/secml/ml/stats/tests/test_c_density_estimation.py +++ b/src/secml/ml/stats/tests/test_c_density_estimation.py @@ -15,34 +15,40 @@ def test_plot_density(self): N = 200 np.random.seed(1) - X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)), - np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis] + X = np.concatenate( + (np.random.normal(0, 1, int(0.3 * N)), np.random.normal(5, 1, int(0.7 * N))) + )[:, np.newaxis] X_plot = CArray(np.linspace(-5, 10, 1000)[:, np.newaxis]) - true_dens = CArray(0.3 * norm(0, 1).pdf(X_plot[:, 0].tondarray()) - + 0.7 * norm(5, 1).pdf(X_plot[:, 0].tondarray())) + true_dens = CArray( + 0.3 * norm(0, 1).pdf(X_plot[:, 0].tondarray()) + + 0.7 * norm(5, 1).pdf(X_plot[:, 0].tondarray()) + ) fig = CFigure(width=7) - fig.sp._sp.fill(X_plot[:, 0].tondarray(), true_dens.tondarray(), - fc='black', alpha=0.2, - label='input distribution') - - for kernel in ['gaussian', 'tophat', 'epanechnikov']: + fig.sp._sp.fill( + X_plot[:, 0].tondarray(), + true_dens.tondarray(), + fc="black", + alpha=0.2, + label="input distribution", + ) + + for kernel in ["gaussian", "tophat", "epanechnikov"]: kde = CDensityEstimation(kernel=kernel, bandwidth=0.5) x, y = kde.estimate_density(CArray(X), n_points=N) - fig.sp.plot(x, y, '-', - label="kernel = '{0}'".format(kernel)) + fig.sp.plot(x, y, "-", label="kernel = '{0}'".format(kernel)) fig.sp.text(6, 0.38, "N={0} points".format(N)) - fig.sp.legend(loc='upper left') - fig.sp.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k') + fig.sp.legend(loc="upper left") + fig.sp.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), "+k") fig.sp.xlim(-4, 9) fig.sp.ylim(-0.02, 0.4) fig.show() -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/ml/tests/c_module_testcases.py b/src/secml/ml/tests/c_module_testcases.py index f38d963a..913fcb8e 100644 --- a/src/secml/ml/tests/c_module_testcases.py +++ b/src/secml/ml/tests/c_module_testcases.py @@ -8,9 +8,7 @@ class CModuleTestCases(CUnitTest): """Unittests interface for CPreProcess.""" def setUp(self): - self.array_dense = CArray([[1, 0, 0, 5], - [2, 4, 0, 0], - [3, 6, 0, 0]]) + self.array_dense = CArray([[1, 0, 0, 5], [2, 4, 0, 0], [3, 6, 0, 0]]) self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True) self.labels = CArray([0, 1, 0]) @@ -31,8 +29,7 @@ def _create_chain(class_type_list, kwargs_list): chain = None # module with preprocessing chain modules = [] # list of modules (not connected via preprocessing) for i, pre_id in enumerate(class_type_list): - chain = CModule.create( - pre_id, preprocess=chain, **kwargs_list[i]) + chain = CModule.create(pre_id, preprocess=chain, **kwargs_list[i]) modules.append(CModule.create(pre_id, **kwargs_list[i])) return chain, modules @@ -94,8 +91,7 @@ def _test_chain_gradient(self, x, class_type_list, kwargs_list, y=None): for i, v in enumerate(v_list): grad = modules[i].gradient(v, w=grad) - self.logger.info( - "chain.gradient({:}):\n{:}".format(v, grad)) + self.logger.info("chain.gradient({:}):\n{:}".format(v, grad)) self.assert_allclose(grad_chain, grad) return grad @@ -104,20 +100,22 @@ def _test_chain_gradient(self, x, class_type_list, kwargs_list, y=None): class TestCModule(CModuleTestCases): def test_chain(self): """Test a chain of preprocessors.""" - self._test_chain(self.array_dense, - ['min-max', 'pca', 'min-max', 'rbf', 'svm'], - [{'feature_range': (-5, 5)}, {}, - {'feature_range': (0, 1)}, {}, {}], - y=self.labels) + self._test_chain( + self.array_dense, + ["min-max", "pca", "min-max", "rbf", "svm"], + [{"feature_range": (-5, 5)}, {}, {"feature_range": (0, 1)}, {}, {}], + y=self.labels, + ) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - self._test_chain_gradient(self.array_dense, - ['min-max', 'min-max', 'rbf', 'svm'], - [{'feature_range': (0, 1)}, - {}, {}, {}], - y=self.labels) + self._test_chain_gradient( + self.array_dense, + ["min-max", "min-max", "rbf", "svm"], + [{"feature_range": (0, 1)}, {}, {}, {}], + y=self.labels, + ) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/model_zoo/load_model.py b/src/secml/model_zoo/load_model.py index ed98aec2..fb20b6aa 100644 --- a/src/secml/model_zoo/load_model.py +++ b/src/secml/model_zoo/load_model.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import json import re from datetime import datetime, timedelta @@ -16,14 +17,15 @@ from secml.settings import SECML_MODELS_DIR -ZOO_REPO_URL = 'https://gitlab.com/secml/secml-zoo' -ZOO_REPO_BRANCH = _parse_env('SECML_ZOO_BRANCH', default='_noValue') -MODELS_DICT_FILE = 'models_dict.json' +ZOO_REPO_URL = "https://gitlab.com/secml/secml-zoo" +ZOO_REPO_BRANCH = _parse_env("SECML_ZOO_BRANCH", default="_noValue") +MODELS_DICT_FILE = "models_dict.json" MODELS_DICT_PATH = fm.join(SECML_MODELS_DIR, MODELS_DICT_FILE) _logger = CLog( logger_id=__name__, - file_handler=SECML_LOGS_PATH if SECML_STORE_LOGS is True else None) + file_handler=SECML_LOGS_PATH if SECML_STORE_LOGS is True else None, +) def _dl_data_versioned(file_path, output_dir, md5_digest=None): @@ -49,19 +51,19 @@ def _dl_data_versioned(file_path, output_dir, md5_digest=None): """ try: # Try downloading from the branch corresponding to current version - min_version = re.search(r'^\d+.\d+', secml.__version__).group(0) - branch = 'v' + min_version if ZOO_REPO_BRANCH == '_noValue' \ - else ZOO_REPO_BRANCH - dl_file_gitlab(ZOO_REPO_URL, file_path, output_dir, - branch=branch, md5_digest=md5_digest) + min_version = re.search(r"^\d+.\d+", secml.__version__).group(0) + branch = "v" + min_version if ZOO_REPO_BRANCH == "_noValue" else ZOO_REPO_BRANCH + dl_file_gitlab( + ZOO_REPO_URL, file_path, output_dir, branch=branch, md5_digest=md5_digest + ) except Exception as e: # Try looking into 'master' branch... _logger.debug(e) _logger.debug("Looking in the `master` branch...") - branch = \ - 'master' if ZOO_REPO_BRANCH == '_noValue' else ZOO_REPO_BRANCH - dl_file_gitlab(ZOO_REPO_URL, file_path, output_dir, - branch=branch, md5_digest=md5_digest) + branch = "master" if ZOO_REPO_BRANCH == "_noValue" else ZOO_REPO_BRANCH + dl_file_gitlab( + ZOO_REPO_URL, file_path, output_dir, branch=branch, md5_digest=md5_digest + ) def _get_models_dict(): @@ -84,7 +86,7 @@ def _get_models_dict(): # The `.last_update` contains the last time MODELS_DICT_FILE # has been download. Read the last update time if this file is available. # Otherwise the file will be created later - last_update_path = fm.join(SECML_MODELS_DIR, '.last_update') + last_update_path = fm.join(SECML_MODELS_DIR, ".last_update") last_update_format = "%d %m %Y %H:%M" # Specific format to avoid locale current_datetime = datetime.utcnow() # UTC datetime to avoid locale @@ -94,8 +96,7 @@ def _get_models_dict(): if fm.file_exist(last_update_path): try: with open(last_update_path) as fp: - last_update = \ - datetime.strptime(fp.read(), last_update_format) + last_update = datetime.strptime(fp.read(), last_update_format) # Compute the threshold for triggering an update last_update_th = last_update + timedelta(minutes=30) except ValueError as e: @@ -126,16 +127,19 @@ def _get_models_dict(): # is not available, so we propagate the error. Otherwise pass raise e _logger.debug(e) # Log the error for debug purposes - _logger.debug("Error when updating the models definitions. " - "Using the last available ones...") + _logger.debug( + "Error when updating the models definitions. " + "Using the last available ones..." + ) else: # No error raised during download process # Check if file has been correctly downloaded if not fm.file_exist(MODELS_DICT_PATH): raise RuntimeError( - 'Something wrong happened while downloading the ' - 'models definitions. Please try again.') + "Something wrong happened while downloading the " + "models definitions. Please try again." + ) # Update or create the "last update" file with open(last_update_path, "w") as fp: @@ -165,37 +169,39 @@ def load_model(model_id): """ model_info = _get_models_dict()[model_id] - model_path = fm.join(SECML_MODELS_DIR, model_info['model'] + '.py') + model_path = fm.join(SECML_MODELS_DIR, model_info["model"] + ".py") # Download (if needed) model's script, check md5 and extract it - if not fm.file_exist(model_path) or \ - model_info['model_md5'] != md5(model_path): - model_url_parts = ('models', model_info['model'] + '.py') - model_url = '/'.join(s.strip('/') for s in model_url_parts) + if not fm.file_exist(model_path) or model_info["model_md5"] != md5(model_path): + model_url_parts = ("models", model_info["model"] + ".py") + model_url = "/".join(s.strip("/") for s in model_url_parts) out_dir = fm.abspath(model_path) # Download requested model from current version's branch first, # then from master branch - _dl_data_versioned(model_url, out_dir, model_info['model_md5']) + _dl_data_versioned(model_url, out_dir, model_info["model_md5"]) # Check if file has been correctly downloaded if not fm.file_exist(model_path): - raise RuntimeError('Something wrong happened while ' - 'downloading the model. Please try again.') + raise RuntimeError( + "Something wrong happened while " + "downloading the model. Please try again." + ) - state_path = fm.join(SECML_MODELS_DIR, model_info['state'] + '.gz') + state_path = fm.join(SECML_MODELS_DIR, model_info["state"] + ".gz") # Download (if needed) state, check md5 and extract it - if not fm.file_exist(state_path) or \ - model_info['state_md5'] != md5(state_path): - state_url_parts = ('models', model_info['state'] + '.gz') - state_url = '/'.join(s.strip('/') for s in state_url_parts) + if not fm.file_exist(state_path) or model_info["state_md5"] != md5(state_path): + state_url_parts = ("models", model_info["state"] + ".gz") + state_url = "/".join(s.strip("/") for s in state_url_parts) out_dir = fm.abspath(state_path) # Download requested model state from current version's branch first, # then from master branch - _dl_data_versioned(state_url, out_dir, model_info['state_md5']) + _dl_data_versioned(state_url, out_dir, model_info["state_md5"]) # Check if file has been correctly downloaded if not fm.file_exist(state_path): - raise RuntimeError('Something wrong happened while ' - 'downloading the model. Please try again.') + raise RuntimeError( + "Something wrong happened while " + "downloading the model. Please try again." + ) def import_module(full_name, path): """Import a python module from a path.""" @@ -209,7 +215,7 @@ def import_module(full_name, path): return mod # Name of the function returning the model - model_name = model_info["model"].split('/')[-1] + model_name = model_info["model"].split("/")[-1] # Import the python module containing the function returning the model model_module = import_module(model_name, model_path) diff --git a/src/secml/model_zoo/tests/test_model_zoo.py b/src/secml/model_zoo/tests/test_model_zoo.py index f545a820..eda45e54 100644 --- a/src/secml/model_zoo/tests/test_model_zoo.py +++ b/src/secml/model_zoo/tests/test_model_zoo.py @@ -23,44 +23,45 @@ def setUpClass(cls): # via gitlab API to https://gitlab.com/secml/secml-zoo repository # Fake models definitions - cls.test_models_def = \ - fm.join(fm.abspath(__file__), 'models_dict_test.json') + cls.test_models_def = fm.join(fm.abspath(__file__), "models_dict_test.json") # Test model's definition - cls.test_model_id = '_test_model' - cls.test_model = \ - fm.join(fm.abspath(__file__), '_test_model_clf.py') - cls.test_model_state = \ - fm.join(fm.abspath(__file__), '_test_model-clf.gz') + cls.test_model_id = "_test_model" + cls.test_model = fm.join(fm.abspath(__file__), "_test_model_clf.py") + cls.test_model_state = fm.join(fm.abspath(__file__), "_test_model-clf.gz") # Url for mocking requests to the model zoo repository - repo = parse.quote('secml/secml-zoo', safe='') - file_model = parse.quote('models/_test/_test_model_clf.py', safe='') - file_state = parse.quote('models/_test/_test_model-clf.gz', safe='') - file_defs = parse.quote('models_dict.json', safe='') - vers = 'v' + re.search(r'^\d+.\d+', secml.__version__).group(0) + repo = parse.quote("secml/secml-zoo", safe="") + file_model = parse.quote("models/_test/_test_model_clf.py", safe="") + file_state = parse.quote("models/_test/_test_model-clf.gz", safe="") + file_defs = parse.quote("models_dict.json", safe="") + vers = "v" + re.search(r"^\d+.\d+", secml.__version__).group(0) - api_url = 'https://gitlab.com/api/v4/projects/' \ - '{:}/repository/files/{:}/raw?ref={:}' + api_url = ( + "https://gitlab.com/api/v4/projects/" "{:}/repository/files/{:}/raw?ref={:}" + ) # One url for master branch, one for current library version # One for model file, one for state file - cls.api_url_model_master = api_url.format(repo, file_model, 'master') + cls.api_url_model_master = api_url.format(repo, file_model, "master") cls.api_url_model_vers = api_url.format(repo, file_model, vers) - cls.api_url_state_master = api_url.format(repo, file_state, 'master') + cls.api_url_state_master = api_url.format(repo, file_state, "master") cls.api_url_state_vers = api_url.format(repo, file_state, vers) - cls.api_url_defs_master = api_url.format(repo, file_defs, 'master') + cls.api_url_defs_master = api_url.format(repo, file_defs, "master") cls.api_url_defs_vers = api_url.format(repo, file_defs, vers) cls.api_model_headers = { - 'Content-Disposition': r'inline; filename="_test_model_clf.py"'} + "Content-Disposition": r'inline; filename="_test_model_clf.py"' + } cls.api_state_headers = { - 'Content-Disposition': r'inline; filename="_test_model-clf.gz"'} + "Content-Disposition": r'inline; filename="_test_model-clf.gz"' + } cls.api_defs_headers = { - 'Content-Disposition': r'inline; filename="models_dict.json"'} + "Content-Disposition": r'inline; filename="models_dict.json"' + } # Set the debug level of models loader to debug - _logger.set_level('DEBUG') + _logger.set_level("DEBUG") def setUp(self): @@ -75,8 +76,8 @@ def tearDown(self): fm.remove_file(MODELS_DICT_PATH) # Removing folder with test model (force 'cause not empty) - if fm.folder_exist(fm.join(SECML_MODELS_DIR, '_test')): - fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True) + if fm.folder_exist(fm.join(SECML_MODELS_DIR, "_test")): + fm.remove_folder(fm.join(SECML_MODELS_DIR, "_test"), force=True) def _mock_requests(self, m, defs_url=None, model_url=None, state_url=None): """Mock model zoo resources download requests. @@ -96,24 +97,22 @@ def _mock_requests(self, m, defs_url=None, model_url=None, state_url=None): if defs_url: # Mocking models definitions self.logger.info("Mocking `{:}`".format(defs_url)) with open(self.test_models_def) as fdefs: - m.get(defs_url, - text=fdefs.read(), - headers=self.api_defs_headers) + m.get(defs_url, text=fdefs.read(), headers=self.api_defs_headers) if model_url: # Mocking model self.logger.info("Mocking `{:}`".format(model_url)) with open(self.test_model) as fmodel: - m.get(model_url, - text=fmodel.read(), - headers=self.api_model_headers) + m.get(model_url, text=fmodel.read(), headers=self.api_model_headers) if state_url: # Mocking model state self.logger.info("Mocking `{:}`".format(state_url)) - with open(self.test_model_state, 'br') as fmodelstate: + with open(self.test_model_state, "br") as fmodelstate: # We read the state file as binary data and pass to content - m.get(state_url, - content=fmodelstate.read(), - headers=self.api_state_headers) + m.get( + state_url, + content=fmodelstate.read(), + headers=self.api_state_headers, + ) def _check_test_model(self): """Load the test model and check its parameters.""" @@ -142,44 +141,50 @@ def _test_load_model(self, defs_url, model_url, state_url): # Simulate a fine process, with all resources available self._mock_requests( - m, defs_url=defs_url, model_url=model_url, state_url=state_url) + m, defs_url=defs_url, model_url=model_url, state_url=state_url + ) self._check_test_model() # Call model loading # We now simulate a need for `models_dict.json` update # by removing `.last_update` file - fm.remove_file(fm.join(SECML_MODELS_DIR, '.last_update')) + fm.remove_file(fm.join(SECML_MODELS_DIR, ".last_update")) # Also remove test model to force re-download - fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True) + fm.remove_folder(fm.join(SECML_MODELS_DIR, "_test"), force=True) self._check_test_model() # Call model loading # We now simulate a need for `models_dict.json` update, # but a connection error occurs (simulated by not mocking dl url) # Last available version of models dict should be used - fm.remove_file(fm.join(SECML_MODELS_DIR, '.last_update')) - fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True) + fm.remove_file(fm.join(SECML_MODELS_DIR, ".last_update")) + fm.remove_folder(fm.join(SECML_MODELS_DIR, "_test"), force=True) with requests_mock.Mocker() as m: # Do not mock the url for models definitions self._mock_requests( - m, defs_url=None, model_url=model_url, state_url=state_url) + m, defs_url=None, model_url=model_url, state_url=state_url + ) self._check_test_model() # Call model loading def test_load_model_vers(self): """Test for `load_model` standard behavior (dl from version branch).""" - self._test_load_model(defs_url=self.api_url_defs_vers, - model_url=self.api_url_model_vers, - state_url=self.api_url_state_vers) + self._test_load_model( + defs_url=self.api_url_defs_vers, + model_url=self.api_url_model_vers, + state_url=self.api_url_state_vers, + ) def test_load_model_master(self): """Test for `load_model` standard behavior (dl from master branch).""" - self._test_load_model(defs_url=self.api_url_defs_master, - model_url=self.api_url_model_master, - state_url=self.api_url_state_master) + self._test_load_model( + defs_url=self.api_url_defs_master, + model_url=self.api_url_model_master, + state_url=self.api_url_state_master, + ) def test_load_model_fail(self): """Test for `load_model` fail behavior.""" @@ -192,26 +197,32 @@ def test_load_model_fail(self): with self.assertRaises(requests_mock.NoMockAddress): with requests_mock.Mocker() as m: self._mock_requests( - m, defs_url=self.api_url_defs_vers, - model_url=None, state_url=self.api_url_state_vers) + m, + defs_url=self.api_url_defs_vers, + model_url=None, + state_url=self.api_url_state_vers, + ) self._check_test_model() # Models defs can be download, but not the model state with self.assertRaises(requests_mock.NoMockAddress): with requests_mock.Mocker() as m: self._mock_requests( - m, defs_url=self.api_url_defs_vers, - model_url=self.api_url_model_vers, state_url=None) + m, + defs_url=self.api_url_defs_vers, + model_url=self.api_url_model_vers, + state_url=None, + ) self._check_test_model() # Can download defs, but requested model not available with self.assertRaises(KeyError): with requests_mock.Mocker() as m: self._mock_requests( - m, defs_url=self.api_url_defs_vers, - model_url=None, state_url=None) - load_model('svm-test') + m, defs_url=self.api_url_defs_vers, model_url=None, state_url=None + ) + load_model("svm-test") -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/optim/constraints/c_constraint.py b/src/secml/optim/constraints/c_constraint.py index 0cdfe406..1839a404 100644 --- a/src/secml/optim/constraints/c_constraint.py +++ b/src/secml/optim/constraints/c_constraint.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -14,7 +15,8 @@ class CConstraint(CCreator, metaclass=ABCMeta): """Interface for equality/inequality constraints.""" - __super__ = 'CConstraint' + + __super__ = "CConstraint" def is_active(self, x, tol=1e-4): """Returns True if constraint is active. diff --git a/src/secml/optim/constraints/c_constraint_box.py b/src/secml/optim/constraints/c_constraint_box.py index 647ed848..657bc830 100644 --- a/src/secml/optim/constraints/c_constraint_box.py +++ b/src/secml/optim/constraints/c_constraint_box.py @@ -6,6 +6,7 @@ .. moduleauthor:: Marco Melis """ + import numpy as np from secml.optim.constraints import CConstraint from secml.array import CArray @@ -28,7 +29,8 @@ class CConstraintBox(CConstraint): class_type : 'box' """ - __class_type = 'box' + + __class_type = "box" def __init__(self, lb=None, ub=None): @@ -64,9 +66,11 @@ def _validate_bounds(self): if isinstance(self.lb, CArray) and isinstance(self.ub, CArray): if lb_array.size != ub_array.size: - raise ValueError("`ub` and `lb` must have the same size if " - "both `CArray`. Currently {:} and {:}" - "".format(ub_array.size, lb_array.size)) + raise ValueError( + "`ub` and `lb` must have the same size if " + "both `CArray`. Currently {:} and {:}" + "".format(ub_array.size, lb_array.size) + ) if (lb_array > ub_array).any(): raise ValueError("`lb` must be lower or equal than `ub`") @@ -88,16 +92,20 @@ def _check_inf(self): def center(self): """Center of the constraint.""" if self._check_inf() is True: - raise ValueError("cannot compute `center` as at least one value " - "in the bounds is +/- `inf`") + raise ValueError( + "cannot compute `center` as at least one value " + "in the bounds is +/- `inf`" + ) return CArray(0.5 * (self.ub + self.lb)).ravel() @property def radius(self): """Radius of the constraint.""" if self._check_inf() is True: - raise ValueError("cannot compute `radius` as at least one value " - "in the bounds is +/- `inf`") + raise ValueError( + "cannot compute `radius` as at least one value " + "in the bounds is +/- `inf`" + ) return CArray(0.5 * (self.ub - self.lb)).ravel() def set_center_radius(self, c, r): @@ -185,8 +193,7 @@ def _constraint(self, x): """ # if x is sparse, and center and radius are not (sparse) vectors - if x.issparse and self.center.size != x.size and \ - self.radius.size != x.size: + if x.issparse and self.center.size != x.size and self.radius.size != x.size: return self._constraint_sparse(x) return float((abs(x - self.center) - self.radius).max()) @@ -242,8 +249,9 @@ def _projection(self, x): """ # If bound is float, ensure x is float - if np.issubdtype(CArray(self.ub).dtype, np.floating) or \ - np.issubdtype(CArray(self.ub).dtype, np.floating): + if np.issubdtype(CArray(self.ub).dtype, np.floating) or np.issubdtype( + CArray(self.ub).dtype, np.floating + ): x = x.astype(float) if isinstance(self.ub, CArray): diff --git a/src/secml/optim/constraints/c_constraint_l1.py b/src/secml/optim/constraints/c_constraint_l1.py index e4c782d6..9c4aefc5 100644 --- a/src/secml/optim/constraints/c_constraint_l1.py +++ b/src/secml/optim/constraints/c_constraint_l1.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.optim.constraints import CConstraint @@ -26,7 +27,8 @@ class CConstraintL1(CConstraint): class_type : 'l1' """ - __class_type = 'l1' + + __class_type = "l1" def __init__(self, center=0, radius=1): @@ -168,7 +170,7 @@ def _euclidean_proj_simplex(self, v, s=1): # get the number of > 0 components of the optimal solution # (only considering non-null elements in v - j = CArray.arange(1, cssv.size+1) + j = CArray.arange(1, cssv.size + 1) if u.issparse: rho = (j * u_nnz > (cssv - s)).sum() - 1 else: diff --git a/src/secml/optim/constraints/c_constraint_l2.py b/src/secml/optim/constraints/c_constraint_l2.py index 464770c9..a5f1e65e 100644 --- a/src/secml/optim/constraints/c_constraint_l2.py +++ b/src/secml/optim/constraints/c_constraint_l2.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.optim.constraints import CConstraint from secml.array import CArray @@ -25,7 +26,8 @@ class CConstraintL2(CConstraint): class_type : 'l2' """ - __class_type = 'l2' + + __class_type = "l2" def __init__(self, center=0, radius=1): # Setting the value of the center (array or scalar) @@ -89,7 +91,7 @@ def _projection(self, x): # define tolerance and project onto radius-tol # to ensure that numerical errors do not violate the projection tol = 1e-6 - sub = (self._radius-tol) * (x - self.center) + sub = (self._radius - tol) * (x - self.center) sub_l2 = (x - self.center).norm(order=2) if sub_l2 != 0: # Avoid division by 0 sub /= sub_l2 diff --git a/src/secml/optim/constraints/tests/test_c_constraint.py b/src/secml/optim/constraints/tests/test_c_constraint.py index a90010e6..e4ac9af5 100644 --- a/src/secml/optim/constraints/tests/test_c_constraint.py +++ b/src/secml/optim/constraints/tests/test_c_constraint.py @@ -97,41 +97,40 @@ def _test_constraint(self, c, p_in=None, p_out=None, p_on=None): def check_constraint(cons, point, expect): res = cons.constraint(point) - self.logger.info( - ".constraint({:}): {:}".format(point, res)) + self.logger.info(".constraint({:}): {:}".format(point, res)) self.assertIsInstance(res, float) - if expect == 'equal': + if expect == "equal": self.assertEqual(0, res) - elif expect == 'less': + elif expect == "less": self.assertLess(res, 0) - elif expect == 'greater': + elif expect == "greater": self.assertGreater(res, 0) else: - raise ValueError( - "values {'equal', 'less', 'greater'} for `expect`") + raise ValueError("values {'equal', 'less', 'greater'} for `expect`") if p_in is None and p_out is None and p_on is None: raise ValueError("pass at least one point") if p_in is not None: # This point is INSIDE, constraint should be LESS then 0 - check_constraint(c, p_in, 'less') - check_constraint(c, p_in.astype(int), 'less') + check_constraint(c, p_in, "less") + check_constraint(c, p_in.astype(int), "less") if p_out is not None: # This point is OUTSIDE, constraint should be GREATER then 0 - check_constraint(c, p_out, 'greater') - check_constraint(c, p_out.astype(int), 'greater') + check_constraint(c, p_out, "greater") + check_constraint(c, p_out.astype(int), "greater") if p_on is not None: # This point is ON, constraint should be EQUAL to 0 - check_constraint(c, p_on, 'equal') - check_constraint(c, p_on.astype(int), 'equal') + check_constraint(c, p_on, "equal") + check_constraint(c, p_on.astype(int), "equal") - def _test_projection(self, c, p_in=None, p_out=None, - p_on=None, p_out_expected=None): + def _test_projection( + self, c, p_in=None, p_out=None, p_on=None, p_out_expected=None + ): """Test for CConstraint.projection(). Parameters @@ -180,7 +179,7 @@ def check_projection(cons, point, expected): check_projection(c, p_out, p_out_expected) check_projection(c, p_out.astype(int), p_out_expected) - def _test_plot(self, c, *points, label=''): + def _test_plot(self, c, *points, label=""): """Visualize the constraint. Parameters @@ -199,28 +198,28 @@ def _test_plot(self, c, *points, label=''): fig = CFigure(height=6, width=6) - fig.sp.plot_fun(func=c.constraint, - grid_limits=grid_limits, - n_grid_points=40, - levels=[0], - levels_linewidth=1.5) + fig.sp.plot_fun( + func=c.constraint, + grid_limits=grid_limits, + n_grid_points=40, + levels=[0], + levels_linewidth=1.5, + ) - colors = ['g', 'r', 'c', 'm', 'y', 'k', 'w'] + colors = ["g", "r", "c", "m", "y", "k", "w"] for p_i, p in enumerate(points): - self.logger.info( - "Plotting point (color {:}): {:}".format(colors[p_i], p)) + self.logger.info("Plotting point (color {:}): {:}".format(colors[p_i], p)) fig.sp.scatter(*p, c=colors[p_i], zorder=10) p_proj = c.projection(p) self.logger.info( - "Plotting point (color {:}): {:}".format(colors[p_i], p_proj)) + "Plotting point (color {:}): {:}".format(colors[p_i], p_proj) + ) fig.sp.scatter(*p_proj, c=colors[p_i], zorder=10) if label: - filename = \ - "test_constraint_{:}_{:}.pdf".format(c.class_type, label) + filename = "test_constraint_{:}_{:}.pdf".format(c.class_type, label) else: - filename = \ - "test_constraint_{:}.pdf".format(c.class_type) + filename = "test_constraint_{:}.pdf".format(c.class_type) fig.savefig(fm.join(fm.abspath(__file__), filename)) diff --git a/src/secml/optim/constraints/tests/test_c_constraint_box.py b/src/secml/optim/constraints/tests/test_c_constraint_box.py index afdc8403..5ef424b8 100644 --- a/src/secml/optim/constraints/tests/test_c_constraint_box.py +++ b/src/secml/optim/constraints/tests/test_c_constraint_box.py @@ -13,11 +13,11 @@ def setUp(self): self.c = CConstraintBox(lb=CArray([0, -0.5]), ub=1.5) # create a point that lies inside the constraint - self.p1_inside = CArray([1., 1.]) + self.p1_inside = CArray([1.0, 1.0]) # create a point that lies outside the constraint - self.p2_outside = CArray([2., 2.]) + self.p2_outside = CArray([2.0, 2.0]) # create a point that lies on the constraint - self.p3_on = CArray([0., 1.]) + self.p3_on = CArray([0.0, 1.0]) def test_check_bounds(self): """Check validation of lb/ub""" @@ -40,13 +40,15 @@ def test_check_bounds(self): def test_is_active(self): """Test for CConstraint.is_active().""" - self._test_is_active( - self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_is_active(self.c, self.p1_inside, self.p2_outside, self.p3_on) # Test for sparse arrays self._test_is_active( - self.c, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) # Constraint with one or more inf, should be never active c = CConstraintBox(lb=CArray([0, -0.5]), ub=inf) @@ -61,32 +63,36 @@ def test_is_active(self): def test_is_violated(self): """Test for CConstraint.is_violated().""" - self._test_is_violated( - self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_is_violated(self.c, self.p1_inside, self.p2_outside, self.p3_on) # Test for sparse arrays self._test_is_violated( - self.c, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) # Constraint with one or more inf c = CConstraintBox(lb=CArray([0, -inf]), ub=1.5) - self._test_is_violated( - c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_is_violated(c, self.p1_inside, self.p2_outside, self.p3_on) c = CConstraintBox(lb=CArray([0, -0.5]), ub=inf) self._test_is_violated( # Using [-2, -2] as outside - c, self.p1_inside, -self.p2_outside, self.p3_on) + c, self.p1_inside, -self.p2_outside, self.p3_on + ) def test_constraint(self): """Test for CConstraint.constraint().""" - self._test_constraint( - self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_constraint(self.c, self.p1_inside, self.p2_outside, self.p3_on) # Test for sparse arrays self._test_constraint( - self.c, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) # Constraint with one or more inf, error should be raised c = CConstraintBox(lb=CArray([0, -inf]), ub=1.5) @@ -95,41 +101,46 @@ def test_constraint(self): def test_projection(self): """Test for CConstraint.projection().""" - self._test_projection(self.c, self.p1_inside, self.p2_outside, - self.p3_on, CArray([1.5, 1.5])) + self._test_projection( + self.c, self.p1_inside, self.p2_outside, self.p3_on, CArray([1.5, 1.5]) + ) # Test for sparse arrays self._test_projection( - self.c, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse(), - CArray([1.5, 1.5], tosparse=True)) + self.c, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + CArray([1.5, 1.5], tosparse=True), + ) # Check sparse arrays and scalar ub/lb # (corner case of sparse arrays as we cannot sub/add scalara) c = CConstraintBox(lb=0, ub=1.5) self._test_projection( - c, CArray([1., 1., 0.], tosparse=True), - CArray([2., 2., 0.], tosparse=True), - CArray([0., 1., 0.]).tosparse(), - CArray([1.5, 1.5, 0], tosparse=True)) + c, + CArray([1.0, 1.0, 0.0], tosparse=True), + CArray([2.0, 2.0, 0.0], tosparse=True), + CArray([0.0, 1.0, 0.0]).tosparse(), + CArray([1.5, 1.5, 0], tosparse=True), + ) # Constraint with one or more inf c = CConstraintBox(lb=CArray([0, -inf]), ub=1.5) self._test_projection( - c, self.p1_inside, self.p2_outside, - p_out_expected=CArray([1.5, 1.5])) + c, self.p1_inside, self.p2_outside, p_out_expected=CArray([1.5, 1.5]) + ) c = CConstraintBox(lb=CArray([-inf, -0.5]), ub=inf) self._test_projection( # Using [-2, -2] as outside, expect [-2, -0.5] - c, self.p1_inside, -self.p2_outside, - p_out_expected=CArray([-2, -0.5])) + c, self.p1_inside, -self.p2_outside, p_out_expected=CArray([-2, -0.5]) + ) def test_plot(self): """Visualize the constraint.""" # Plotting constraint and "critical" points - self._test_plot( - self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_plot(self.c, self.p1_inside, self.p2_outside, self.p3_on) -if __name__ == '__main__': +if __name__ == "__main__": CConstraintTestCases.main() diff --git a/src/secml/optim/constraints/tests/test_c_constraint_l1.py b/src/secml/optim/constraints/tests/test_c_constraint_l1.py index f66c9aa8..e0af125b 100644 --- a/src/secml/optim/constraints/tests/test_c_constraint_l1.py +++ b/src/secml/optim/constraints/tests/test_c_constraint_l1.py @@ -16,108 +16,142 @@ def setUp(self): self.c_array = CConstraintL1(center=CArray([1, 1]), radius=1) # create a point that lies inside the constraints - self.c0_p1_inside = CArray([0., 0.]) - self.c_p1_inside = CArray([1., 1.]) + self.c0_p1_inside = CArray([0.0, 0.0]) + self.c_p1_inside = CArray([1.0, 1.0]) # create a point that lies outside the constraints - self.c0_p2_outside = CArray([1., 1.]) - self.c_p2_outside = CArray([2., 2.]) + self.c0_p2_outside = CArray([1.0, 1.0]) + self.c_p2_outside = CArray([2.0, 2.0]) # create a point that lies on the constraints - self.c0_p3_on = CArray([0., 1.]) - self.c_p3_on = CArray([0., 1.]) + self.c0_p3_on = CArray([0.0, 1.0]) + self.c_p3_on = CArray([0.0, 1.0]) def test_is_active(self): """Test for CConstraint.is_active().""" self._test_is_active( - self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on) + self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on + ) + self._test_is_active(self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) self._test_is_active( - self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) - self._test_is_active( - self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) + self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on + ) # Test for sparse arrays self._test_is_violated( self.c0, self.c0_p1_inside.tosparse(), self.c0_p2_outside.tosparse(), - self.c0_p3_on.tosparse()) + self.c0_p3_on.tosparse(), + ) self._test_is_violated( self.c_array, self.c_p1_inside.tosparse(), self.c_p2_outside.tosparse(), - self.c_p3_on.tosparse()) + self.c_p3_on.tosparse(), + ) def test_is_violated(self): """Test for CConstraint.is_violated().""" self._test_is_violated( - self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on) + self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on + ) self._test_is_violated( - self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) + self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on + ) self._test_is_violated( - self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) + self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on + ) # Test for sparse arrays self._test_is_active( self.c0, self.c0_p1_inside.tosparse(), self.c0_p2_outside.tosparse(), - self.c0_p3_on.tosparse()) + self.c0_p3_on.tosparse(), + ) self._test_is_active( self.c_array, self.c_p1_inside.tosparse(), self.c_p2_outside.tosparse(), - self.c_p3_on.tosparse()) + self.c_p3_on.tosparse(), + ) def test_constraint(self): """Test for CConstraint.constraint().""" self._test_constraint( - self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on) - self._test_constraint( - self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) + self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on + ) + self._test_constraint(self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) self._test_constraint( - self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on) + self.c_array, self.c_p1_inside, self.c_p2_outside, self.c_p3_on + ) # Test for sparse arrays self._test_constraint( self.c0, self.c0_p1_inside.tosparse(), self.c0_p2_outside.tosparse(), - self.c0_p3_on.tosparse()) + self.c0_p3_on.tosparse(), + ) self._test_constraint( self.c_array, self.c_p1_inside.tosparse(), self.c_p2_outside.tosparse(), - self.c_p3_on.tosparse()) + self.c_p3_on.tosparse(), + ) def test_projection(self): """Test for CConstraint.projection().""" - self._test_projection(self.c0, self.c0_p1_inside, self.c0_p2_outside, - self.c0_p3_on, CArray([0.5, 0.5])) - self._test_projection(self.c, self.c_p1_inside, self.c_p2_outside, - self.c_p3_on, CArray([1.5, 1.5])) - self._test_projection(self.c_array, self.c_p1_inside, self.c_p2_outside, - self.c_p3_on, CArray([1.5, 1.5])) + self._test_projection( + self.c0, + self.c0_p1_inside, + self.c0_p2_outside, + self.c0_p3_on, + CArray([0.5, 0.5]), + ) + self._test_projection( + self.c, + self.c_p1_inside, + self.c_p2_outside, + self.c_p3_on, + CArray([1.5, 1.5]), + ) + self._test_projection( + self.c_array, + self.c_p1_inside, + self.c_p2_outside, + self.c_p3_on, + CArray([1.5, 1.5]), + ) # Test for sparse arrays self._test_projection( - self.c0, self.c0_p1_inside.tosparse(), - self.c0_p2_outside.tosparse(), self.c0_p3_on.tosparse(), - CArray([0.5, 0.5], tosparse=True)) + self.c0, + self.c0_p1_inside.tosparse(), + self.c0_p2_outside.tosparse(), + self.c0_p3_on.tosparse(), + CArray([0.5, 0.5], tosparse=True), + ) self._test_projection( - self.c_array, self.c_p1_inside.tosparse(), - self.c_p2_outside.tosparse(), self.c_p3_on.tosparse(), - CArray([1.5, 1.5], tosparse=True)) + self.c_array, + self.c_p1_inside.tosparse(), + self.c_p2_outside.tosparse(), + self.c_p3_on.tosparse(), + CArray([1.5, 1.5], tosparse=True), + ) def test_gradient(self): """Test for CConstraint.gradient().""" # [0. 0.] is the center of the constraint, expected grad [0, c0] # however, numerical gradient is struggling so we avoid its comparison self.assert_array_almost_equal( - self.c0.gradient(self.c0_p1_inside), CArray([0, 0])) + self.c0.gradient(self.c0_p1_inside), CArray([0, 0]) + ) # [1. 1.] is the center of the constraint, expected grad [0, c0] # however, numerical gradient is struggling so we avoid its comparison self.assert_array_almost_equal( - self.c.gradient(self.c_p1_inside), CArray([0, 0])) + self.c.gradient(self.c_p1_inside), CArray([0, 0]) + ) self._test_gradient(self.c0, CArray([0.1, 0.2])) self._test_gradient(self.c0, self.c0_p2_outside) @@ -128,11 +162,13 @@ def test_gradient(self): # [0. 1.] is the verge of the constraint, expected grad [0, 1] # however, numerical gradient is struggling so we avoid its comparison self.assert_array_almost_equal( - self.c0.gradient(self.c0_p3_on), CArray([0., 1.])) + self.c0.gradient(self.c0_p3_on), CArray([0.0, 1.0]) + ) # [0. 1.] is the verge of the constraint, expected grad [-1, 1] # however, numerical gradient is struggling so we avoid its comparison self.assert_array_almost_equal( - self.c.gradient(self.c_p3_on), CArray([-1., 0.])) + self.c.gradient(self.c_p3_on), CArray([-1.0, 0.0]) + ) def test_subgradient(self): """Check if the subgradient is computed correctly @@ -158,23 +194,22 @@ def test_subgradient(self): self.logger.info("Subgrad in {:} is:\n{:}".format(x0, gradient)) - self.assertLessEqual(angl1, angl2, "Subgrad is not inside the cone of " - "{:} and {:}".format(p_min, p_max)) + self.assertLessEqual( + angl1, + angl2, + "Subgrad is not inside the cone of " "{:} and {:}".format(p_min, p_max), + ) def test_plot(self): """Visualize the constraint.""" # Plotting constraint and "critical" points - self._test_plot(self.c0, - self.c0_p1_inside, - self.c0_p2_outside, - self.c0_p3_on, - label='c0') - self._test_plot(self.c, - self.c_p1_inside, - self.c_p2_outside, - self.c_p3_on, - label='c') - - -if __name__ == '__main__': + self._test_plot( + self.c0, self.c0_p1_inside, self.c0_p2_outside, self.c0_p3_on, label="c0" + ) + self._test_plot( + self.c, self.c_p1_inside, self.c_p2_outside, self.c_p3_on, label="c" + ) + + +if __name__ == "__main__": CConstraintTestCases.main() diff --git a/src/secml/optim/constraints/tests/test_c_constraint_l2.py b/src/secml/optim/constraints/tests/test_c_constraint_l2.py index b58db959..0c8646b8 100644 --- a/src/secml/optim/constraints/tests/test_c_constraint_l2.py +++ b/src/secml/optim/constraints/tests/test_c_constraint_l2.py @@ -12,67 +12,84 @@ def setUp(self): self.c_array = CConstraintL2(center=CArray([1, 1]), radius=1) # create a point that lies inside the constraint - self.p1_inside = CArray([1., 1.]) + self.p1_inside = CArray([1.0, 1.0]) # create a point that lies outside the constraint - self.p2_outside = CArray([2., 2.]) + self.p2_outside = CArray([2.0, 2.0]) # create a point that lies on the constraint - self.p3_on = CArray([0., 1.]) + self.p3_on = CArray([0.0, 1.0]) def test_is_active(self): """Test for CConstraint.is_active().""" - self._test_is_active( - self.c, self.p1_inside, self.p2_outside, self.p3_on) - self._test_is_active( - self.c_array, self.p1_inside, self.p2_outside, self.p3_on) + self._test_is_active(self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_is_active(self.c_array, self.p1_inside, self.p2_outside, self.p3_on) # Test for sparse arrays, works only for a center defined as CArray self._test_is_active( - self.c_array, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c_array, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) def test_is_violated(self): """Test for CConstraint.is_violated().""" + self._test_is_violated(self.c, self.p1_inside, self.p2_outside, self.p3_on) self._test_is_violated( - self.c, self.p1_inside, self.p2_outside, self.p3_on) - self._test_is_violated( - self.c_array, self.p1_inside, self.p2_outside, self.p3_on) + self.c_array, self.p1_inside, self.p2_outside, self.p3_on + ) # Test for sparse arrays, works only for a center defined as CArray self._test_is_violated( - self.c_array, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c_array, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) def test_constraint(self): """Test for CConstraint.constraint().""" - self._test_constraint( - self.c, self.p1_inside, self.p2_outside, self.p3_on) - self._test_constraint( - self.c_array, self.p1_inside, self.p2_outside, self.p3_on) + self._test_constraint(self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_constraint(self.c_array, self.p1_inside, self.p2_outside, self.p3_on) # Test for sparse arrays, works only for a center defined as CArray self._test_constraint( - self.c_array, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse()) + self.c_array, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + ) def test_projection(self): """Test for CConstraint.projection().""" - self._test_projection(self.c, self.p1_inside, self.p2_outside, - self.p3_on, CArray([1.7071, 1.7071])) - self._test_projection(self.c_array, self.p1_inside, self.p2_outside, - self.p3_on, CArray([1.7071, 1.7071])) + self._test_projection( + self.c, + self.p1_inside, + self.p2_outside, + self.p3_on, + CArray([1.7071, 1.7071]), + ) + self._test_projection( + self.c_array, + self.p1_inside, + self.p2_outside, + self.p3_on, + CArray([1.7071, 1.7071]), + ) # Test for sparse arrays, works only for a center defined as CArray self._test_projection( - self.c_array, self.p1_inside.tosparse(), - self.p2_outside.tosparse(), self.p3_on.tosparse(), - CArray([1.7071, 1.7071], tosparse=True)) + self.c_array, + self.p1_inside.tosparse(), + self.p2_outside.tosparse(), + self.p3_on.tosparse(), + CArray([1.7071, 1.7071], tosparse=True), + ) def test_gradient(self): """Test for CConstraint.gradient().""" # [1. 1.] is the center of the constraint, expected grad [0, 0] # however, numerical gradient is struggling so we avoid its comparison - self.assert_array_almost_equal( - self.c.gradient(self.p1_inside), CArray([0, 0])) + self.assert_array_almost_equal(self.c.gradient(self.p1_inside), CArray([0, 0])) self._test_gradient(self.c, CArray([1.1, 1.2])) self._test_gradient(self.c, self.p2_outside) @@ -81,8 +98,7 @@ def test_gradient(self): def test_plot(self): """Visualize the constraint.""" # Plotting constraint and "critical" points - self._test_plot( - self.c, self.p1_inside, self.p2_outside, self.p3_on) + self._test_plot(self.c, self.p1_inside, self.p2_outside, self.p3_on) def test_projection_and_violated(self): """Test that projection returns a point within the domain, even when @@ -92,5 +108,5 @@ def test_projection_and_violated(self): self.assertFalse(self.c.is_violated(p)) -if __name__ == '__main__': +if __name__ == "__main__": CConstraintTestCases.main() diff --git a/src/secml/optim/function/c_function.py b/src/secml/optim/function/c_function.py index 0f411030..dcfb758f 100644 --- a/src/secml/optim/function/c_function.py +++ b/src/secml/optim/function/c_function.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from scipy import optimize as sc_opt from secml.core import CCreator @@ -35,8 +36,9 @@ class CFunction(CCreator): class_type : 'generic' """ - __super__ = 'CFunction' - __class_type = 'generic' + + __super__ = "CFunction" + __class_type = "generic" def __init__(self, fun=None, gradient=None, n_dim=None): @@ -82,7 +84,8 @@ def _check_ndim(self, x): if self.n_dim is not None and n_dim != self.n_dim: raise ValueError( "unexpected dimension of input. " - "Got {:}, expected {:}".format(n_dim, self.n_dim)) + "Got {:}, expected {:}".format(n_dim, self.n_dim) + ) def fun(self, x, *args, **kwargs): """Evaluates function on x. @@ -173,11 +176,11 @@ def gradient_ndarray(self, x, *args, **kwargs): def has_fun(self): """True if function has been set.""" - return True if hasattr(self, '_fun') else False + return True if hasattr(self, "_fun") else False def has_gradient(self): """True if gradient has been set.""" - return True if hasattr(self, '_gradient') else False + return True if hasattr(self, "_gradient") else False def is_equal(self, x, val, tol=1e-6): """Evaluates if function value is close to `val` within tol.""" @@ -246,8 +249,7 @@ def approx_fprime(self, x, epsilon, *args, **kwargs): # double casting to always have a CArray xk_ndarray = CArray(x).ravel().tondarray() - epsilon = epsilon.tondarray() if \ - isinstance(epsilon, CArray) else epsilon + epsilon = epsilon.tondarray() if isinstance(epsilon, CArray) else epsilon # approx_fprime expects a scalar as output of fun def fun_ndarray(xk, f_args, f_kwargs): @@ -256,8 +258,9 @@ def fun_ndarray(xk, f_args, f_kwargs): return out_fun.item() # return scalar return out_fun # already scalar - return CArray(sc_opt.approx_fprime( - xk_ndarray, fun_ndarray, epsilon, args, kwargs)) + return CArray( + sc_opt.approx_fprime(xk_ndarray, fun_ndarray, epsilon, args, kwargs) + ) def check_grad(self, x, epsilon, *args, **kwargs): """Check the correctness of a gradient function by comparing diff --git a/src/secml/optim/function/c_function_3hcamel.py b/src/secml/optim/function/c_function_3hcamel.py index a33e4db0..1ae5230d 100644 --- a/src/secml/optim/function/c_function_3hcamel.py +++ b/src/secml/optim/function/c_function_3hcamel.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.optim.function import CFunction from secml.array import CArray @@ -27,13 +28,15 @@ class CFunctionThreeHumpCamel(CFunction): class_type : '3h-camel' """ - __class_type = '3h-camel' + + __class_type = "3h-camel" def __init__(self): # Passing data to CFunction super(CFunctionThreeHumpCamel, self).__init__( - fun=self._fun, n_dim=2, gradient=self._grad) + fun=self._fun, n_dim=2, gradient=self._grad + ) def _fun(self, x): """Apply Three-Hump Camel function to point x. @@ -52,12 +55,12 @@ def _fun(self, x): x = x.atleast_2d() if x.shape[1] != 2: raise ValueError( - "Three-Hump Camel function available for 2 dimensions only") + "Three-Hump Camel function available for 2 dimensions only" + ) # Split into 2 parts f1 = 2 * x[0].item() ** 2 - 1.05 * x[0].item() ** 4 - f2 = x[0].item() ** 6 / 6 + \ - x[0].item() * x[1].item() + x[1].item() ** 2 + f2 = x[0].item() ** 6 / 6 + x[0].item() * x[1].item() + x[1].item() ** 2 return f1 + f2 @@ -65,8 +68,10 @@ def _grad(self, x): """Three-Hump Camel function gradient wrt. point x.""" x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError("Gradient of Three-Hump Camel function " - "only available for 2 dimensions") + raise ValueError( + "Gradient of Three-Hump Camel function " + "only available for 2 dimensions" + ) # Computing gradient of each dimension grad1_1 = 4 * x[0] - 4.2 * x[0] ** 3 grad1_2 = x[0] ** 5 + x[1] @@ -90,7 +95,7 @@ def global_min(): Value of the global minimum of the function. """ - return 0. + return 0.0 @staticmethod def global_min_x(): @@ -104,4 +109,4 @@ def global_min_x(): The global minimum point of the function. """ - return CArray([0., 0.]) + return CArray([0.0, 0.0]) diff --git a/src/secml/optim/function/c_function_beale.py b/src/secml/optim/function/c_function_beale.py index 1e29773b..0409cfe0 100644 --- a/src/secml/optim/function/c_function_beale.py +++ b/src/secml/optim/function/c_function_beale.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.optim.function.c_function import CFunction from secml.array import CArray @@ -28,13 +29,15 @@ class CFunctionBeale(CFunction): class_type : 'beale' """ - __class_type = 'beale' + + __class_type = "beale" def __init__(self): # Passing data to CFunction super(CFunctionBeale, self).__init__( - fun=self._fun, n_dim=2, gradient=self._grad) + fun=self._fun, n_dim=2, gradient=self._grad + ) def _fun(self, x): """Apply Beale function to point x. @@ -52,8 +55,7 @@ def _fun(self, x): """ x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError( - "Beale function available for 2 dimensions only") + raise ValueError("Beale function available for 2 dimensions only") # Split into 3 parts f1 = (1.5 - x[0].item() + x[0].item() * x[1].item()) ** 2 @@ -66,16 +68,16 @@ def _grad(self, x): """Beale function gradient wrt. point x.""" x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError("Gradient of Beale function " - "only available for 2 dimensions") + raise ValueError( + "Gradient of Beale function " "only available for 2 dimensions" + ) # Computing gradient of each dimension grad1_1 = 2 * (1.5 - x[0] + x[0] * x[1]) * (-1 + x[1]) grad1_2 = 2 * (2.25 - x[0] + x[0] * x[1] ** 2) * (-1 + x[1] ** 2) grad1_3 = 2 * (2.625 - x[0] + x[0] * x[1] ** 3) * (-1 + x[1] ** 3) grad2_1 = 2 * (1.5 - x[0] + x[0] * x[1]) * x[0] grad2_2 = 2 * (2.25 - x[0] + x[0] * x[1] ** 2) * (2 * x[0] * x[1]) - grad2_3 = 2 * (2.625 - x[0] + x[0] * x[1] ** 3) * \ - (3 * x[0] * x[1] ** 2) + grad2_3 = 2 * (2.625 - x[0] + x[0] * x[1] ** 3) * (3 * x[0] * x[1] ** 2) grad1 = grad1_1 + grad1_2 + grad1_3 grad2 = grad2_1 + grad2_2 + grad2_3 @@ -94,7 +96,7 @@ def global_min(): Value of the global minimum of the function. """ - return 0. + return 0.0 @staticmethod def global_min_x(): @@ -108,4 +110,4 @@ def global_min_x(): The global minimum point of the function. """ - return CArray([3., 0.5]) + return CArray([3.0, 0.5]) diff --git a/src/secml/optim/function/c_function_linear.py b/src/secml/optim/function/c_function_linear.py index a06b6c86..073e751b 100644 --- a/src/secml/optim/function/c_function_linear.py +++ b/src/secml/optim/function/c_function_linear.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.optim.function import CFunction from secml.array import CArray @@ -18,20 +19,21 @@ class CFunctionLinear(CFunction): class_type : 'linear' """ - __class_type = 'linear' + + __class_type = "linear" def __init__(self, b, c): if b.ndim != 2 or b.shape[1] != 1: - raise ValueError('b is not a column vector!') + raise ValueError("b is not a column vector!") self._b = b self._c = c # Passing data to CFunction - super(CFunctionLinear, self).__init__(fun=self._linear_fun, - n_dim=b.shape[0], - gradient=self._linear_grad) + super(CFunctionLinear, self).__init__( + fun=self._linear_fun, n_dim=b.shape[0], gradient=self._linear_grad + ) def _linear_fun(self, x): """Apply linear function to point x. diff --git a/src/secml/optim/function/c_function_mccormick.py b/src/secml/optim/function/c_function_mccormick.py index e7b92a71..8ff402d9 100644 --- a/src/secml/optim/function/c_function_mccormick.py +++ b/src/secml/optim/function/c_function_mccormick.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.optim.function import CFunction from secml.array import CArray @@ -27,13 +28,15 @@ class CFunctionMcCormick(CFunction): class_type : mc-cormick' """ - __class_type = 'mc-cormick' + + __class_type = "mc-cormick" def __init__(self): # Passing data to CFunction super(CFunctionMcCormick, self).__init__( - fun=self._fun, n_dim=2, gradient=self._grad) + fun=self._fun, n_dim=2, gradient=self._grad + ) def _fun(self, x): """Apply McCormick function to point x. @@ -51,8 +54,7 @@ def _fun(self, x): """ x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError( - "McCormick function available for 2 dimensions only") + raise ValueError("McCormick function available for 2 dimensions only") # Split into 3 parts f1 = (x[0] + x[1]).sin().item() @@ -65,8 +67,9 @@ def _grad(self, x): """McCormick function gradient wrt. point x.""" x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError("Gradient of McCormick function " - "only available for 2 dimensions") + raise ValueError( + "Gradient of McCormick function " "only available for 2 dimensions" + ) # Computing gradient of each dimension grad1_1 = (x[0] + x[1]).cos() grad1_2 = 2 * (x[0] - x[1]) diff --git a/src/secml/optim/function/c_function_quadratic.py b/src/secml/optim/function/c_function_quadratic.py index cbbbb09f..a9ec54d9 100644 --- a/src/secml/optim/function/c_function_quadratic.py +++ b/src/secml/optim/function/c_function_quadratic.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from secml.optim.function import CFunction from secml.array import CArray @@ -18,32 +19,32 @@ class CFunctionQuadratic(CFunction): class_type : 'quadratic' """ - __class_type = 'quadratic' + + __class_type = "quadratic" def __init__(self, A, b, c): if len(A.shape) != 2: - raise ValueError('A is not a 2D matrix!') + raise ValueError("A is not a 2D matrix!") elif A.shape[0] != A.shape[1]: - raise ValueError('A is not a squared matrix!') + raise ValueError("A is not a squared matrix!") # TODO: Add check: A should be symmetric as well if len(b.shape) != 2 or b.shape[1] != 1: - raise ValueError('b is not a column vector!') + raise ValueError("b is not a column vector!") if b.shape[0] != A.shape[0]: - raise ValueError( - 'A and b have inconsistent dimensions!') + raise ValueError("A and b have inconsistent dimensions!") self._A = A self._b = b self._c = c # Passing data to CFunction - super(CFunctionQuadratic, self).__init__(fun=self._quadratic_fun, - n_dim=A.shape[0], - gradient=self._quadratic_grad) + super(CFunctionQuadratic, self).__init__( + fun=self._quadratic_fun, n_dim=A.shape[0], gradient=self._quadratic_grad + ) def _quadratic_fun(self, x): """Apply quadratic function to point x. diff --git a/src/secml/optim/function/c_function_rosenbrock.py b/src/secml/optim/function/c_function_rosenbrock.py index 71dcad05..74b2c047 100644 --- a/src/secml/optim/function/c_function_rosenbrock.py +++ b/src/secml/optim/function/c_function_rosenbrock.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + from secml.optim.function import CFunction from secml.array import CArray @@ -33,14 +34,15 @@ class CFunctionRosenbrock(CFunction): Journal 3.3 (1960): 175-184. """ - __class_type = 'rosenbrock' + + __class_type = "rosenbrock" def __init__(self): # Passing data to CFunction - super(CFunctionRosenbrock, self).__init__(fun=self._fun, - n_dim=None, - gradient=self._grad) + super(CFunctionRosenbrock, self).__init__( + fun=self._fun, n_dim=None, gradient=self._grad + ) def _fun(self, x): """Apply Rosenbrock function to point x. @@ -58,13 +60,13 @@ def _fun(self, x): """ x = x.atleast_2d() if x.shape[1] < 2: - raise ValueError( - "Rosenbrock function available for at least 2 dimensions") + raise ValueError("Rosenbrock function available for at least 2 dimensions") f = 0 # Starting value for n in range(x.shape[1] - 1): - f += 100 * (x[n+1].item() - x[n].item() ** 2) ** 2 + \ - (x[n].item() - 1) ** 2 + f += ( + 100 * (x[n + 1].item() - x[n].item() ** 2) ** 2 + (x[n].item() - 1) ** 2 + ) return f @@ -76,8 +78,9 @@ def _grad(self, x): """ x = x.atleast_2d() if x.shape[1] != 2: - raise ValueError("Gradient of Rosenbrock function " - "only available for 2 dimensions") + raise ValueError( + "Gradient of Rosenbrock function " "only available for 2 dimensions" + ) # Computing gradient of each dimension grad1 = -400 * (x[1] - x[0] ** 2) * x[0] + 2 * (x[0] - 1) grad2 = 200 * (x[1] - x[0] ** 2) @@ -96,7 +99,7 @@ def global_min(): Value of the global minimum of the function. """ - return 0. + return 0.0 @staticmethod def global_min_x(ndim=2): @@ -116,7 +119,6 @@ def global_min_x(ndim=2): """ if ndim < 2: - raise ValueError( - "Rosenbrock function available for at least 2 dimensions") + raise ValueError("Rosenbrock function available for at least 2 dimensions") - return CArray.ones((ndim, ), dtype=float) + return CArray.ones((ndim,), dtype=float) diff --git a/src/secml/optim/function/tests/c_function_testcases.py b/src/secml/optim/function/tests/c_function_testcases.py index 98be2c5d..e4ef67fa 100644 --- a/src/secml/optim/function/tests/c_function_testcases.py +++ b/src/secml/optim/function/tests/c_function_testcases.py @@ -30,8 +30,7 @@ def _test_fun_result(self, fun, x, res_expected): res_expected : scalar """ - self.logger.info( - "Checking value of {:} @ {:}".format(fun.class_type, x)) + self.logger.info("Checking value of {:} @ {:}".format(fun.class_type, x)) res = fun.fun(x) @@ -41,8 +40,9 @@ def _test_fun_result(self, fun, x, res_expected): self.assertTrue(is_scalar(res)) self.assertAlmostEqual(res_expected, res, places=4) - def _test_2D(self, fun, grid_limits=None, levels=None, - vmin=None, vmax=None, fun_args=()): + def _test_2D( + self, fun, grid_limits=None, levels=None, vmin=None, vmax=None, fun_args=() + ): """2D plot of the function. Parameters @@ -59,11 +59,21 @@ def _test_2D(self, fun, grid_limits=None, levels=None, self.logger.info("Plotting 2D of {:}".format(fun_name)) fig = CFigure(width=7) - fig.sp.plot_fun(func=fun.fun, plot_levels=True, - grid_limits=grid_limits, levels=levels, - n_grid_points=50, n_colors=200, - vmin=vmin, vmax=vmax, func_args=fun_args) + fig.sp.plot_fun( + func=fun.fun, + plot_levels=True, + grid_limits=grid_limits, + levels=levels, + n_grid_points=50, + n_colors=200, + vmin=vmin, + vmax=vmax, + func_args=fun_args, + ) fig.sp.title(fun_name) - fig.savefig(fm.join(fm.abspath(__file__), - 'test_function_{:}.pdf'.format(fun.class_type))) + fig.savefig( + fm.join( + fm.abspath(__file__), "test_function_{:}.pdf".format(fun.class_type) + ) + ) diff --git a/src/secml/optim/function/tests/test_function.py b/src/secml/optim/function/tests/test_function.py index 2221ed63..718f9743 100644 --- a/src/secml/optim/function/tests/test_function.py +++ b/src/secml/optim/function/tests/test_function.py @@ -9,7 +9,7 @@ class TestCFunction(CFunctionTestCases): def setUp(self): - avail_funcs = ['3h-camel', 'beale', 'mc-cormick', 'rosenbrock'] + avail_funcs = ["3h-camel", "beale", "mc-cormick", "rosenbrock"] # Instancing the available functions to test optimizer self.funcs = {} @@ -20,20 +20,21 @@ def test_approx_fprime_check_grad(self): """Test if the gradient check made up with COptimizer.approx_fprime() and .check_grad() methods is correct.""" self.logger.info( - "Test for COptimizer.approx_fprime() and .check_grad() methods.") + "Test for COptimizer.approx_fprime() and .check_grad() methods." + ) - x0 = CArray([1., 0.]) # Starting point for minimization + x0 = CArray([1.0, 0.0]) # Starting point for minimization for fun_id in self.funcs: fun = self.funcs[fun_id] self.logger.info( - "Testing grad approx of {:}".format(fun.__class__.__name__)) + "Testing grad approx of {:}".format(fun.__class__.__name__) + ) grad_err = fun.check_grad(x0, epsilon=1e-8) - self.logger.info( - "(Real grad - approx).norm(): {:}".format(grad_err)) + self.logger.info("(Real grad - approx).norm(): {:}".format(grad_err)) self.assertLess(grad_err, 1e-3) @@ -46,12 +47,12 @@ def _dfun_2_params(self, x, y): # Two dumb function that have **kwargs as the second parameter def _fun_kwargs(self, x, **kwargs): - if kwargs != {'y': 1}: + if kwargs != {"y": 1}: raise ValueError return 1 def _dfun_kwargs(self, x, **kwargs): - if kwargs != {'y': 1}: + if kwargs != {"y": 1}: raise ValueError return CArray([0]) @@ -74,13 +75,15 @@ def test_approx_fprime_check_param_passage(self): self.logger.info( "Test the parameters passage made up by " "COptimizer.approx_fprime() " - "and .check_grad() methods.") + "and .check_grad() methods." + ) - x0 = CArray([1.]) # Starting point for minimization + x0 = CArray([1.0]) # Starting point for minimization epsilon = 0.1 self.logger.info( - "Testing when the function and the gradient have two parameter") + "Testing when the function and the gradient have two parameter" + ) fun = CFunction(fun=self._fun_2_params, gradient=self._dfun_2_params) self.logger.info("Testing check_grad") @@ -132,5 +135,5 @@ def test_approx_fprime_check_param_passage(self): self.assertEqual(0, grad_err) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/function/tests/test_function_3hcamel.py b/src/secml/optim/function/tests/test_function_3hcamel.py index e7e015a4..85c990c1 100644 --- a/src/secml/optim/function/tests/test_function_3hcamel.py +++ b/src/secml/optim/function/tests/test_function_3hcamel.py @@ -8,13 +8,12 @@ class TestCFunctionThreeHumpCamel(CFunctionTestCases): """Unit test for CFunctionThreeHumpCamel.""" def setUp(self): - self.fun = CFunction.create('3h-camel') + self.fun = CFunction.create("3h-camel") def test_fun_result(self): """Test if function returns the correct value.""" self._show_global_min(self.fun) - self._test_fun_result( - self.fun, CArray([0, 0]), self.fun.global_min()) + self._test_fun_result(self.fun, CArray([0, 0]), self.fun.global_min()) def test_2D(self): """Plot of a 2D example.""" @@ -22,5 +21,5 @@ def test_2D(self): self._test_2D(self.fun, grid_limits, levels=[1], vmin=0, vmax=5) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/function/tests/test_function_beale.py b/src/secml/optim/function/tests/test_function_beale.py index 142386ce..db75a657 100644 --- a/src/secml/optim/function/tests/test_function_beale.py +++ b/src/secml/optim/function/tests/test_function_beale.py @@ -8,13 +8,12 @@ class TestCFunctionBeale(CFunctionTestCases): """Unit test for CFunctionBeale.""" def setUp(self): - self.fun = CFunction.create('beale') + self.fun = CFunction.create("beale") def test_fun_result(self): """Test if function returns the correct value.""" self._show_global_min(self.fun) - self._test_fun_result( - self.fun, CArray([3, 0.5]), self.fun.global_min()) + self._test_fun_result(self.fun, CArray([3, 0.5]), self.fun.global_min()) def test_2D(self): """Plot of a 2D example.""" @@ -22,5 +21,5 @@ def test_2D(self): self._test_2D(self.fun, grid_limits, levels=[1], vmin=0, vmax=5) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/function/tests/test_function_mccormick.py b/src/secml/optim/function/tests/test_function_mccormick.py index f66c4a57..2bd9354f 100644 --- a/src/secml/optim/function/tests/test_function_mccormick.py +++ b/src/secml/optim/function/tests/test_function_mccormick.py @@ -8,13 +8,14 @@ class TestCFunctionMcCormick(CFunctionTestCases): """Unit test for CFunctionBeale.""" def setUp(self): - self.fun = CFunction.create('mc-cormick') + self.fun = CFunction.create("mc-cormick") def test_fun_result(self): """Test if function returns the correct value.""" self._show_global_min(self.fun) self._test_fun_result( - self.fun, CArray([-0.5472, -1.5472]), self.fun.global_min()) + self.fun, CArray([-0.5472, -1.5472]), self.fun.global_min() + ) def test_2D(self): """Plot of a 2D example.""" @@ -22,5 +23,5 @@ def test_2D(self): self._test_2D(self.fun, grid_limits, levels=[0], vmin=-2, vmax=2) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/function/tests/test_function_quadratic.py b/src/secml/optim/function/tests/test_function_quadratic.py index 088094bc..b68c2d06 100644 --- a/src/secml/optim/function/tests/test_function_quadratic.py +++ b/src/secml/optim/function/tests/test_function_quadratic.py @@ -11,7 +11,7 @@ def setUp(self): A = CArray.eye(2, 2) b = CArray.zeros((2, 1)) c = 0 - self.fun = CFunction.create('quadratic', A, b, c) + self.fun = CFunction.create("quadratic", A, b, c) def test_fun_result(self): """Test if function returns the correct value.""" @@ -25,10 +25,10 @@ def test_2D(self): A = CArray.eye(2, 2) b = CArray.zeros(2).T - circle = CFunction.create('quadratic', A, b, 0) + circle = CFunction.create("quadratic", A, b, 0) self._test_2D(circle, grid_limits, levels=[16]) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/function/tests/test_function_rosenbrock.py b/src/secml/optim/function/tests/test_function_rosenbrock.py index 2d566df8..2a6faa64 100644 --- a/src/secml/optim/function/tests/test_function_rosenbrock.py +++ b/src/secml/optim/function/tests/test_function_rosenbrock.py @@ -8,7 +8,7 @@ class TestCFunctionRosenbrock(CFunctionTestCases): """Unit test for CFunctionRosenbrock.""" def setUp(self): - self.fun = CFunction.create('rosenbrock') + self.fun = CFunction.create("rosenbrock") def test_fun_result(self): """Test if function returns the correct value.""" @@ -23,5 +23,5 @@ def test_2D(self): self._test_2D(self.fun, grid_limits, levels=[1], vmin=0, vmax=10) -if __name__ == '__main__': +if __name__ == "__main__": CFunctionTestCases.main() diff --git a/src/secml/optim/optimizers/c_optimizer.py b/src/secml/optim/optimizers/c_optimizer.py index fe648c3e..7cfc48c8 100644 --- a/src/secml/optim/optimizers/c_optimizer.py +++ b/src/secml/optim/optimizers/c_optimizer.py @@ -6,6 +6,7 @@ .. moduleauthor:: Battista Biggio """ + from abc import ABCMeta, abstractmethod from functools import partial @@ -33,7 +34,8 @@ class COptimizer(CCreator, metaclass=ABCMeta): A box constraint. Default None. """ - __super__ = 'COptimizer' + + __super__ = "COptimizer" def __init__(self, fun, constr=None, bounds=None): @@ -41,8 +43,7 @@ def __init__(self, fun, constr=None, bounds=None): # fun: the internal function to be always minimized # f: the "public" function. By default, minimize f(x), so fun=f if not isinstance(fun, CFunction): - raise TypeError( - "Input parameter is not a `CFunction` object.") + raise TypeError("Input parameter is not a `CFunction` object.") self._f = fun self._fun = fun @@ -110,8 +111,7 @@ def constr(self, constr): return if not isinstance(constr, CConstraint): - raise TypeError( - "Input parameter is not a `CConstraint` object.") + raise TypeError("Input parameter is not a `CConstraint` object.") self._constr = constr @@ -128,8 +128,7 @@ def bounds(self, bounds): return if not isinstance(bounds, CConstraintBox): - raise TypeError( - "Input parameter is not a `CConstraintBox` object.") + raise TypeError("Input parameter is not a `CConstraintBox` object.") self._bounds = bounds @@ -155,7 +154,7 @@ def minimize(self, x_init, args=(), **kwargs): Additional parameters of the minimization method. """ - raise NotImplementedError('Function `minimize` is not implemented.') + raise NotImplementedError("Function `minimize` is not implemented.") def maximize(self, x_init, args=(), **kwargs): """Interface for maximizers. @@ -188,7 +187,7 @@ def grad_inv(wrapped_grad, z, *f_args, **f_kwargs): self._fun = CFunction( fun=partial(fun_inv, self._f.fun), - gradient=partial(grad_inv, self._f.gradient) + gradient=partial(grad_inv, self._f.gradient), ) x = self.minimize(x_init, args=args, **kwargs) diff --git a/src/secml/optim/optimizers/c_optimizer_pgd.py b/src/secml/optim/optimizers/c_optimizer_pgd.py index 3ae8aaa7..ffe7b028 100644 --- a/src/secml/optim/optimizers/c_optimizer_pgd.py +++ b/src/secml/optim/optimizers/c_optimizer_pgd.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + from secml.array import CArray from secml.optim.optimizers import COptimizer @@ -44,17 +45,12 @@ class COptimizerPGD(COptimizer): class_type : 'pgd' """ - __class_type = 'pgd' - def __init__(self, fun, - constr=None, - bounds=None, - eta=1e-3, - eps=1e-4, - max_iter=200): + __class_type = "pgd" + + def __init__(self, fun, constr=None, bounds=None, eta=1e-3, eps=1e-4, max_iter=200): - COptimizer.__init__(self, fun=fun, - constr=constr, bounds=bounds) + COptimizer.__init__(self, fun=fun, constr=constr, bounds=bounds) # Read/write attributes self.eta = eta # gradient step size @@ -116,8 +112,8 @@ def _return_best_solution(self, i): f_seq = self.f_seq[:i] best_sol_idx = f_seq.argmin() - self._x_seq = self.x_seq[:best_sol_idx + 1, :] - self._f_seq = self.f_seq[:best_sol_idx + 1] + self._x_seq = self.x_seq[: best_sol_idx + 1, :] + self._f_seq = self.f_seq[: best_sol_idx + 1] self._x_opt = self._x_seq[-1, :] return self._x_opt @@ -147,7 +143,9 @@ def minimize(self, x_init, args=(), **kwargs): if len(kwargs) != 0: raise ValueError( "{:} does not accept additional parameters.".format( - self.__class__.__name__)) + self.__class__.__name__ + ) + ) # reset fun and grad eval counts for both fun and f (by default fun==f) self._f.reset_eval() @@ -159,11 +157,12 @@ def minimize(self, x_init, args=(), **kwargs): x0 = self.constr.center if self.bounds is not None and self.bounds.is_violated(x0): import warnings + warnings.warn( "x0 " + str(x0) + " is outside of the given bounds.", - category=RuntimeWarning) - self._x_seq = CArray.zeros((1, x0.size), - sparse=x0.issparse, dtype=x0.dtype) + category=RuntimeWarning, + ) + self._x_seq = CArray.zeros((1, x0.size), sparse=x0.issparse, dtype=x0.dtype) self._f_seq = CArray.zeros(1) self._x_seq[0, :] = x0 self._f_seq[0] = self._fun.fun(x0, *args) @@ -177,13 +176,16 @@ def minimize(self, x_init, args=(), **kwargs): if self.constr is not None and self.constr.is_violated(x_init): x_init = self.constr.projection(x_init) - if (self.bounds is not None and self.bounds.is_violated(x_init)) or \ - (self.constr is not None and self.constr.is_violated(x_init)): + if (self.bounds is not None and self.bounds.is_violated(x_init)) or ( + self.constr is not None and self.constr.is_violated(x_init) + ): raise ValueError( - "x_init " + str(x_init) + " is outside of feasible domain.") + "x_init " + str(x_init) + " is outside of feasible domain." + ) self._x_seq = CArray.zeros( - (self._max_iter, x_init.size), sparse=x_init.issparse) + (self._max_iter, x_init.size), sparse=x_init.issparse + ) self._f_seq = CArray.zeros(self._max_iter) x = x_init.deepcopy() @@ -195,24 +197,38 @@ def minimize(self, x_init, args=(), **kwargs): self._f_seq[i] = self._fun.fun(x, *args) if i > 0 and abs(self.f_seq[i - 1] - self.f_seq[i]) < self.eps: - self.logger.debug("Flat region, exiting... {:} {:}".format( - self._f_seq[i], self._f_seq[i - 1])) + self.logger.debug( + "Flat region, exiting... {:} {:}".format( + self._f_seq[i], self._f_seq[i - 1] + ) + ) return self._return_best_solution(i) - if i > 10 and abs(self.f_seq[i - 5:i].mean() - - self.f_seq[i - 10:i - 5].mean()) < self.eps: + if ( + i > 10 + and abs( + self.f_seq[i - 5 : i].mean() - self.f_seq[i - 10 : i - 5].mean() + ) + < self.eps + ): self.logger.debug( "Flat region over 10 iterations, exiting... {:} {:}".format( - self.f_seq[i - 3:i].mean(), - self.f_seq[i - 6:i - 3].mean())) + self.f_seq[i - 3 : i].mean(), self.f_seq[i - 6 : i - 3].mean() + ) + ) return self._return_best_solution(i) grad = self._fun.gradient(x, *args) # debugging information self.logger.debug( - 'Iter.: ' + str(i) + ', f(x): ' + - str(self._f_seq[i].item()) + ', |df/dx|: ' + str(grad.norm())) + "Iter.: " + + str(i) + + ", f(x): " + + str(self._f_seq[i].item()) + + ", |df/dx|: " + + str(grad.norm()) + ) # make a step into the deepest descent direction x -= self.eta * grad diff --git a/src/secml/optim/optimizers/c_optimizer_pgd_exp.py b/src/secml/optim/optimizers/c_optimizer_pgd_exp.py index 0762db0f..f36ff459 100644 --- a/src/secml/optim/optimizers/c_optimizer_pgd_exp.py +++ b/src/secml/optim/optimizers/c_optimizer_pgd_exp.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + import numpy as np from secml.array import CArray @@ -55,20 +56,32 @@ class COptimizerPGDExp(COptimizerPGDLS): class_type : 'pgd-exp' """ - __class_type = 'pgd-exp' - def __init__(self, fun, - constr=None, bounds=None, - eta=1e-3, - eta_min=None, - eta_max=None, - max_iter=1000, - eps=1e-4): + __class_type = "pgd-exp" + + def __init__( + self, + fun, + constr=None, + bounds=None, + eta=1e-3, + eta_min=None, + eta_max=None, + max_iter=1000, + eps=1e-4, + ): COptimizerPGDLS.__init__( - self, fun=fun, constr=constr, bounds=bounds, - eta=eta, eta_min=eta_min, eta_max=eta_max, - max_iter=max_iter, eps=eps) + self, + fun=fun, + constr=constr, + bounds=bounds, + eta=eta, + eta_min=eta_min, + eta_max=eta_max, + max_iter=max_iter, + eps=eps, + ) ########################################## # METHODS @@ -81,7 +94,10 @@ def _init_line_search(self, eta, eta_min, eta_max): constr=self._constr, bounds=self._bounds, max_iter=20, - eta=eta, eta_min=eta_min, eta_max=eta_max) + eta=eta, + eta_min=eta_min, + eta_max=eta_max, + ) def _xk(self, x, fx, *args): """Returns a new point after gradient descent.""" @@ -99,12 +115,13 @@ def _xk(self, x, fx, *args): # filter modifications that would violate bounds (to sparsify gradient) grad = self._box_projected_gradient(x, grad) - if self.constr is not None and self.constr.class_type == 'l1': + if self.constr is not None and self.constr.class_type == "l1": # project z onto l1 constraint (via dual norm) grad = self._l1_projected_gradient(grad) - next_point = CArray(x - grad * self._line_search.eta, - dtype=self._dtype, tosparse=x.issparse) + next_point = CArray( + x - grad * self._line_search.eta, dtype=self._dtype, tosparse=x.issparse + ) if self.constr is not None and self.constr.is_violated(next_point): self.logger.debug("Line-search on distance constraint.") @@ -112,7 +129,7 @@ def _xk(self, x, fx, *args): grad_norm = grad.norm(order=2) if grad_norm > 1e-20: grad /= grad_norm - if self.constr.class_type == 'l1': + if self.constr.class_type == "l1": grad = grad.sign() # to move along the l1 ball surface z, fz = self._line_search.minimize(x, -grad, fx) return z, fz @@ -144,7 +161,7 @@ def _return_best_solution(self, i=None): """ if i is not None: - f_seq = self.f_seq[:i + 1] + f_seq = self.f_seq[: i + 1] else: f_seq = self.f_seq best_sol_idx = f_seq.argmin() @@ -152,8 +169,8 @@ def _return_best_solution(self, i=None): self.logger.debug("solutions {:}".format(f_seq)) self.logger.debug("best solution {:}".format(best_sol_idx)) - self._x_seq = self.x_seq[:best_sol_idx + 1, :] - self._f_seq = self.f_seq[:best_sol_idx + 1] + self._x_seq = self.x_seq[: best_sol_idx + 1, :] + self._f_seq = self.f_seq[: best_sol_idx + 1] self._x_opt = self._x_seq[-1, :] return self._x_opt @@ -182,16 +199,16 @@ def minimize(self, x_init, args=(), **kwargs): if len(kwargs) != 0: raise ValueError( "{:} does not accept additional parameters.".format( - self.__class__.__name__)) + self.__class__.__name__ + ) + ) # reset fun and grad eval counts for both fun and f (by default fun==f) self._f.reset_eval() self._fun.reset_eval() # initialize line search (and re-assign fun to it) - self._init_line_search(eta=self.eta, - eta_min=self.eta_min, - eta_max=self.eta_max) + self._init_line_search(eta=self.eta, eta_min=self.eta_min, eta_max=self.eta_max) # constr.radius = 0, exit if self.constr is not None and self.constr.radius == 0: @@ -199,11 +216,12 @@ def minimize(self, x_init, args=(), **kwargs): x0 = self.constr.center if self.bounds is not None and self.bounds.is_violated(x0): import warnings + warnings.warn( "x0 " + str(x0) + " is outside of the given bounds.", - category=RuntimeWarning) - self._x_seq = CArray.zeros( - (1, x0.size), sparse=x0.issparse, dtype=x0.dtype) + category=RuntimeWarning, + ) + self._x_seq = CArray.zeros((1, x0.size), sparse=x0.issparse, dtype=x0.dtype) self._f_seq = CArray.zeros(1) self._x_seq[0, :] = x0 self._f_seq[0] = self._fun.fun(x0, *args) @@ -219,10 +237,10 @@ def minimize(self, x_init, args=(), **kwargs): if self.constr is not None and self.constr.is_violated(x): x = self.constr.projection(x) - if (self.bounds is not None and self.bounds.is_violated(x)) or \ - (self.constr is not None and self.constr.is_violated(x)): - raise ValueError( - "x_init " + str(x) + " is outside of feasible domain.") + if (self.bounds is not None and self.bounds.is_violated(x)) or ( + self.constr is not None and self.constr.is_violated(x) + ): + raise ValueError("x_init " + str(x) + " is outside of feasible domain.") # dtype depends on x and eta (the grid discretization) if np.issubdtype(x_init.dtype, np.floating): @@ -232,9 +250,9 @@ def minimize(self, x_init, args=(), **kwargs): self._dtype = self._line_search.eta.dtype # initialize x_seq and f_seq - self._x_seq = CArray.zeros((self.max_iter, x_init.size), - sparse=x_init.issparse, - dtype=self._dtype) + self._x_seq = CArray.zeros( + (self.max_iter, x_init.size), sparse=x_init.issparse, dtype=self._dtype + ) self._f_seq = CArray.zeros(self.max_iter) # The first point is obviously the starting point, @@ -243,7 +261,7 @@ def minimize(self, x_init, args=(), **kwargs): self._x_seq[0, :] = x self._f_seq[0] = fx - self.logger.debug('Iter.: ' + str(0) + ', f(x): ' + str(fx)) + self.logger.debug("Iter.: " + str(0) + ", f(x): " + str(fx)) for i in range(1, self.max_iter): @@ -255,24 +273,34 @@ def minimize(self, x_init, args=(), **kwargs): self._f_seq[i] = fx self._x_opt = x - self.logger.debug('Iter.: ' + str(i) + - ', f(x): ' + str(fx) + - ', norm(gr(x)): ' + - str(CArray(self._grad).norm())) + self.logger.debug( + "Iter.: " + + str(i) + + ", f(x): " + + str(fx) + + ", norm(gr(x)): " + + str(CArray(self._grad).norm()) + ) diff = abs(self.f_seq[i].item() - self.f_seq[i - 1].item()) if diff < self.eps: self.logger.debug( "Flat region, exiting... ({:.4f} / {:.4f})".format( - self._f_seq[i].item(), - self._f_seq[i - 1].item())) + self._f_seq[i].item(), self._f_seq[i - 1].item() + ) + ) return self._return_best_solution(i) - if i > 20 and abs(self.f_seq[i - 10:i].mean() - - self.f_seq[i - 20:i - 10].mean()) < self.eps: + if ( + i > 20 + and abs( + self.f_seq[i - 10 : i].mean() - self.f_seq[i - 20 : i - 10].mean() + ) + < self.eps + ): self.logger.debug("Flat region for 20 iterations, exiting...") return self._return_best_solution(i) - self.logger.warning('Maximum iterations reached. Exiting.') + self.logger.warning("Maximum iterations reached. Exiting.") return self._return_best_solution() diff --git a/src/secml/optim/optimizers/c_optimizer_pgd_ls.py b/src/secml/optim/optimizers/c_optimizer_pgd_ls.py index b3a30f3e..136529cd 100644 --- a/src/secml/optim/optimizers/c_optimizer_pgd_ls.py +++ b/src/secml/optim/optimizers/c_optimizer_pgd_ls.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + import numpy as np from secml.array import CArray @@ -59,18 +60,22 @@ class COptimizerPGDLS(COptimizer): class_type : 'pgd-ls' """ - __class_type = 'pgd-ls' - def __init__(self, fun, - constr=None, bounds=None, - eta=1e-3, - eta_min=None, - eta_max=None, - max_iter=1000, - eps=1e-4): + __class_type = "pgd-ls" + + def __init__( + self, + fun, + constr=None, + bounds=None, + eta=1e-3, + eta_min=None, + eta_max=None, + max_iter=1000, + eps=1e-4, + ): - COptimizer.__init__(self, fun=fun, - constr=constr, bounds=bounds) + COptimizer.__init__(self, fun=fun, constr=constr, bounds=bounds) # Read/write attributes self.eta = eta @@ -142,7 +147,10 @@ def _init_line_search(self, eta, eta_min, eta_max): constr=self._constr, bounds=self._bounds, max_iter=50, - eta=eta, eta_min=eta_min, eta_max=eta_max) + eta=eta, + eta_min=eta_min, + eta_max=eta_max, + ) @staticmethod def _l1_projected_gradient(grad): @@ -158,7 +166,8 @@ def _l1_projected_gradient(grad): argmax_pos = CArray(abs_grad == grad_max).nnz_indices[1][:1] # TODO: not sure if proj_grad should be always sparse (grad is not) proj_grad = CArray.zeros( - shape=grad.shape, sparse=grad.issparse, dtype=grad.dtype) + shape=grad.shape, sparse=grad.issparse, dtype=grad.dtype + ) proj_grad[argmax_pos] = grad[argmax_pos].sign() return proj_grad @@ -183,11 +192,17 @@ def _box_projected_gradient(self, x, grad): # Use (ad wrap in CArray) np.isclose with atol=1e-6, rtol=0 # FIXME: converting grad to dense as the sparse vs sparse logical_and # is too slow - x_lb = (x.round(6) == CArray(self.bounds.lb).round(6)).logical_and( - grad > 0).astype(bool) - - x_ub = (x.round(6) == CArray(self.bounds.ub).round(6)).logical_and( - grad < 0).astype(bool) + x_lb = ( + (x.round(6) == CArray(self.bounds.lb).round(6)) + .logical_and(grad > 0) + .astype(bool) + ) + + x_ub = ( + (x.round(6) == CArray(self.bounds.ub).round(6)) + .logical_and(grad < 0) + .astype(bool) + ) # reset gradient for unfeasible features grad[x_lb + x_ub] = 0 @@ -213,17 +228,18 @@ def _xk(self, x, fx, *args): # filter modifications that would violate bounds (to sparsify gradient) grad = self._box_projected_gradient(x, grad) - if self.constr is not None and self.constr.class_type == 'l1': + if self.constr is not None and self.constr.class_type == "l1": # project z onto l1 constraint (via dual norm) grad = self._l1_projected_gradient(grad) - next_point = CArray(x - grad * self._line_search.eta, - dtype=self._dtype, tosparse=x.issparse) + next_point = CArray( + x - grad * self._line_search.eta, dtype=self._dtype, tosparse=x.issparse + ) if self.constr is not None and self.constr.is_violated(next_point): self.logger.debug("Line-search on distance constraint.") grad = CArray(x - self.constr.projection(next_point)) - if self.constr.class_type == 'l1': + if self.constr.class_type == "l1": grad = grad.sign() # to move along the l1 ball surface z, fz = self._line_search.minimize(x, -grad, fx) return z, fz @@ -261,16 +277,16 @@ def minimize(self, x_init, args=(), **kwargs): if len(kwargs) != 0: raise ValueError( "{:} does not accept additional parameters.".format( - self.__class__.__name__)) + self.__class__.__name__ + ) + ) # reset fun and grad eval counts for both fun and f (by default fun==f) self._f.reset_eval() self._fun.reset_eval() # initialize line search (and re-assign fun to it) - self._init_line_search(eta=self.eta, - eta_min=self.eta_min, - eta_max=self.eta_max) + self._init_line_search(eta=self.eta, eta_min=self.eta_min, eta_max=self.eta_max) # constr.radius = 0, exit if self.constr is not None and self.constr.radius == 0: @@ -278,11 +294,12 @@ def minimize(self, x_init, args=(), **kwargs): x0 = self.constr.center if self.bounds is not None and self.bounds.is_violated(x0): import warnings + warnings.warn( "x0 " + str(x0) + " is outside of the given bounds.", - category=RuntimeWarning) - self._x_seq = CArray.zeros( - (1, x0.size), sparse=x0.issparse, dtype=x0.dtype) + category=RuntimeWarning, + ) + self._x_seq = CArray.zeros((1, x0.size), sparse=x0.issparse, dtype=x0.dtype) self._f_seq = CArray.zeros(1) self._x_seq[0, :] = x0 self._f_seq[0] = self._fun.fun(x0, *args) @@ -298,10 +315,10 @@ def minimize(self, x_init, args=(), **kwargs): if self.constr is not None and self.constr.is_violated(x): x = self.constr.projection(x) - if (self.bounds is not None and self.bounds.is_violated(x)) or \ - (self.constr is not None and self.constr.is_violated(x)): - raise ValueError( - "x_init " + str(x) + " is outside of feasible domain.") + if (self.bounds is not None and self.bounds.is_violated(x)) or ( + self.constr is not None and self.constr.is_violated(x) + ): + raise ValueError("x_init " + str(x) + " is outside of feasible domain.") # dtype depends on x and eta (the grid discretization) if np.issubdtype(x_init.dtype, np.floating): @@ -311,9 +328,9 @@ def minimize(self, x_init, args=(), **kwargs): self._dtype = self._line_search.eta.dtype # initialize x_seq and f_seq - self._x_seq = CArray.zeros((self.max_iter, x_init.size), - sparse=x_init.issparse, - dtype=self._dtype) + self._x_seq = CArray.zeros( + (self.max_iter, x_init.size), sparse=x_init.issparse, dtype=self._dtype + ) self._f_seq = CArray.zeros(self.max_iter) # The first point is obviously the starting point, @@ -322,7 +339,7 @@ def minimize(self, x_init, args=(), **kwargs): self._x_seq[0, :] = x self._f_seq[0] = fx - self.logger.debug('Iter.: ' + str(0) + ', f(x): ' + str(fx)) + self.logger.debug("Iter.: " + str(0) + ", f(x): " + str(fx)) for i in range(1, self.max_iter): @@ -334,20 +351,26 @@ def minimize(self, x_init, args=(), **kwargs): self._f_seq[i] = fx self._x_opt = x - self.logger.debug('Iter.: ' + str(i) + - ', f(x): ' + str(fx) + - ', norm(gr(x)): ' + - str(CArray(self._grad).norm())) + self.logger.debug( + "Iter.: " + + str(i) + + ", f(x): " + + str(fx) + + ", norm(gr(x)): " + + str(CArray(self._grad).norm()) + ) diff = abs(self.f_seq[i].item() - self.f_seq[i - 1].item()) if diff < self.eps: self.logger.debug( "Flat region, exiting... ({:.4f} / {:.4f})".format( - self._f_seq[i].item(), self._f_seq[i - 1].item())) - self._x_seq = self.x_seq[:i + 1, :] - self._f_seq = self.f_seq[:i + 1] + self._f_seq[i].item(), self._f_seq[i - 1].item() + ) + ) + self._x_seq = self.x_seq[: i + 1, :] + self._f_seq = self.f_seq[: i + 1] return x - self.logger.warning('Maximum iterations reached. Exiting.') + self.logger.warning("Maximum iterations reached. Exiting.") return x diff --git a/src/secml/optim/optimizers/c_optimizer_scipy.py b/src/secml/optim/optimizers/c_optimizer_scipy.py index f96936dc..b785efdb 100644 --- a/src/secml/optim/optimizers/c_optimizer_scipy.py +++ b/src/secml/optim/optimizers/c_optimizer_scipy.py @@ -6,12 +6,13 @@ .. moduleauthor:: Marco Melis """ + from scipy import optimize as sc_opt from secml.array import CArray from secml.optim.optimizers import COptimizer -SUPPORTED_METHODS = ['BFGS', 'L-BFGS-B'] +SUPPORTED_METHODS = ["BFGS", "L-BFGS-B"] class COptimizerScipy(COptimizer): @@ -22,7 +23,8 @@ class COptimizerScipy(COptimizer): class_type : 'scipy-opt' """ - __class_type = 'scipy-opt' + + __class_type = "scipy-opt" def _bounds_to_scipy(self): """Converts bounds to scipy format. @@ -90,7 +92,7 @@ def minimize(self, x_init, args=(), **kwargs): options : dict, optional A dictionary of solver options. All methods accept the following generic options: - + - maxiter : int Maximum number of iterations to perform. - disp : bool @@ -133,15 +135,15 @@ def minimize(self, x_init, args=(), **kwargs): self._fun.reset_eval() # select method - method = kwargs['method'] if 'method' in kwargs else None + method = kwargs["method"] if "method" in kwargs else None if method is None: # Only 'L-BFGS-B` supports bounds - method = 'BFGS' if self.bounds is None else 'L-BFGS-B' + method = "BFGS" if self.bounds is None else "L-BFGS-B" # check if method is supported if method not in SUPPORTED_METHODS: raise NotImplementedError("selected method is not supported.") # set method - kwargs['method'] = method + kwargs["method"] = method # we're not supporting any solver with constraints at this stage if self.constr is not None: @@ -149,32 +151,31 @@ def minimize(self, x_init, args=(), **kwargs): # converting input parameters to scipy # 1) gradient (jac) - jac = kwargs['jac'] if 'jac' in kwargs else self._fun.gradient_ndarray - kwargs['jac'] = jac + jac = kwargs["jac"] if "jac" in kwargs else self._fun.gradient_ndarray + kwargs["jac"] = jac # 2) bounds - bounds = kwargs['bounds'] if 'bounds' in kwargs else None + bounds = kwargs["bounds"] if "bounds" in kwargs else None if bounds is None: bounds = self._bounds_to_scipy() - kwargs['bounds'] = bounds + kwargs["bounds"] = bounds if self.verbose >= 2: # Override verbosity options - kwargs['options']['disp'] = True + kwargs["options"]["disp"] = True # call minimize now - sc_opt_out = sc_opt.minimize(self._fun.fun_ndarray, - x_init.ravel().tondarray(), - args=args, **kwargs) + sc_opt_out = sc_opt.minimize( + self._fun.fun_ndarray, x_init.ravel().tondarray(), args=args, **kwargs + ) if not sc_opt_out.success: - self.logger.warning( - "Optimization has not exited successfully!\n") + self.logger.warning("Optimization has not exited successfully!\n") if self.verbose >= 1: # Workaround for scipy message randomly being a str or bytes if isinstance(sc_opt_out.message, str): self.logger.info(sc_opt_out.message + "\n") else: - self.logger.info(str(sc_opt_out.message, 'ascii') + "\n") + self.logger.info(str(sc_opt_out.message, "ascii") + "\n") self._f_seq = CArray(sc_opt_out.fun) # only last iter available @@ -183,11 +184,14 @@ def minimize(self, x_init, args=(), **kwargs): # check if point is valid # i.e., if the selected solver does not ignore the constraints if self.constr is not None and self.constr.is_violated(self.x_opt): - self.logger.warning("Constraints are not satisfied. " - "The scipy solver may be ignoring them.\n") + self.logger.warning( + "Constraints are not satisfied. " + "The scipy solver may be ignoring them.\n" + ) if self.bounds is not None and self.bounds.is_violated(self.x_opt): - self.logger.warning("Bounds are not satisfied. " - "The scipy solver may be ignoring them.\n") + self.logger.warning( + "Bounds are not satisfied. " "The scipy solver may be ignoring them.\n" + ) return self.x_opt diff --git a/src/secml/optim/optimizers/line_search/c_line_search.py b/src/secml/optim/optimizers/line_search/c_line_search.py index da180e27..68269b16 100644 --- a/src/secml/optim/optimizers/line_search/c_line_search.py +++ b/src/secml/optim/optimizers/line_search/c_line_search.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + from abc import ABCMeta, abstractmethod from secml.core import CCreator @@ -31,9 +32,10 @@ class CLineSearch(CCreator, metaclass=ABCMeta): Minimum resolution of the line-search grid. Default 1e-4. max_iter : int, optional Maximum number of iterations of the line search. Default 20. - + """ - __super__ = 'CLineSearch' + + __super__ = "CLineSearch" def __init__(self, fun, constr=None, bounds=None, eta=1e-4, max_iter=20): diff --git a/src/secml/optim/optimizers/line_search/c_line_search_bisect.py b/src/secml/optim/optimizers/line_search/c_line_search_bisect.py index 12260456..e5e0313b 100644 --- a/src/secml/optim/optimizers/line_search/c_line_search_bisect.py +++ b/src/secml/optim/optimizers/line_search/c_line_search_bisect.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + import numpy as np from secml.optim.optimizers.line_search import CLineSearch @@ -38,15 +39,23 @@ class CLineSearchBisect(CLineSearch): class_type : 'bisect' """ - __class_type = 'bisect' - def __init__(self, fun, constr=None, bounds=None, - eta=1e-4, eta_min=0.1, eta_max=None, - max_iter=20): + __class_type = "bisect" + + def __init__( + self, + fun, + constr=None, + bounds=None, + eta=1e-4, + eta_min=0.1, + eta_max=None, + max_iter=20, + ): CLineSearch.__init__( - self, fun=fun, constr=constr, bounds=bounds, - eta=eta, max_iter=max_iter) + self, fun=fun, constr=constr, bounds=bounds, eta=eta, max_iter=max_iter + ) # init attributes self._eta_max = None @@ -117,10 +126,8 @@ def _update_z(self, x, eta, d): def _is_feasible(self, x): """Checks if x is within the feasible domain.""" - constr_violation = False if self.constr is None else \ - self.constr.is_violated(x) - bounds_violation = False if self.bounds is None else \ - self.bounds.is_violated(x) + constr_violation = False if self.constr is None else self.constr.is_violated(x) + bounds_violation = False if self.bounds is None else self.bounds.is_violated(x) if constr_violation or bounds_violation: return False @@ -130,10 +137,8 @@ def _is_feasible(self, x): def _select_best_point(self, x, d, idx_min, idx_max, **kwargs): """Returns best point among x and the two points found by the search. In practice, if f(x + eta*d) increases on d, we return x.""" - x1 = CArray(x + d * self.eta * idx_min, - dtype=self._dtype, tosparse=x.issparse) - x2 = CArray(x + d * self.eta * idx_max, - dtype=self._dtype, tosparse=x.issparse) + x1 = CArray(x + d * self.eta * idx_min, dtype=self._dtype, tosparse=x.issparse) + x2 = CArray(x + d * self.eta * idx_max, dtype=self._dtype, tosparse=x.issparse) self.logger.info("Select best point between...") self.logger.info("x (f: {:}) -> \n{:}".format(self._fx, x)) @@ -142,29 +147,36 @@ def _select_best_point(self, x, d, idx_min, idx_max, **kwargs): f0 = self._fx - if not self._is_feasible(x1) and \ - not self._is_feasible(x2): + if not self._is_feasible(x1) and not self._is_feasible(x2): self.logger.debug("x1 and x2 are not feasible. Returning x.") return x, f0 # uses cached values (if available) to save computations - f1 = self._fun_idx_min if self._fun_idx_min is not None else \ - self.fun.fun(x1, **kwargs) + f1 = ( + self._fun_idx_min + if self._fun_idx_min is not None + else self.fun.fun(x1, **kwargs) + ) if not self._is_feasible(x2): if f1 < f0: - self.logger.debug("x2 not feasible. Returning x1." - " f(x): " + str(f0) + - ", f(x1): " + str(f1)) + self.logger.debug( + "x2 not feasible. Returning x1." + " f(x): " + str(f0) + ", f(x1): " + str(f1) + ) return x1, f1 - self.logger.debug("x2 not feasible. Returning x." - " f(x): " + str(f0) + - ", f(x1): " + str(f1)) + self.logger.debug( + "x2 not feasible. Returning x." + " f(x): " + str(f0) + ", f(x1): " + str(f1) + ) return x, f0 # uses cached values (if available) to save computations - f2 = self._fun_idx_max if self._fun_idx_max is not None else \ - self.fun.fun(x2, **kwargs) + f2 = ( + self._fun_idx_max + if self._fun_idx_max is not None + else self.fun.fun(x2, **kwargs) + ) if not self._is_feasible(x1): if f2 < f0: @@ -223,19 +235,28 @@ def _compute_eta_max(self, x, d, **kwargs): t = CArray(eta / self.eta).round() self.logger.debug( - "[_compute_eta_max] eta: " + str(eta) + ", x: " + - str(x[x != 0]) + ", f(x): " + str(self._fx)) + "[_compute_eta_max] eta: " + + str(eta) + + ", x: " + + str(x[x != 0]) + + ", f(x): " + + str(self._fx) + ) # update z and fz z = self._update_z(x, eta, d) self.logger.debug( - "[_compute_eta_max] eta max, eta: " + str(eta) + ", z: " + - str(z[z != 0]) + ", f(z): " + str(self._fz)) + "[_compute_eta_max] eta max, eta: " + + str(eta) + + ", z: " + + str(z[z != 0]) + + ", f(z): " + + str(self._fz) + ) # divide eta by 2 if x+eta*d goes out of bounds or fz decreases # update (if required) z and fz - while eta > self.eta and \ - (not self._is_feasible(z) or self._fz > self._fx): + while eta > self.eta and (not self._is_feasible(z) or self._fz > self._fx): t = CArray(t / 2).round() eta = t * self.eta @@ -257,8 +278,13 @@ def _compute_eta_max(self, x, d, **kwargs): self._fun_idx_max = self._fz self.logger.debug( - "[_compute_eta_max] eta: " + str(eta) + ", z: " + - str(z[z != 0]) + ", f(z): " + str(self._fz)) + "[_compute_eta_max] eta: " + + str(eta) + + ", z: " + + str(z[z != 0]) + + ", f(z): " + + str(self._fz) + ) self._n_iter += 1 @@ -266,7 +292,7 @@ def _compute_eta_max(self, x, d, **kwargs): if not self._is_decreasing(z, d, **kwargs): return eta - self.logger.debug('Maximum iterations reached. Exiting.') + self.logger.debug("Maximum iterations reached. Exiting.") return eta def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): @@ -322,9 +348,7 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): self._fx = self.fun.fun(x) if fx is None else fx self._fz = self._fx - self.logger.info( - "line search: " + str(x[x != 0]) + - ", f(x): " + str(self._fx)) + self.logger.info("line search: " + str(x[x != 0]) + ", f(x): " + str(self._fx)) # reset cached values self._fun_idx_min = None @@ -345,10 +369,8 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): self._fun_idx_min = self._fx self._fun_idx_max = None # this has not been cached - x1 = CArray(x + d * self.eta * idx_min, - dtype=self._dtype, tosparse=x.issparse) - x2 = CArray(x + d * self.eta * idx_max, - dtype=self._dtype, tosparse=x.issparse) + x1 = CArray(x + d * self.eta * idx_min, dtype=self._dtype, tosparse=x.issparse) + x2 = CArray(x + d * self.eta * idx_max, dtype=self._dtype, tosparse=x.issparse) self.logger.info("Running binary line search in...") self.logger.info("x1 (f: {:}) -> \n{:}".format(self._fun_idx_min, x1)) @@ -360,14 +382,12 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): if (idx_max <= 1).any(): # local minimum found self.logger.debug("local minimum found") - return self._select_best_point( - x, d, idx_min, idx_max, **kwargs) + return self._select_best_point(x, d, idx_min, idx_max, **kwargs) else: if (idx_max - idx_min <= 1).any(): # local minimum found self.logger.debug("local minimum found") - return self._select_best_point( - x, d, idx_min, idx_max, **kwargs) + return self._select_best_point(x, d, idx_min, idx_max, **kwargs) # else, continue... idx = (0.5 * (idx_min + idx_max)).astype(int) @@ -377,9 +397,7 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): # update z, fz z = self._update_z(x, self.eta, d * idx) - self.logger.debug( - ", z: " + str(z[z != 0]) + - ", f(z): " + str(self._fz)) + self.logger.debug(", z: " + str(z[z != 0]) + ", f(z): " + str(self._fz)) self._n_iter += 1 @@ -392,9 +410,8 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): # check if we are approaching the minimum (flat region) if self._is_feasible(z) and abs(self._fz - fz_prev) <= tol: - self.logger.debug('Reached flat region. Exiting.') - return self._select_best_point( - x, d, idx_min, idx_max, **kwargs) + self.logger.debug("Reached flat region. Exiting.") + return self._select_best_point(x, d, idx_min, idx_max, **kwargs) - self.logger.debug('Maximum iterations reached. Exiting.') + self.logger.debug("Maximum iterations reached. Exiting.") return self._select_best_point(x, d, idx_min, idx_max, **kwargs) diff --git a/src/secml/optim/optimizers/line_search/c_line_search_bisect_proj.py b/src/secml/optim/optimizers/line_search/c_line_search_bisect_proj.py index 6ebe1976..427d84ea 100644 --- a/src/secml/optim/optimizers/line_search/c_line_search_bisect_proj.py +++ b/src/secml/optim/optimizers/line_search/c_line_search_bisect_proj.py @@ -5,6 +5,7 @@ .. moduleauthor:: Battista Biggio """ + import numpy as np from secml.array import CArray @@ -38,15 +39,30 @@ class CLineSearchBisectProj(CLineSearchBisect): class_type : 'bisect-proj' """ - __class_type = 'bisect-proj' - def __init__(self, fun, constr=None, bounds=None, - eta=1e-4, eta_min=0.1, eta_max=None, - max_iter=20): + __class_type = "bisect-proj" + + def __init__( + self, + fun, + constr=None, + bounds=None, + eta=1e-4, + eta_min=0.1, + eta_max=None, + max_iter=20, + ): CLineSearchBisect.__init__( - self, fun=fun, constr=constr, bounds=bounds, - eta=eta, eta_min=eta_min, eta_max=eta_max, max_iter=max_iter) + self, + fun=fun, + constr=constr, + bounds=bounds, + eta=eta, + eta_min=eta_min, + eta_max=eta_max, + max_iter=max_iter, + ) self._best_score = None self._best_eta = None @@ -66,10 +82,8 @@ def _update_z(self, x, eta, d, projection=False): def _is_feasible(self, x): """Checks if x is within the feasible domain.""" - constr_violation = False if self.constr is None else \ - self.constr.is_violated(x) - bounds_violation = False if self.bounds is None else \ - self.bounds.is_violated(x) + constr_violation = False if self.constr is None else self.constr.is_violated(x) + bounds_violation = False if self.bounds is None else self.bounds.is_violated(x) if constr_violation or bounds_violation: return False @@ -80,8 +94,7 @@ def _select_best_point(self, x, d, idx_min, idx_max, **kwargs): """Returns best point among x and the two points found by the search. In practice, if f(x + eta*d) increases on d, we return x.""" - v = CArray(x + d * self._best_eta, - dtype=self._dtype, tosparse=x.issparse) + v = CArray(x + d * self._best_eta, dtype=self._dtype, tosparse=x.issparse) if self.bounds is not None: v = self.bounds.projection(v) if self.bounds is not None else v if self.constr is not None: @@ -89,72 +102,85 @@ def _select_best_point(self, x, d, idx_min, idx_max, **kwargs): if self._is_feasible(v): return v, self._best_score - x1 = CArray(x + d * self.eta * idx_min, - dtype=self._dtype, tosparse=x.issparse) + x1 = CArray(x + d * self.eta * idx_min, dtype=self._dtype, tosparse=x.issparse) if self.bounds is not None: x1 = self.bounds.projection(x1) if self.bounds is not None else x1 if self.constr is not None: x1 = self.constr.projection(x1) if self.constr is not None else x1 - x2 = CArray(x + d * self.eta * idx_max, - dtype=self._dtype, tosparse=x.issparse) + x2 = CArray(x + d * self.eta * idx_max, dtype=self._dtype, tosparse=x.issparse) if self.bounds is not None: x2 = self.bounds.projection(x2) if self.bounds is not None else x2 if self.constr is not None: x2 = self.constr.projection(x2) if self.constr is not None else x2 - self.logger.debug("Select best point between: f[a], f[b]: [" + - str(self._fun_idx_min) + "," + - str(self._fun_idx_max) + "]") + self.logger.debug( + "Select best point between: f[a], f[b]: [" + + str(self._fun_idx_min) + + "," + + str(self._fun_idx_max) + + "]" + ) f0 = self._fx - if not self._is_feasible(x1) and \ - not self._is_feasible(x2): + if not self._is_feasible(x1) and not self._is_feasible(x2): if self._best_score < f0: - self.logger.debug("x1 and x2 are not feasible." - "Returning the best cached value.") + self.logger.debug( + "x1 and x2 are not feasible." "Returning the best cached value." + ) return v, self._best_score else: self.logger.debug("x1 and x2 are not feasible. Returning x.") return x, f0 # uses cached values (if available) to save computations - f1 = self._fun_idx_min if self._fun_idx_min is not None else \ - self.fun.fun(x1, **kwargs) + f1 = ( + self._fun_idx_min + if self._fun_idx_min is not None + else self.fun.fun(x1, **kwargs) + ) if not self._is_feasible(x2): if f1 < f0 and f1 < self._best_score: - self.logger.debug("x2 not feasible. Returning x1." - " f(x): " + str(f0) + - ", f(x1): " + str(f1)) + self.logger.debug( + "x2 not feasible. Returning x1." + " f(x): " + str(f0) + ", f(x1): " + str(f1) + ) return x1, f1 if self._best_score < f1: - self.logger.debug("x2 not feasible. Returning the best cached" - " value.") + self.logger.debug( + "x2 not feasible. Returning the best cached" " value." + ) return v, self._best_score - self.logger.debug("x2 not feasible. Returning x." - " f(x): " + str(f0) + - ", f(x1): " + str(f1)) + self.logger.debug( + "x2 not feasible. Returning x." + " f(x): " + str(f0) + ", f(x1): " + str(f1) + ) return x, f0 # uses cached values (if available) to save computations - f2 = self._fun_idx_max if self._fun_idx_max is not None else \ - self.fun.fun(x2, **kwargs) + f2 = ( + self._fun_idx_max + if self._fun_idx_max is not None + else self.fun.fun(x2, **kwargs) + ) if not self._is_feasible(x1): if f2 < f0 and f2 < self._best_score: self.logger.debug("x1 not feasible. Returning x2.") return x2, f2 if self._best_score < f2: - self.logger.debug("x1 not feasible. Returning the best cached " - "value.") + self.logger.debug( + "x1 not feasible. Returning the best cached " "value." + ) return v, self._best_score self.logger.debug("x1 not feasible. Returning x.") return x, f0 # else return best point among x1, x2 and x - self.logger.debug("f0: {:}, f1: {:}, f2: {:}, best: {:}".format( - f0, f1, f2, self._best_score)) + self.logger.debug( + "f0: {:}, f1: {:}, f2: {:}, best: {:}".format(f0, f1, f2, self._best_score) + ) if f2 <= f0 and f2 <= f1 and f2 <= self._best_score: self.logger.debug("Returning x2.") @@ -164,8 +190,7 @@ def _select_best_point(self, x, d, idx_min, idx_max, **kwargs): self.logger.debug("Returning x1.") return x1, f1 - if self._best_score <= f0 and self._best_score <= f1 and \ - self._best_score <= f2: + if self._best_score <= f0 and self._best_score <= f1 and self._best_score <= f2: self.logger.debug("Returning the best cached value.") return v, self._best_score @@ -225,8 +250,11 @@ def _compute_eta_max(self, x, d, **kwargs): z = self._update_z(x, eta, d, projection=True) self.logger.debug( - "[_compute_eta_min] eta: " + str(eta.item()) + - ", f(z): " + str(self._fz)) + "[_compute_eta_min] eta: " + + str(eta.item()) + + ", f(z): " + + str(self._fz) + ) # exponential line search starts here while self._n_iter < 10: @@ -244,13 +272,17 @@ def _compute_eta_max(self, x, d, **kwargs): self._fun_idx_max = self._fz self.logger.debug( - "[_compute_eta_max] eta: " + str(eta.item()) + - ", f(z0): " + str(self._fun_idx_min) + - ", f(z1): " + str(self._fun_idx_max)) + "[_compute_eta_max] eta: " + + str(eta.item()) + + ", f(z0): " + + str(self._fun_idx_min) + + ", f(z1): " + + str(self._fun_idx_max) + ) self._n_iter += 1 - self.logger.debug('Maximum iterations reached. Exiting.') + self.logger.debug("Maximum iterations reached. Exiting.") return eta def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): @@ -306,9 +338,7 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): self._best_score = self._fx self._best_eta = 0.0 - self.logger.info( - "line search: " + - ", f(x): " + str(self._fx)) + self.logger.info("line search: " + ", f(x): " + str(self._fx)) # reset cached values self._fun_idx_min = None @@ -329,8 +359,12 @@ def minimize(self, x, d, fx=None, tol=1e-4, **kwargs): self._fun_idx_min = self._fx self._fun_idx_max = None # this has not been cached - self.logger.debug("Running line search in: f[a], f[b]: [" + - str(self._fun_idx_min) + "," + - str(self._fun_idx_max) + "]") + self.logger.debug( + "Running line search in: f[a], f[b]: [" + + str(self._fun_idx_min) + + "," + + str(self._fun_idx_max) + + "]" + ) return self._select_best_point(x, d, idx_min, idx_max, **kwargs) diff --git a/src/secml/optim/optimizers/line_search/tests/test_line_search_bisect.py b/src/secml/optim/optimizers/line_search/tests/test_line_search_bisect.py index 3bc1e9c1..4fd9aab7 100644 --- a/src/secml/optim/optimizers/line_search/tests/test_line_search_bisect.py +++ b/src/secml/optim/optimizers/line_search/tests/test_line_search_bisect.py @@ -12,11 +12,10 @@ class TestLineSearch(CUnitTest): def test_minimize(self): """Testing the bisect line-search algorithm.""" - self.logger.info( - "Test for binary line search ... ") + self.logger.info("Test for binary line search ... ") def fun_test(x): - return x ** 2 - 1 + return x**2 - 1 self.fun = CFunction(fun=fun_test) @@ -38,18 +37,21 @@ def fun_test(x): self._save_fig() - self.assertTrue(x0.norm() <= 1e-6, - "Correct solution found, x0 = 0.") + self.assertTrue(x0.norm() <= 1e-6, "Correct solution found, x0 = 0.") def _save_fig(self): """Visualizing the function being optimized with line search.""" - x_range = CArray.arange(-5, 20, 0.5, ) + x_range = CArray.arange( + -5, + 20, + 0.5, + ) score_range = x_range.T.apply_along_axis(self.fun.fun, axis=1) ref_line = CArray.zeros(x_range.size) fig = CFigure(height=6, width=12) - fig.sp.plot(x_range, score_range, color='b') - fig.sp.plot(x_range, ref_line, color='k') - filename = fm.join(fm.abspath(__file__), 'test_line_search_bisect.pdf') + fig.sp.plot(x_range, score_range, color="b") + fig.sp.plot(x_range, ref_line, color="k") + filename = fm.join(fm.abspath(__file__), "test_line_search_bisect.pdf") fig.savefig(filename) diff --git a/src/secml/optim/optimizers/tests/c_optimizer_testcases.py b/src/secml/optim/optimizers/tests/c_optimizer_testcases.py index 78fab29d..84e49a7b 100644 --- a/src/secml/optim/optimizers/tests/c_optimizer_testcases.py +++ b/src/secml/optim/optimizers/tests/c_optimizer_testcases.py @@ -11,70 +11,77 @@ class COptimizerTestCases(CUnitTest): """Unittests interface for COptimizer.""" - make_figures = os.getenv('MAKE_FIGURES', False) # True to produce figures + + make_figures = os.getenv("MAKE_FIGURES", False) # True to produce figures def setUp(self): self.test_funcs = dict() # Instancing the available functions to test optimizer - self.test_funcs['3h-camel'] = { - 'fun': CFunction.create('3h-camel'), - 'x0': CArray([1, 1]), - 'grid_limits': [(-2, 2), (-2, 2)], - 'vmin': 0, 'vmax': 5 + self.test_funcs["3h-camel"] = { + "fun": CFunction.create("3h-camel"), + "x0": CArray([1, 1]), + "grid_limits": [(-2, 2), (-2, 2)], + "vmin": 0, + "vmax": 5, } - self.test_funcs['beale'] = { - 'fun': CFunction.create('beale'), - 'x0': CArray([0, 0]), - 'grid_limits': [(-1, 4.5), (-1, 1.5)], - 'vmin': 0, 'vmax': 1 + self.test_funcs["beale"] = { + "fun": CFunction.create("beale"), + "x0": CArray([0, 0]), + "grid_limits": [(-1, 4.5), (-1, 1.5)], + "vmin": 0, + "vmax": 1, } - self.test_funcs['mc-cormick'] = { - 'fun': CFunction.create('mc-cormick'), - 'x0': CArray([0, 1]), - 'grid_limits': [(-2, 3), (-3, 1)], - 'vmin': -2, 'vmax': 2 + self.test_funcs["mc-cormick"] = { + "fun": CFunction.create("mc-cormick"), + "x0": CArray([0, 1]), + "grid_limits": [(-2, 3), (-3, 1)], + "vmin": -2, + "vmax": 2, } - self.test_funcs['rosenbrock'] = { - 'fun': CFunction.create('rosenbrock'), - 'x0': CArray([-1, -1]), - 'grid_limits': [(-2.1, 1.1), (-2.1, 1.1)], - 'vmin': 0, 'vmax': 10 + self.test_funcs["rosenbrock"] = { + "fun": CFunction.create("rosenbrock"), + "x0": CArray([-1, -1]), + "grid_limits": [(-2.1, 1.1), (-2.1, 1.1)], + "vmin": 0, + "vmax": 10, } quad = self._quadratic_fun(2) - self.test_funcs['quad-2'] = { - 'fun': quad, - 'x0': CArray([4, -4]), - 'grid_limits': [(-5, 5), (-5, 5)], - 'vmin': None, 'vmax': None + self.test_funcs["quad-2"] = { + "fun": quad, + "x0": CArray([4, -4]), + "grid_limits": [(-5, 5), (-5, 5)], + "vmin": None, + "vmax": None, } n = 100 quad = self._quadratic_fun(n) - self.test_funcs['quad-100-sparse'] = { - 'fun': quad, - 'x0': CArray.zeros((n,), dtype=int).tosparse(dtype=int), + self.test_funcs["quad-100-sparse"] = { + "fun": quad, + "x0": CArray.zeros((n,), dtype=int).tosparse(dtype=int), } n = 2 poly = self._create_poly(d=n) - self.test_funcs['poly-2'] = { - 'fun': poly, - 'x0': CArray.ones((n,), dtype=int) * 2, - 'vmin': -10, 'vmax': 5, - 'grid_limits': [(-1, 1), (-1, 1)] + self.test_funcs["poly-2"] = { + "fun": poly, + "x0": CArray.ones((n,), dtype=int) * 2, + "vmin": -10, + "vmax": 5, + "grid_limits": [(-1, 1), (-1, 1)], } n = 100 # x0 is a sparse CArray and the solution is a zero vector poly = self._create_poly(d=n) - self.test_funcs['poly-100-int'] = { - 'fun': poly, - 'x0': CArray.ones((n,), dtype=int) * 2 + self.test_funcs["poly-100-int"] = { + "fun": poly, + "x0": CArray.ones((n,), dtype=int) * 2, } n = 100 poly = self._create_poly(d=n) - self.test_funcs['poly-100-int-sparse'] = { - 'fun': poly, - 'x0': CArray.ones((n,), dtype=int).tosparse(dtype=int) * 2 + self.test_funcs["poly-100-int-sparse"] = { + "fun": poly, + "x0": CArray.ones((n,), dtype=int).tosparse(dtype=int) * 2, } @staticmethod @@ -83,14 +90,16 @@ def _quadratic_fun(d): def _quadratic_fun_min(A, b): from scipy import linalg + min_x_scipy = linalg.solve( - (2 * A).tondarray(), -b.tondarray(), sym_pos=True) + (2 * A).tondarray(), -b.tondarray(), sym_pos=True + ) return CArray(min_x_scipy).ravel() A = CArray.eye(d, d) b = CArray.ones((d, 1)) * 2 - discr_fun = CFunction.create('quadratic', A, b, c=0) + discr_fun = CFunction.create("quadratic", A, b, c=0) min_x = _quadratic_fun_min(A, b) min_val = discr_fun.fun(min_x) @@ -105,20 +114,28 @@ def _create_poly(d): """Creates a polynomial function in d dimensions.""" def _poly_fun(x): - return (x ** 4).sum() + x.sum() ** 2 + return (x**4).sum() + x.sum() ** 2 def _poly_grad(x): - return (4 * x ** 3) + 2 * x + return (4 * x**3) + 2 * x int_fun = CFunction(fun=_poly_fun, gradient=_poly_grad) - int_fun.global_min = lambda: 0. - int_fun.global_min_x = lambda: CArray.zeros(d, ) + int_fun.global_min = lambda: 0.0 + int_fun.global_min_x = lambda: CArray.zeros( + d, + ) return int_fun - def _test_minimize(self, opt_class, fun_id, - opt_params=None, minimize_params=None, - label=None, out_int=False): + def _test_minimize( + self, + opt_class, + fun_id, + opt_params=None, + minimize_params=None, + label=None, + out_int=False, + ): """Test for COptimizer.minimize() method. Parameters @@ -143,50 +160,57 @@ def _test_minimize(self, opt_class, fun_id, fun_dict = self.test_funcs[fun_id] - fun = fun_dict['fun'] + fun = fun_dict["fun"] opt = opt_class(fun, **opt_params) opt.verbose = 1 self.logger.info( "Testing minimization of {:} using {:}".format( - fun.__class__.__name__, opt.__class__.__name__)) + fun.__class__.__name__, opt.__class__.__name__ + ) + ) - if fun.class_type == 'mc-cormick' and 'bounds' not in opt_params: + if fun.class_type == "mc-cormick" and "bounds" not in opt_params: raise RuntimeError( "mc-cormick always needs the following bounds for " "correct optimization: {:}, {:}".format(*fun.bounds()) ) - min_x = opt.minimize(fun_dict['x0'], **minimize_params) + min_x = opt.minimize(fun_dict["x0"], **minimize_params) - self.logger.info("x0: {:}".format(fun_dict['x0'])) + self.logger.info("x0: {:}".format(fun_dict["x0"])) self.logger.info("Found minimum: {:}".format(min_x)) self.logger.info("Fun value @ minimum: {:}".format(opt.f_opt)) if self.make_figures and fun.global_min_x().size == 2: - self._plot_optimization(opt, fun_dict['x0'], min_x, - grid_limits=fun_dict['grid_limits'], - method=minimize_params.get('method'), - vmin=fun_dict['vmin'], - vmax=fun_dict['vmax'], - label=label) + self._plot_optimization( + opt, + fun_dict["x0"], + min_x, + grid_limits=fun_dict["grid_limits"], + method=minimize_params.get("method"), + vmin=fun_dict["vmin"], + vmax=fun_dict["vmax"], + label=label, + ) # Round results for easier asserts self.assertAlmostEqual(opt.f_opt, fun.global_min(), places=3) self.assert_array_almost_equal( - min_x.todense().ravel(), fun.global_min_x(), decimal=3) + min_x.todense().ravel(), fun.global_min_x(), decimal=3 + ) # Check if the type of the solution is correct - self.assertEqual(fun_dict['x0'].issparse, min_x.issparse) + self.assertEqual(fun_dict["x0"].issparse, min_x.issparse) # Check if solution has expected int dtype or not self.assertIsSubDtype(min_x.dtype, int if out_int is True else float) @staticmethod def _plot_optimization( - solver, x_0, g_min, grid_limits, - method=None, vmin=None, vmax=None, label=None): + solver, x_0, g_min, grid_limits, method=None, vmin=None, vmax=None, label=None + ): """Plots the optimization problem. Parameters @@ -204,25 +228,43 @@ def _plot_optimization( fig = CFigure(markersize=12) # Plot objective function - fig.sp.plot_fun(func=CArray.apply_along_axis, - plot_background=True, - n_grid_points=30, n_colors=25, - grid_limits=grid_limits, - levels=[0.5], levels_color='gray', levels_style='--', - colorbar=True, func_args=(solver.f.fun, 1,), - vmin=vmin, vmax=vmax) + fig.sp.plot_fun( + func=CArray.apply_along_axis, + plot_background=True, + n_grid_points=30, + n_colors=25, + grid_limits=grid_limits, + levels=[0.5], + levels_color="gray", + levels_style="--", + colorbar=True, + func_args=( + solver.f.fun, + 1, + ), + vmin=vmin, + vmax=vmax, + ) if solver.bounds is not None: # Plot box constraint - fig.sp.plot_fun(func=lambda x: solver.bounds.constraint(x), - plot_background=False, n_grid_points=20, - grid_limits=grid_limits, levels=[0], - colorbar=False) + fig.sp.plot_fun( + func=lambda x: solver.bounds.constraint(x), + plot_background=False, + n_grid_points=20, + grid_limits=grid_limits, + levels=[0], + colorbar=False, + ) if solver.constr is not None: # Plot distance constraint - fig.sp.plot_fun(func=lambda x: solver.constr.constraint(x), - plot_background=False, n_grid_points=20, - grid_limits=grid_limits, levels=[0], - colorbar=False) + fig.sp.plot_fun( + func=lambda x: solver.constr.constraint(x), + plot_background=False, + n_grid_points=20, + grid_limits=grid_limits, + levels=[0], + colorbar=False, + ) # Plot optimization trace if solver.x_seq is not None: @@ -230,30 +272,36 @@ def _plot_optimization( else: fig.sp.plot_path(x_0.append(g_min, axis=0)) - fig.sp.title("{:}(fun={:}) - Glob Min @ {:}".format( - solver.class_type, solver.f.class_type, - solver.f.global_min_x().round(2).tolist())) + fig.sp.title( + "{:}(fun={:}) - Glob Min @ {:}".format( + solver.class_type, + solver.f.class_type, + solver.f.global_min_x().round(2).tolist(), + ) + ) - test_img_fold_name = 'test_images' + test_img_fold_name = "test_images" test_img_fold_path = fm.join(fm.abspath(__file__), test_img_fold_name) if not fm.folder_exist(test_img_fold_path): fm.make_folder(test_img_fold_path) if method is None: - filename = fm.join(test_img_fold_path, - solver.class_type + '-' + solver.f.class_type) + filename = fm.join( + test_img_fold_path, solver.class_type + "-" + solver.f.class_type + ) else: filename = fm.join( test_img_fold_path, - solver.class_type + '-' + method + '-' + solver.f.class_type) + solver.class_type + "-" + method + "-" + solver.f.class_type, + ) - filename += '-' + label if label is not None else '' + filename += "-" + label if label is not None else "" - test_img_fold_name = 'test_images' + test_img_fold_name = "test_images" if not fm.folder_exist(test_img_fold_name): fm.make_folder(test_img_fold_name) - fig.savefig('{:}.pdf'.format(filename)) + fig.savefig("{:}.pdf".format(filename)) def _test_constr_bounds(self, opt_class): """Test for COptimizer.minimize() method behaviour @@ -270,60 +318,73 @@ def _test_constr_bounds(self, opt_class): COptimizer. """ - fun_dict = self.test_funcs['rosenbrock'] - fun = fun_dict['fun'] + fun_dict = self.test_funcs["rosenbrock"] + fun = fun_dict["fun"] # Constraint radius 0 (no bounds) - opt_params = {'constr': CConstraintL2(center=CArray([0, 0]), radius=0)} + opt_params = {"constr": CConstraintL2(center=CArray([0, 0]), radius=0)} opt = opt_class(fun, **opt_params) opt.verbose = 1 - self.logger.info("Testing minimization of {:} using {:} " - "and constr {:}".format(fun.__class__.__name__, - opt.__class__.__name__, - opt_params['constr'])) + self.logger.info( + "Testing minimization of {:} using {:} " + "and constr {:}".format( + fun.__class__.__name__, opt.__class__.__name__, opt_params["constr"] + ) + ) - min_x = opt.minimize(fun_dict['x0']) + min_x = opt.minimize(fun_dict["x0"]) self.assertAlmostEqual(opt.f_opt, 1.0) self.assert_array_almost_equal(min_x.todense().ravel(), CArray([0, 0])) # Constraint radius 0 inside bounds - opt_params = {'constr': CConstraintL2(center=CArray([0, 0]), radius=0), - 'bounds': CConstraintBox(lb=-1, ub=1)} + opt_params = { + "constr": CConstraintL2(center=CArray([0, 0]), radius=0), + "bounds": CConstraintBox(lb=-1, ub=1), + } opt = opt_class(fun, **opt_params) opt.verbose = 1 - self.logger.info("Testing minimization of {:} using {:}, constr {:} " - "and bounds {:}".format(fun.__class__.__name__, - opt.__class__.__name__, - opt_params['constr'], - opt_params['bounds'])) + self.logger.info( + "Testing minimization of {:} using {:}, constr {:} " + "and bounds {:}".format( + fun.__class__.__name__, + opt.__class__.__name__, + opt_params["constr"], + opt_params["bounds"], + ) + ) - min_x = opt.minimize(fun_dict['x0']) + min_x = opt.minimize(fun_dict["x0"]) self.assertAlmostEqual(opt.f_opt, 1.0) self.assert_array_almost_equal(min_x.todense().ravel(), CArray([0, 0])) # Constraint radius 0 outside bounds (should raise warning) - opt_params = {'constr': CConstraintL2(center=CArray([0, 0]), radius=0), - 'bounds': CConstraintBox(lb=1, ub=2)} + opt_params = { + "constr": CConstraintL2(center=CArray([0, 0]), radius=0), + "bounds": CConstraintBox(lb=1, ub=2), + } opt = opt_class(fun, **opt_params) opt.verbose = 1 - self.logger.info("Testing minimization of {:} using {:}, constr {:} " - "and bounds {:}".format(fun.__class__.__name__, - opt.__class__.__name__, - opt_params['constr'], - opt_params['bounds'])) + self.logger.info( + "Testing minimization of {:} using {:}, constr {:} " + "and bounds {:}".format( + fun.__class__.__name__, + opt.__class__.__name__, + opt_params["constr"], + opt_params["bounds"], + ) + ) with self.logger.catch_warnings(record=True) as w: - min_x = opt.minimize(fun_dict['x0']) - self.assertIn( - 'is outside of the given bounds', w[0].message.args[0]) + min_x = opt.minimize(fun_dict["x0"]) + self.assertIn("is outside of the given bounds", w[0].message.args[0]) self.assertAlmostEqual(opt.f_opt, 1.0) self.assert_array_almost_equal(min_x.todense().ravel(), CArray([0, 0])) diff --git a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd.py b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd.py index bcbd972a..d28273d1 100644 --- a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd.py +++ b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd.py @@ -8,34 +8,34 @@ class TestCOptimizerPGD(COptimizerTestCases): def test_minimize_3h_camel(self): """Test for COptimizer.minimize() method on 3h-camel fun.""" - opt_params = {'eta': 1e-1, 'eps': 1e-12} + opt_params = {"eta": 1e-1, "eps": 1e-12} - self._test_minimize( - COptimizerPGD, '3h-camel', opt_params=opt_params) + self._test_minimize(COptimizerPGD, "3h-camel", opt_params=opt_params) def test_minimize_beale(self): """Test for COptimizer.minimize() method on beale fun.""" - opt_params = {'eta': 1e-2, 'eps': 1e-12, 'max_iter': 2000} + opt_params = {"eta": 1e-2, "eps": 1e-12, "max_iter": 2000} - self._test_minimize( - COptimizerPGD, 'beale', opt_params=opt_params) + self._test_minimize(COptimizerPGD, "beale", opt_params=opt_params) def test_minimize_mc_cormick(self): """Test for COptimizer.minimize() method on mc-cormick fun.""" from secml.optim.function import CFunctionMcCormick from secml.optim.constraints import CConstraintBox - opt_params = {'eta': 1e-1, 'eps': 1e-12, - 'bounds': CConstraintBox(*CFunctionMcCormick.bounds())} - self._test_minimize( - COptimizerPGD, 'mc-cormick', opt_params=opt_params) + opt_params = { + "eta": 1e-1, + "eps": 1e-12, + "bounds": CConstraintBox(*CFunctionMcCormick.bounds()), + } + + self._test_minimize(COptimizerPGD, "mc-cormick", opt_params=opt_params) def test_minimize_rosenbrock(self): """Test for COptimizer.minimize() method on rosenbrock fun.""" - opt_params = {'eta': 0.002, 'eps': 1e-12, 'max_iter': 8000} + opt_params = {"eta": 0.002, "eps": 1e-12, "max_iter": 8000} - self._test_minimize( - COptimizerPGD, 'rosenbrock', opt_params=opt_params) + self._test_minimize(COptimizerPGD, "rosenbrock", opt_params=opt_params) def test_constr_bounds(self): """Test for COptimizer.minimize() method behaviour @@ -43,5 +43,5 @@ def test_constr_bounds(self): self._test_constr_bounds(COptimizerPGD) -if __name__ == '__main__': +if __name__ == "__main__": COptimizerTestCases.main() diff --git a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_exp.py b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_exp.py index 890927ea..192b3025 100644 --- a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_exp.py +++ b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_exp.py @@ -8,35 +8,35 @@ class TestCOptimizerPGDExp(COptimizerTestCases): def test_minimize_3h_camel(self): """Test for COptimizer.minimize() method on 3h-camel fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-1, 'eps': 1e-12} + opt_params = {"eta": 1e-6, "eta_min": 1e-1, "eps": 1e-12} - self._test_minimize( - COptimizerPGDExp, '3h-camel', opt_params=opt_params) + self._test_minimize(COptimizerPGDExp, "3h-camel", opt_params=opt_params) def test_minimize_beale(self): """Test for COptimizer.minimize() method on beale fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-4, 'eps': 1e-12} + opt_params = {"eta": 1e-6, "eta_min": 1e-4, "eps": 1e-12} - self._test_minimize( - COptimizerPGDExp, 'beale', opt_params=opt_params) + self._test_minimize(COptimizerPGDExp, "beale", opt_params=opt_params) def test_minimize_mc_cormick(self): """Test for COptimizer.minimize() method on mc-cormick fun.""" from secml.optim.function import CFunctionMcCormick from secml.optim.constraints import CConstraintBox - opt_params = {'eta': 1e-6, 'eta_min': 1e-1, 'eps': 1e-12, - 'bounds': CConstraintBox(*CFunctionMcCormick.bounds())} - self._test_minimize( - COptimizerPGDExp, 'mc-cormick', opt_params=opt_params) + opt_params = { + "eta": 1e-6, + "eta_min": 1e-1, + "eps": 1e-12, + "bounds": CConstraintBox(*CFunctionMcCormick.bounds()), + } + + self._test_minimize(COptimizerPGDExp, "mc-cormick", opt_params=opt_params) def test_minimize_rosenbrock(self): """Test for COptimizer.minimize() method on rosenbrock fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-3, 'eps': 1e-12, - 'max_iter': 2500} + opt_params = {"eta": 1e-6, "eta_min": 1e-3, "eps": 1e-12, "max_iter": 2500} - self._test_minimize( - COptimizerPGDExp, 'rosenbrock', opt_params=opt_params) + self._test_minimize(COptimizerPGDExp, "rosenbrock", opt_params=opt_params) def test_constr_bounds(self): """Test for COptimizer.minimize() method behaviour @@ -44,5 +44,5 @@ def test_constr_bounds(self): self._test_constr_bounds(COptimizerPGDExp) -if __name__ == '__main__': +if __name__ == "__main__": COptimizerTestCases.main() diff --git a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls.py b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls.py index 73a28728..36d405cc 100644 --- a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls.py +++ b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls.py @@ -8,34 +8,35 @@ class TestCOptimizerPGDLS(COptimizerTestCases): def test_minimize_3h_camel(self): """Test for COptimizer.minimize() method on 3h-camel fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-4, 'eps': 1e-12} + opt_params = {"eta": 1e-6, "eta_min": 1e-4, "eps": 1e-12} - self._test_minimize( - COptimizerPGDLS, '3h-camel', opt_params=opt_params) + self._test_minimize(COptimizerPGDLS, "3h-camel", opt_params=opt_params) def test_minimize_beale(self): """Test for COptimizer.minimize() method on beale fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-4, 'eps': 1e-12} + opt_params = {"eta": 1e-6, "eta_min": 1e-4, "eps": 1e-12} - self._test_minimize( - COptimizerPGDLS, 'beale', opt_params=opt_params) + self._test_minimize(COptimizerPGDLS, "beale", opt_params=opt_params) def test_minimize_mc_cormick(self): """Test for COptimizer.minimize() method on mc-cormick fun.""" from secml.optim.function import CFunctionMcCormick from secml.optim.constraints import CConstraintBox - opt_params = {'eta': 1e-6, 'eta_min': 1e-4, 'eps': 1e-12, - 'bounds': CConstraintBox(*CFunctionMcCormick.bounds())} - self._test_minimize( - COptimizerPGDLS, 'mc-cormick', opt_params=opt_params) + opt_params = { + "eta": 1e-6, + "eta_min": 1e-4, + "eps": 1e-12, + "bounds": CConstraintBox(*CFunctionMcCormick.bounds()), + } + + self._test_minimize(COptimizerPGDLS, "mc-cormick", opt_params=opt_params) def test_minimize_rosenbrock(self): """Test for COptimizer.minimize() method on rosenbrock fun.""" - opt_params = {'eta': 1e-6, 'eta_min': 1e-6, 'eps': 1e-12} + opt_params = {"eta": 1e-6, "eta_min": 1e-6, "eps": 1e-12} - self._test_minimize( - COptimizerPGDLS, 'rosenbrock', opt_params=opt_params) + self._test_minimize(COptimizerPGDLS, "rosenbrock", opt_params=opt_params) def test_constr_bounds(self): """Test for COptimizer.minimize() method behaviour @@ -43,5 +44,5 @@ def test_constr_bounds(self): self._test_constr_bounds(COptimizerPGDLS) -if __name__ == '__main__': +if __name__ == "__main__": COptimizerTestCases.main() diff --git a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls_discrete.py b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls_discrete.py index ba4f9807..4ca9e7bc 100644 --- a/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls_discrete.py +++ b/src/secml/optim/optimizers/tests/test_c_optimizer_pgd_ls_discrete.py @@ -15,14 +15,19 @@ def test_minimize_3h_camel(self): The solution expected by this test is a integer vector. """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-1, ub=1) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-1, ub=1), } - self._test_minimize(COptimizerPGDLS, '3h-camel', - opt_params=opt_params, - label='discrete', - out_int=True) + self._test_minimize( + COptimizerPGDLS, + "3h-camel", + opt_params=opt_params, + label="discrete", + out_int=True, + ) def test_minimize_3h_camel_l1(self): """Test for COptimizer.minimize() method on 3h-camel fun. @@ -31,14 +36,16 @@ def test_minimize_3h_camel_l1(self): The solution expected by this test is a float vector. """ opt_params = { - 'eta': 0.5, 'eta_min': 0.5, 'eps': 1e-12, - 'constr': CConstraintL1(radius=2), - 'bounds': CConstraintBox(lb=-1, ub=1) + "eta": 0.5, + "eta_min": 0.5, + "eps": 1e-12, + "constr": CConstraintL1(radius=2), + "bounds": CConstraintBox(lb=-1, ub=1), } - self._test_minimize(COptimizerPGDLS, '3h-camel', - opt_params=opt_params, - label='discrete-l1') + self._test_minimize( + COptimizerPGDLS, "3h-camel", opt_params=opt_params, label="discrete-l1" + ) def test_minimize_beale(self): """Test for COptimizer.minimize() method on 3h-camel fun. @@ -47,14 +54,16 @@ def test_minimize_beale(self): The solution expected by this test is a float vector. """ opt_params = { - 'eta': 1e-6, 'eta_min': 1e-4, 'eps': 1e-12, - 'constr': CConstraintL1(center=CArray([2, 0]), radius=2), - 'bounds': CConstraintBox(lb=0, ub=4) + "eta": 1e-6, + "eta_min": 1e-4, + "eps": 1e-12, + "constr": CConstraintL1(center=CArray([2, 0]), radius=2), + "bounds": CConstraintBox(lb=0, ub=4), } - self._test_minimize(COptimizerPGDLS, 'beale', - opt_params=opt_params, - label='discrete-l1') + self._test_minimize( + COptimizerPGDLS, "beale", opt_params=opt_params, label="discrete-l1" + ) def test_minimize_quad2d_no_bound(self): """Test for COptimizer.minimize() method on a quadratic function in @@ -63,16 +72,17 @@ def test_minimize_quad2d_no_bound(self): with an integer eta, an integer starting point and without any bound. The solution expected by this test is an integer vector. """ - opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12 - } + opt_params = {"eta": 1, "eta_min": 1, "eps": 1e-12} # both the starting point and eta are integer, # therefore we expect an integer solution - self._test_minimize(COptimizerPGDLS, 'quad-2', - opt_params=opt_params, - label='quad-2-discrete', - out_int=True) + self._test_minimize( + COptimizerPGDLS, + "quad-2", + opt_params=opt_params, + label="quad-2-discrete", + out_int=True, + ) def test_minimize_quad2d_bound(self): """Test for COptimizer.minimize() method on a quadratic function in @@ -82,15 +92,19 @@ def test_minimize_quad2d_bound(self): The solution expected by this test is an integer vector. """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-2, ub=3) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-2, ub=3), } self._test_minimize( - COptimizerPGDLS, 'quad-2', + COptimizerPGDLS, + "quad-2", opt_params=opt_params, - label='quad-2-discrete-bounded', - out_int=True) + label="quad-2-discrete-bounded", + out_int=True, + ) def test_minimize_quad100d_sparse(self): """Test for COptimizer.minimize() method on a quadratic function in @@ -100,15 +114,19 @@ def test_minimize_quad100d_sparse(self): The solution expected by this test is an integer sparse vector. """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-2, ub=3) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-2, ub=3), } self._test_minimize( - COptimizerPGDLS, 'quad-100-sparse', + COptimizerPGDLS, + "quad-100-sparse", opt_params=opt_params, - label='quad-100-sparse-discrete-bounded', - out_int=True) + label="quad-100-sparse-discrete-bounded", + out_int=True, + ) def test_minimize_quad100d_l1_sparse(self): """Test for COptimizer.minimize() method on a quadratic function in @@ -119,16 +137,20 @@ def test_minimize_quad100d_l1_sparse(self): The solution expected by this test is an integer sparse vector. """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'constr': CConstraintL1(radius=100), - 'bounds': CConstraintBox(lb=-2, ub=3) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "constr": CConstraintL1(radius=100), + "bounds": CConstraintBox(lb=-2, ub=3), } self._test_minimize( - COptimizerPGDLS, 'quad-100-sparse', + COptimizerPGDLS, + "quad-100-sparse", opt_params=opt_params, - label='quad-100-sparse-discrete-bounded-l1', - out_int=True) + label="quad-100-sparse-discrete-bounded-l1", + out_int=True, + ) def test_minimize_poly_2d_bounded(self): """Test for COptimizer.minimize() method on a polynomial function in @@ -138,14 +160,18 @@ def test_minimize_poly_2d_bounded(self): The solution expected by this test is an integer vector. """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-1, ub=1)} + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-1, ub=1), + } self._test_minimize( - COptimizerPGDLS, 'poly-2', + COptimizerPGDLS, + "poly-2", opt_params=opt_params, - label='poly-discrete-bounded', - out_int=True + label="poly-discrete-bounded", + out_int=True, ) def test_minimize_poly_100d_bounded(self): @@ -156,15 +182,19 @@ def test_minimize_poly_100d_bounded(self): The solution of this problem is an integer vector (of zeros). """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-1, ub=1) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-1, ub=1), } self._test_minimize( - COptimizerPGDLS, 'poly-100-int', + COptimizerPGDLS, + "poly-100-int", opt_params=opt_params, - label='poly-int-discrete-bounded', - out_int=True) + label="poly-int-discrete-bounded", + out_int=True, + ) def test_minimize_poly_100d_bounded_sparse(self): """Test for COptimizer.minimize() method on a polynomial function in @@ -175,16 +205,20 @@ def test_minimize_poly_100d_bounded_sparse(self): The solution expected by this test is an integer sparse vector (of zeros). """ opt_params = { - 'eta': 1, 'eta_min': 1, 'eps': 1e-12, - 'bounds': CConstraintBox(lb=-1, ub=1) + "eta": 1, + "eta_min": 1, + "eps": 1e-12, + "bounds": CConstraintBox(lb=-1, ub=1), } self._test_minimize( - COptimizerPGDLS, 'poly-100-int-sparse', + COptimizerPGDLS, + "poly-100-int-sparse", opt_params=opt_params, - label='poly-int-sparse-discrete-bounded', - out_int=True) + label="poly-int-sparse-discrete-bounded", + out_int=True, + ) -if __name__ == '__main__': +if __name__ == "__main__": COptimizerTestCases.main() diff --git a/src/secml/optim/optimizers/tests/test_c_optimizer_scipy.py b/src/secml/optim/optimizers/tests/test_c_optimizer_scipy.py index a7216b7a..155fc354 100644 --- a/src/secml/optim/optimizers/tests/test_c_optimizer_scipy.py +++ b/src/secml/optim/optimizers/tests/test_c_optimizer_scipy.py @@ -2,7 +2,7 @@ from secml.optim.optimizers import COptimizerScipy from secml.optim.constraints import CConstraintBox -TEST_FUNCS = ['3h-camel', 'beale', 'mc-cormick', 'rosenbrock'] +TEST_FUNCS = ["3h-camel", "beale", "mc-cormick", "rosenbrock"] class TestCOptimizerScipy(COptimizerTestCases): @@ -12,31 +12,39 @@ def test_minimize_3h_camel(self): """Test for COptimizer.minimize() method on 3h-camel fun.""" self._test_minimize( - COptimizerScipy, '3h-camel', opt_params={}, - minimize_params={ - 'method': 'BFGS', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "3h-camel", + opt_params={}, + minimize_params={"method": "BFGS", "options": {"gtol": 1e-6}}, + ) # test using L-BFGS-B scipy solver (supports bounds) bounds = CConstraintBox(lb=-2, ub=3) # fake box self._test_minimize( - COptimizerScipy, '3h-camel', opt_params={'bounds': bounds}, - minimize_params={ - 'method': 'L-BFGS-B', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "3h-camel", + opt_params={"bounds": bounds}, + minimize_params={"method": "L-BFGS-B", "options": {"gtol": 1e-6}}, + ) def test_minimize_beale(self): """Test for COptimizer.minimize() method on beale fun.""" self._test_minimize( - COptimizerScipy, 'beale', opt_params={}, - minimize_params={ - 'method': 'BFGS', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "beale", + opt_params={}, + minimize_params={"method": "BFGS", "options": {"gtol": 1e-6}}, + ) # test using L-BFGS-B scipy solver (supports bounds) bounds = CConstraintBox(lb=-2, ub=3) # fake box self._test_minimize( - COptimizerScipy, 'beale', opt_params={'bounds': bounds}, - minimize_params={ - 'method': 'L-BFGS-B', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "beale", + opt_params={"bounds": bounds}, + minimize_params={"method": "L-BFGS-B", "options": {"gtol": 1e-6}}, + ) def test_minimize_mc_cormick(self): """Test for COptimizer.minimize() method on mc-cormick fun.""" @@ -44,24 +52,30 @@ def test_minimize_mc_cormick(self): # test using L-BFGS-B scipy solver (supports bounds) bounds = CConstraintBox(lb=-2, ub=3) # fake box self._test_minimize( - COptimizerScipy, 'mc-cormick', opt_params={'bounds': bounds}, - minimize_params={ - 'method': 'L-BFGS-B', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "mc-cormick", + opt_params={"bounds": bounds}, + minimize_params={"method": "L-BFGS-B", "options": {"gtol": 1e-6}}, + ) def test_minimize_rosenbrock(self): """Test for COptimizer.minimize() method on rosenbrock fun.""" self._test_minimize( - COptimizerScipy, 'rosenbrock', opt_params={}, - minimize_params={ - 'method': 'BFGS', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "rosenbrock", + opt_params={}, + minimize_params={"method": "BFGS", "options": {"gtol": 1e-6}}, + ) # test using L-BFGS-B scipy solver (supports bounds) bounds = CConstraintBox(lb=-2, ub=3) # fake box self._test_minimize( - COptimizerScipy, 'rosenbrock', opt_params={'bounds': bounds}, - minimize_params={ - 'method': 'L-BFGS-B', 'options': {'gtol': 1e-6}}) + COptimizerScipy, + "rosenbrock", + opt_params={"bounds": bounds}, + minimize_params={"method": "L-BFGS-B", "options": {"gtol": 1e-6}}, + ) if __name__ == "__main__": diff --git a/src/secml/parallel/parfor.py b/src/secml/parallel/parfor.py index 09affd33..791a6d22 100644 --- a/src/secml/parallel/parfor.py +++ b/src/secml/parallel/parfor.py @@ -7,7 +7,7 @@ def parfor(task, processes, args): Applies a function *task* to each argument in *args*, using a pool of concurrent processes. - + Parameters ---------- task : function @@ -18,7 +18,7 @@ def parfor(task, processes, args): all processor's cores will be used. args : any Iterable object, where each element is an argument for task. - + Returns ------- out : iterable @@ -62,13 +62,15 @@ def parfor2(task, n_reps, processes, *args): # Don't try to spawn more processes than available CPUs num_cores = min(cpu_count(), processes) - return Parallel(n_jobs=num_cores, backend='multiprocessing')( - delayed(task)(i, *args) for i in range(n_reps)) + return Parallel(n_jobs=num_cores, backend="multiprocessing")( + delayed(task)(i, *args) for i in range(n_reps) + ) if __name__ == "__main__": from math import factorial + arguments = range(10) res = [factorial(z) for z in arguments] parres = parfor(factorial, 2, arguments) @@ -78,7 +80,7 @@ def element_wise_power(idx, list_of_scalars): print("Repetition {:} started...".format(idx)) list_of_scalars_pow = [] for obj_idx, obj in enumerate(list_of_scalars): - list_of_scalars_pow.append(list_of_scalars[obj_idx]**idx) + list_of_scalars_pow.append(list_of_scalars[obj_idx] ** idx) print("Repetition {:} ended...".format(idx)) return list_of_scalars_pow diff --git a/src/secml/settings.py b/src/secml/settings.py index 2ef449d6..0d616cb2 100644 --- a/src/secml/settings.py +++ b/src/secml/settings.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import os import sys import shutil @@ -12,20 +13,30 @@ # Logger for this module only. Use `secml.utils.CLog` elsewhere import logging + _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) _logger_handle = logging.StreamHandler(sys.stdout) -_logger_handle.setFormatter(logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s")) +_logger_handle.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +) _logger.addHandler(_logger_handle) -__all__ = ['SECML_HOME_DIR', 'SECML_CONFIG', - 'SECML_DS_DIR', 'SECML_MODELS_DIR', 'SECML_EXP_DIR', - 'SECML_STORE_LOGS', 'SECML_LOGS_DIR', - 'SECML_LOGS_FILENAME', 'SECML_LOGS_PATH', - 'SECML_PYTORCH_DIR', 'SECML_PYTORCH_USE_CUDA', - 'parse_config'] +__all__ = [ + "SECML_HOME_DIR", + "SECML_CONFIG", + "SECML_DS_DIR", + "SECML_MODELS_DIR", + "SECML_EXP_DIR", + "SECML_STORE_LOGS", + "SECML_LOGS_DIR", + "SECML_LOGS_FILENAME", + "SECML_LOGS_PATH", + "SECML_PYTORCH_DIR", + "SECML_PYTORCH_USE_CUDA", + "parse_config", +] def parse_config(conf_files, section, parameter, default=None, dtype=None): @@ -83,22 +94,23 @@ def parse_config(conf_files, section, parameter, default=None, dtype=None): elif dtype == bool: param = _config.getboolean(section, parameter) else: - raise TypeError( - "accepted dtypes are int, float, bool, str (or None)") + raise TypeError("accepted dtypes are int, float, bool, str (or None)") except NoSectionError: if default is not None: # Use default if config file does not exists # or does not have the desired section return default raise RuntimeError( - "no section `[{:}]` found in configuration files.".format(section)) + "no section `[{:}]` found in configuration files.".format(section) + ) except NoOptionError: if default is not None: # Use default if desired parameter is not specified under section return default raise RuntimeError( "parameter `{:}` not found under section `[{:}]` in " - "configuration files.".format(parameter, section)) + "configuration files.".format(parameter, section) + ) return param @@ -135,12 +147,10 @@ def _parse_env(name, default=None, dtype=None): if dtype == int or dtype == float or dtype == bool: return dtype(val) else: - raise TypeError( - "accepted dtypes are int, float, bool, str (or None)") + raise TypeError("accepted dtypes are int, float, bool, str (or None)") -def _parse_env_config(name, conf_files, section, parameter, - default=None, dtype=None): +def _parse_env_config(name, conf_files, section, parameter, default=None, dtype=None): """Parse input variable from `os.environ` or configuration files. If input variable `name` is not found in `os.environ`, @@ -166,8 +176,8 @@ def _parse_env_config(name, conf_files, section, parameter, SECML_HOME_DIR = _parse_env( - 'SECML_HOME_DIR', - default=os.path.join(os.path.expanduser('~'), 'secml-data')) + "SECML_HOME_DIR", default=os.path.join(os.path.expanduser("~"), "secml-data") +) """Main directory for storing datasets, experiments, temporary files. This is set by default to: @@ -177,19 +187,19 @@ def _parse_env_config(name, conf_files, section, parameter, """ if not os.path.isdir(SECML_HOME_DIR): os.makedirs(os.path.abspath(SECML_HOME_DIR)) - _logger.info('New `SECML_HOME_DIR` created: {:}'.format(SECML_HOME_DIR)) + _logger.info("New `SECML_HOME_DIR` created: {:}".format(SECML_HOME_DIR)) -SECML_CONFIG_FNAME = 'secml.conf' +SECML_CONFIG_FNAME = "secml.conf" """Name of the configuration file (default `secml.conf`).""" if not os.path.isfile(os.path.join(SECML_HOME_DIR, SECML_CONFIG_FNAME)): - def_config = os.path.normpath(os.path.join(os.path.dirname( - os.path.abspath(__file__)), SECML_CONFIG_FNAME)) + def_config = os.path.normpath( + os.path.join(os.path.dirname(os.path.abspath(__file__)), SECML_CONFIG_FNAME) + ) home_config = os.path.join(SECML_HOME_DIR, SECML_CONFIG_FNAME) # Copy the default config file to SECML_HOME_DIR if not already available shutil.copy(def_config, home_config) - _logger.info( - 'Default configuration file copied to: {:}'.format(home_config)) + _logger.info("Default configuration file copied to: {:}".format(home_config)) def _config_fpath(): @@ -215,18 +225,20 @@ def _config_fpath(): to the lowest priority. """ + def gen_candidates(): yield os.path.join(os.getcwd(), SECML_CONFIG_FNAME) try: - secml_config = os.environ['$SECML_CONFIG'] + secml_config = os.environ["$SECML_CONFIG"] except KeyError: pass else: yield secml_config - yield os.path.join(secml_config, 'SECML_CONFIG_FNAME') + yield os.path.join(secml_config, "SECML_CONFIG_FNAME") yield os.path.join(SECML_HOME_DIR, SECML_CONFIG_FNAME) - yield os.path.normpath(os.path.join(os.path.dirname( - os.path.abspath(__file__)), SECML_CONFIG_FNAME)) + yield os.path.normpath( + os.path.join(os.path.dirname(os.path.abspath(__file__)), SECML_CONFIG_FNAME) + ) candidates = [] for fname in gen_candidates(): @@ -245,8 +257,12 @@ def gen_candidates(): # ------- # SECML_DS_DIR = _parse_env_config( - 'SECML_DS_DIR', SECML_CONFIG, 'secml', 'ds_dir', - dtype=str, default=os.path.join(SECML_HOME_DIR, 'datasets') + "SECML_DS_DIR", + SECML_CONFIG, + "secml", + "ds_dir", + dtype=str, + default=os.path.join(SECML_HOME_DIR, "datasets"), ) """Main directory for storing datasets. @@ -255,11 +271,15 @@ def gen_candidates(): """ if not os.path.isdir(SECML_DS_DIR): os.makedirs(os.path.abspath(SECML_DS_DIR)) - _logger.info('New `SECML_DS_DIR` created: {:}'.format(SECML_DS_DIR)) + _logger.info("New `SECML_DS_DIR` created: {:}".format(SECML_DS_DIR)) SECML_MODELS_DIR = _parse_env_config( - 'SECML_MODELS_DIR', SECML_CONFIG, 'secml', 'models_dir', - dtype=str, default=os.path.join(SECML_HOME_DIR, 'models') + "SECML_MODELS_DIR", + SECML_CONFIG, + "secml", + "models_dir", + dtype=str, + default=os.path.join(SECML_HOME_DIR, "models"), ) """Main directory where pre-trained models are stored. @@ -268,11 +288,15 @@ def gen_candidates(): """ if not os.path.isdir(SECML_MODELS_DIR): os.makedirs(os.path.abspath(SECML_MODELS_DIR)) - _logger.info('New `SECML_MODELS_DIR` created: {:}'.format(SECML_MODELS_DIR)) + _logger.info("New `SECML_MODELS_DIR` created: {:}".format(SECML_MODELS_DIR)) SECML_EXP_DIR = _parse_env_config( - 'SECML_EXP_DIR', SECML_CONFIG, 'secml', 'exp_dir', - dtype=str, default=os.path.join(SECML_HOME_DIR, 'experiments') + "SECML_EXP_DIR", + SECML_CONFIG, + "secml", + "exp_dir", + dtype=str, + default=os.path.join(SECML_HOME_DIR, "experiments"), ) """Main directory of experiments data. @@ -281,21 +305,29 @@ def gen_candidates(): """ if not os.path.isdir(SECML_EXP_DIR): os.makedirs(os.path.abspath(SECML_EXP_DIR)) - _logger.info('New `SECML_EXP_DIR` created: {:}'.format(SECML_EXP_DIR)) + _logger.info("New `SECML_EXP_DIR` created: {:}".format(SECML_EXP_DIR)) # ------------ # # [SECML:LOGS] # # ------------ # SECML_STORE_LOGS = _parse_env_config( - 'SECML_STORE_LOGS', SECML_CONFIG, 'secml:logs', 'store_logs', - dtype=bool, default=False + "SECML_STORE_LOGS", + SECML_CONFIG, + "secml:logs", + "store_logs", + dtype=bool, + default=False, ) """Whether to store logs to file. Default False.""" SECML_LOGS_DIR = _parse_env_config( - 'SECML_LOGS_DIR', SECML_CONFIG, 'secml:logs', 'logs_dir', - dtype=str, default=os.path.join(SECML_HOME_DIR, 'logs') + "SECML_LOGS_DIR", + SECML_CONFIG, + "secml:logs", + "logs_dir", + dtype=str, + default=os.path.join(SECML_HOME_DIR, "logs"), ) """Directory where logs will be stored. @@ -304,11 +336,15 @@ def gen_candidates(): """ if not os.path.isdir(SECML_LOGS_DIR): os.makedirs(os.path.abspath(SECML_LOGS_DIR)) - _logger.info('New `SECML_LOGS_DIR` created: {:}'.format(SECML_LOGS_DIR)) + _logger.info("New `SECML_LOGS_DIR` created: {:}".format(SECML_LOGS_DIR)) SECML_LOGS_FILENAME = _parse_env_config( - 'SECML_LOGS_FILENAME', SECML_CONFIG, 'secml:logs', 'logs_filename', - dtype=str, default='logs.log' + "SECML_LOGS_FILENAME", + SECML_CONFIG, + "secml:logs", + "logs_filename", + dtype=str, + default="logs.log", ) """Name of the logs file on disk. Default: `logs.log`.""" @@ -321,17 +357,22 @@ def gen_candidates(): # --------------- # SECML_PYTORCH_USE_CUDA = _parse_env_config( - 'SECML_PYTORCH_USE_CUDA', SECML_CONFIG, 'secml:pytorch', 'use_cuda', - dtype=bool, default=True + "SECML_PYTORCH_USE_CUDA", + SECML_CONFIG, + "secml:pytorch", + "use_cuda", + dtype=bool, + default=True, ) """Controls if CUDA should be used by the PyTorch wrapper when available.""" -SECML_PYTORCH_DIR =_parse_env( - 'SECML_PYTORCH_DIR', - default=os.path.join(os.path.expanduser('~'), 'secml-data/pytorch-data')) +SECML_PYTORCH_DIR = _parse_env( + "SECML_PYTORCH_DIR", + default=os.path.join(os.path.expanduser("~"), "secml-data/pytorch-data"), +) if not os.path.isdir(SECML_PYTORCH_DIR): os.makedirs(os.path.abspath(SECML_PYTORCH_DIR)) - _logger.info('New `SECML_PYTORCH_DIR` created: {:}'.format(SECML_PYTORCH_DIR)) + _logger.info("New `SECML_PYTORCH_DIR` created: {:}".format(SECML_PYTORCH_DIR)) """Directory for storing PyTorch data. This is set by default to: `{SECML_HOME_DIR}`/pytorch-data` diff --git a/src/secml/test_simple.py b/src/secml/test_simple.py index ab46627e..9ac8e00f 100644 --- a/src/secml/test_simple.py +++ b/src/secml/test_simple.py @@ -8,15 +8,16 @@ def test_simple(): """Plot the result of a dot product operation.""" + def test_dot(): a = CArray([1, 2, 3]) b = CArray([10, 20, 30]) return a.dot(b) fig = CFigure() - fig.sp.plot(test_dot(), marker='o') + fig.sp.plot(test_dot(), marker="o") fig.show() -if __name__ == '__main__': +if __name__ == "__main__": test_simple() diff --git a/src/secml/testing/__init__.py b/src/secml/testing/__init__.py index d6d744eb..c51066c6 100644 --- a/src/secml/testing/__init__.py +++ b/src/secml/testing/__init__.py @@ -1,7 +1,6 @@ try: import pytest except ImportError: - raise ImportError( - "Install extra component `unittests` to use `secml.testing`") + raise ImportError("Install extra component `unittests` to use `secml.testing`") from .c_unittest import CUnitTest diff --git a/src/secml/testing/c_unittest.py b/src/secml/testing/c_unittest.py index e6f054b0..15c94827 100644 --- a/src/secml/testing/c_unittest.py +++ b/src/secml/testing/c_unittest.py @@ -6,6 +6,7 @@ .. moduleauthor:: Ambra Demontis """ + import unittest import pytest import numpy as np @@ -39,9 +40,11 @@ def setUpClass(cls): # in the same directory of the calling unittest. # Use `store_logs=True` in configuration file to activate this cls._logger = CLog( - logger_id='unittest', add_stream=True, - file_handler='unittests.log' if SECML_STORE_LOGS is True else None) - cls._logger.set_level('DEBUG') + logger_id="unittest", + add_stream=True, + file_handler="unittests.log" if SECML_STORE_LOGS is True else None, + ) + cls._logger.set_level("DEBUG") # As per python 3.2 filterwarnings are reset by unittests for each # TestCase. We thus need to restore our filter for every TestCase @@ -52,57 +55,75 @@ def timer(self): return self.logger.timer() # Raises an AssertionError if two array_like objects are not equal - def assert_array_equal(self, x, y, err_msg='', verbose=True): - x = x.tondarray() if hasattr(x, 'tondarray') else x - y = y.tondarray() if hasattr(y, 'tondarray') else y + def assert_array_equal(self, x, y, err_msg="", verbose=True): + x = x.tondarray() if hasattr(x, "tondarray") else x + y = y.tondarray() if hasattr(y, "tondarray") else y return npt.assert_array_equal(x, y, err_msg, verbose) + assert_array_equal.__doc__ = npt.assert_array_equal.__doc__ # AssertionError if two objects are not equal up to desired precision - def assert_array_almost_equal( - self, x, y, decimal=6, err_msg='', verbose=True): - x = x.tondarray() if hasattr(x, 'tondarray') else x - y = y.tondarray() if hasattr(y, 'tondarray') else y + def assert_array_almost_equal(self, x, y, decimal=6, err_msg="", verbose=True): + x = x.tondarray() if hasattr(x, "tondarray") else x + y = y.tondarray() if hasattr(y, "tondarray") else y return npt.assert_array_almost_equal(x, y, decimal, err_msg, verbose) + assert_array_almost_equal.__doc__ = npt.assert_array_almost_equal.__doc__ # Compare two arrays relatively to their spacing def assert_array_almost_equal_nulp(self, x, y, nulp=1): - x = x.tondarray() if hasattr(x, 'tondarray') else x - y = y.tondarray() if hasattr(y, 'tondarray') else y + x = x.tondarray() if hasattr(x, "tondarray") else x + y = y.tondarray() if hasattr(y, "tondarray") else y return npt.assert_array_almost_equal_nulp(x, y, nulp) + assert_array_almost_equal_nulp.__doc__ = npt.assert_array_almost_equal_nulp.__doc__ # AssertionError if two array_like objects are not ordered by less than - def assert_array_less(self, x, y, err_msg='', verbose=True): - x = x.tondarray() if hasattr(x, 'tondarray') else x - y = y.tondarray() if hasattr(y, 'tondarray') else y + def assert_array_less(self, x, y, err_msg="", verbose=True): + x = x.tondarray() if hasattr(x, "tondarray") else x + y = y.tondarray() if hasattr(y, "tondarray") else y return npt.assert_array_less(x, y, err_msg, verbose) + assert_array_less.__doc__ = npt.assert_array_less.__doc__ # Check that all elems differ in at most N Units in the last place def assert_array_max_ulp(self, a, b, maxulp=1, dtype=None): - a = a.tondarray() if hasattr(a, 'tondarray') else a - b = b.tondarray() if hasattr(b, 'tondarray') else b + a = a.tondarray() if hasattr(a, "tondarray") else a + b = b.tondarray() if hasattr(b, "tondarray") else b return npt.assert_array_max_ulp(a, b, maxulp, dtype) + assert_array_max_ulp.__doc__ = npt.assert_array_max_ulp.__doc__ # AssertionError if two objects are not equal up to desired tolerance - def assert_allclose(self, actual, desired, rtol=1e-6, atol=0, - equal_nan=True, err_msg='', verbose=True): - actual = actual.tondarray() if hasattr(actual, 'tondarray') else actual - des = desired.tondarray() if hasattr(desired, 'tondarray') else desired - return npt.assert_allclose( - actual, des, rtol, atol, equal_nan, err_msg, verbose) + def assert_allclose( + self, + actual, + desired, + rtol=1e-6, + atol=0, + equal_nan=True, + err_msg="", + verbose=True, + ): + actual = actual.tondarray() if hasattr(actual, "tondarray") else actual + des = desired.tondarray() if hasattr(desired, "tondarray") else desired + return npt.assert_allclose(actual, des, rtol, atol, equal_nan, err_msg, verbose) + assert_allclose.__doc__ = npt.assert_allclose.__doc__ # AssertionError if two items are not equal up to significant digits. def assert_approx_equal( - self, actual, desired, significant=6, err_msg='', verbose=True,): - actual = actual.tondarray() if hasattr(actual, 'tondarray') else actual - des = desired.tondarray() if hasattr(desired, 'tondarray') else desired - return npt.assert_approx_equal( - actual, des, significant, err_msg, verbose) + self, + actual, + desired, + significant=6, + err_msg="", + verbose=True, + ): + actual = actual.tondarray() if hasattr(actual, "tondarray") else actual + des = desired.tondarray() if hasattr(desired, "tondarray") else desired + return npt.assert_approx_equal(actual, des, significant, err_msg, verbose) + assert_approx_equal.__doc__ = npt.assert_approx_equal.__doc__ @staticmethod @@ -128,8 +149,10 @@ def assertIsSubDtype(actual, desired): else: # Convert built-in types to numpy dtypes for using issubdtype desired = np.dtype(desired).type if not np.issubdtype(actual, desired): - raise AssertionError("{:} is not lower/equal to {:} in the type " - "hierarchy.".format(actual, desired)) + raise AssertionError( + "{:} is not lower/equal to {:} in the type " + "hierarchy.".format(actual, desired) + ) # Skip something skip = pytest.mark.skip diff --git a/src/secml/utils/__init__.py b/src/secml/utils/__init__.py index 5c68d484..3048587b 100755 --- a/src/secml/utils/__init__.py +++ b/src/secml/utils/__init__.py @@ -1,5 +1,6 @@ from .c_log import CLog, CTimer from . import c_file_manager as fm + # Utility collections from .mixed_utils import OrderedFlexibleClass from .dict_utils import LastInDict, SubLevelsDict, load_dict, merge_dicts diff --git a/src/secml/utils/c_file_manager.py b/src/secml/utils/c_file_manager.py index 2809a882..0d71d51a 100755 --- a/src/secml/utils/c_file_manager.py +++ b/src/secml/utils/c_file_manager.py @@ -5,14 +5,27 @@ .. moduleauthor:: Marco Melis """ + import os import shutil import tempfile # Remember to add any new method to following list -__all__ = ['folder_exist', 'file_exist', 'make_folder_incwd', 'make_folder', - 'remove_folder', 'make_rand_folder', 'abspath', 'normpath', - 'join', 'split', 'expanduser', 'dirsep', 'get_tempfile'] +__all__ = [ + "folder_exist", + "file_exist", + "make_folder_incwd", + "make_folder", + "remove_folder", + "make_rand_folder", + "abspath", + "normpath", + "join", + "split", + "expanduser", + "dirsep", + "get_tempfile", +] def folder_exist(folder_path): @@ -55,7 +68,9 @@ def make_folder_incwd(folder_name, mode=0o777): make_folder : Create a directory given full path. """ - return make_folder(os.path.join(os.path.dirname(os.getcwd()), folder_name), mode=mode) + return make_folder( + os.path.join(os.path.dirname(os.getcwd()), folder_name), mode=mode + ) def make_folder(folder_path, mode=0o777): @@ -142,8 +157,8 @@ def _ignore_(path, names): return _ignore_ -def copy_folder(folder_path, copy_folder_path, ignore_file=''): - """Copy a folder and every folder/file it contain. +def copy_folder(folder_path, copy_folder_path, ignore_file=""): + """Copy a folder and every folder/file it contain. Path must point to a directory (can not be a symbolic link). @@ -153,8 +168,8 @@ def copy_folder(folder_path, copy_folder_path, ignore_file=''): Absolute or relative path to folder to copy. copy_folder_path : str Absolute or relative path of new folder where you want store folder_path's data. - ignore_file : tuple - contain folder name of file + path that we wouldn't copy + ignore_file : tuple + contain folder name of file + path that we wouldn't copy for example : file_to_ignore = ('filetoignore.txt','foldertoignore') """ @@ -162,14 +177,18 @@ def copy_folder(folder_path, copy_folder_path, ignore_file=''): raise OSError("folder that you want copy doesn't exist!") else: try: - shutil.copytree(folder_path, copy_folder_path, ignore=shutil.ignore_patterns(*ignore_file)) + shutil.copytree( + folder_path, + copy_folder_path, + ignore=shutil.ignore_patterns(*ignore_file), + ) except OSError as e: - print('Directory not copied. Error: %s' % e) + print("Directory not copied. Error: %s" % e) # TODO: CHECK DOCSTRING def copy_file(file_path, copy_file_path): - """Copy one file. + """Copy one file. Path must point to a file (can not be a symbolic link). @@ -186,7 +205,7 @@ def copy_file(file_path, copy_file_path): try: shutil.copy(file_path, copy_file_path) except OSError as e: - print('File not copied. Error: %s' % e) + print("File not copied. Error: %s" % e) def make_rand_folder(folder_path=None, custom_name=None): @@ -217,10 +236,19 @@ def make_rand_folder(folder_path=None, custom_name=None): """ # Generating random folder ID from numpy import random + folder_id = random.randint(1, 1000000) - folder_name = str(folder_id) if custom_name is None else "{:}_{:}".format(custom_name, folder_id) + folder_name = ( + str(folder_id) + if custom_name is None + else "{:}_{:}".format(custom_name, folder_id) + ) # make_folder will manage errors - return make_folder_incwd(folder_name) if folder_path is None else make_folder(join(folder_path, folder_name)) + return ( + make_folder_incwd(folder_name) + if folder_path is None + else make_folder(join(folder_path, folder_name)) + ) def abspath(file_name): diff --git a/src/secml/utils/c_log.py b/src/secml/utils/c_log.py index a253baed..14e18b8f 100755 --- a/src/secml/utils/c_log.py +++ b/src/secml/utils/c_log.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import logging import time import sys @@ -14,7 +15,7 @@ # Custom logging level that DISABLE logging of all messages DISABLE = 100 -logging.addLevelName(100, 'DISABLE') +logging.addLevelName(100, "DISABLE") CRITICAL = 50 FATAL = CRITICAL ERROR = 40 @@ -28,8 +29,7 @@ logging.captureWarnings(True) # Default formatter -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s") +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") class CLog: @@ -92,8 +92,14 @@ class CLog: """ - def __init__(self, level=None, logger_id=None, add_stream=True, - file_handler=None, propagate=False): + def __init__( + self, + level=None, + logger_id=None, + add_stream=True, + file_handler=None, + propagate=False, + ): # Setting up logger with default logging level (WARNING) self._logger_id = None if logger_id is None else str(logger_id) self._propagate = propagate @@ -126,7 +132,7 @@ def __getstate__(self): """Return CLog instance before pickling.""" state = dict(self.__dict__) # We now remove the store logger (will be restored after) - del state['_logger'] + del state["_logger"] return state def __setstate__(self, state): @@ -162,8 +168,9 @@ def attach_file(self, filepath): """Adds a file handler to the logger.""" # Handler will be attached only if not already there for h in self._logger.handlers: - if isinstance(h, logging.FileHandler) and \ - h.baseFilename == os.path.abspath(filepath): + if isinstance(h, logging.FileHandler) and h.baseFilename == os.path.abspath( + filepath + ): return handler = logging.FileHandler(filepath) self._add_handler(handler) @@ -198,12 +205,15 @@ def get_child(self, name): """ # Root logger can be created using '' (empty string) as name - parent_id = '' if self._logger_id is None else self.logger_id + '.' + parent_id = "" if self._logger_id is None else self.logger_id + "." # Stream and/or file handler are set for # ancestors only (to avoid output duplication) - return self.__class__(logger_id=parent_id + str(name), - add_stream=False, file_handler=None, - propagate=True) # This is a child, so propagate + return self.__class__( + logger_id=parent_id + str(name), + add_stream=False, + file_handler=None, + propagate=True, + ) # This is a child, so propagate def log(self, level, msg, *args, **kwargs): """Logs a message with specified level on this logger. @@ -344,8 +354,9 @@ def catch_warnings(record=False): return warnings.catch_warnings(record=record) @staticmethod - def filterwarnings(action, message="", category=Warning, - module="", lineno=0, append=False): + def filterwarnings( + action, message="", category=Warning, module="", lineno=0, append=False + ): """Insert an entry into the list of warnings filters (at the front). Wrapper of `warnings.filterwarnings`. @@ -367,8 +378,13 @@ def filterwarnings(action, message="", category=Warning, """ return warnings.filterwarnings( - action, message=message, category=category, - module=module, lineno=lineno, append=append) + action, + message=message, + category=category, + module=module, + lineno=lineno, + append=append, + ) class CTimer: @@ -492,16 +508,18 @@ def timed(log=None, msg=None): Elapsed time: ... ms """ + def wrapper(fun): @wraps(fun) # To make wrapped_fun work as fun def wrapped_fun(*args, **kwargs): # Setting a custom message msg_a = msg if msg is None: - msg_a = "Entering timed block " \ - "`{:}`...".format(fun.__name__) + msg_a = "Entering timed block " "`{:}`...".format(fun.__name__) # Execute the function with the timer with CTimer(log=log, msg=msg_a): return fun(*args, **kwargs) + return wrapped_fun + return wrapper diff --git a/src/secml/utils/dict_utils.py b/src/secml/utils/dict_utils.py index 218ae419..a2003003 100644 --- a/src/secml/utils/dict_utils.py +++ b/src/secml/utils/dict_utils.py @@ -5,13 +5,13 @@ .. moduleauthor:: Marco Melis """ + from collections.abc import MutableMapping -__all__ = ['load_dict', 'merge_dicts', 'invert_dict', - 'LastInDict', 'SubLevelsDict'] +__all__ = ["load_dict", "merge_dicts", "invert_dict", "LastInDict", "SubLevelsDict"] -def load_dict(file_path, values_dtype=str, encoding='ascii'): +def load_dict(file_path, values_dtype=str, encoding="ascii"): """Load dictionary from textfile. Each file's line should be @@ -33,10 +33,10 @@ def load_dict(file_path, values_dtype=str, encoding='ascii'): """ new_dict = {} - with open(file_path, mode='rt', encoding=encoding) as df: + with open(file_path, mode="rt", encoding=encoding) as df: for key_line in df: # a line is 'key: value' - key_line_split = key_line.split(':') + key_line_split = key_line.split(":") try: # Removing any space from key value before setting new_dict[key_line_split[0]] = values_dtype(key_line_split[1].strip()) @@ -101,7 +101,10 @@ def invert_dict(d): {1: ['k2', 'k3'], 2: ['k1', 'k2'], 3: 'k2'} """ - def tolist(x): return [x] if not isinstance(x, (list, tuple)) else list(x) + + def tolist(x): + return [x] if not isinstance(x, (list, tuple)) else list(x) + new_d = {} for k in d.items(): for v in tolist(k[1]): @@ -138,6 +141,7 @@ class LastInDict(MutableMapping): 102030 """ + def __init__(self): self._data = dict() self._rw_lastin_key = None @@ -200,12 +204,13 @@ class SubLevelsDict(MutableMapping): 10 """ + def __init__(self, data): self._data = dict(data) def __setitem__(self, key, value): # Support for recursion, e.g. -> attr1.attr2 - key = key.split('.') + key = key.split(".") # Setting a key element works like in dictionaries if len(key) == 1: @@ -223,11 +228,11 @@ def __setitem__(self, key, value): if hasattr(data, key[-1]): setattr(data, key[-1], value) else: - raise AttributeError("'{:}' not found.".format('.'.join(key))) + raise AttributeError("'{:}' not found.".format(".".join(key))) def __getitem__(self, key): # Support for recursion, e.g. -> attr1.attr2 - key = key.split('.') + key = key.split(".") # The first element of key is a key of the dictionary data = self._data[key[0]] # Now get the desired subattributes recursively, @@ -238,7 +243,7 @@ def __getitem__(self, key): return data def __delitem__(self, key): - if len(key.split('.')) != 1: + if len(key.split(".")) != 1: raise ValueError("only first-level attributes can be removed.") del self._data[key] @@ -247,7 +252,7 @@ def __len__(self): def __contains__(self, key): # Support for recursion, e.g. -> attr1.attr2 - key = key.split('.') + key = key.split(".") # Check the first element, is a key of the dictionary if key[0] not in self._data: diff --git a/src/secml/utils/download_utils.py b/src/secml/utils/download_utils.py index 8e0cc966..9b3c83e0 100644 --- a/src/secml/utils/download_utils.py +++ b/src/secml/utils/download_utils.py @@ -5,6 +5,7 @@ .. moduleauthor:: Marco Melis """ + import sys import re import requests @@ -13,11 +14,10 @@ from secml.utils import fm -__all__ = ['dl_file', 'dl_file_gitlab', 'md5'] +__all__ = ["dl_file", "dl_file_gitlab", "md5"] -def dl_file(url, output_dir, user=None, headers=None, - chunk_size=1024, md5_digest=None): +def dl_file(url, output_dir, user=None, headers=None, chunk_size=1024, md5_digest=None): """Download file from input url and store in output_dir. Parameters @@ -40,19 +40,20 @@ def dl_file(url, output_dir, user=None, headers=None, """ # Parsing user string - auth = tuple(user.split(':')) if user is not None else None + auth = tuple(user.split(":")) if user is not None else None # If no password is specified, use an empty string - auth = (auth[0], '') if auth is not None and len(auth) == 1 else auth + auth = (auth[0], "") if auth is not None and len(auth) == 1 else auth r = requests.get(url, auth=auth, headers=headers, stream=True) if r.status_code != 200: raise RuntimeError( - "File is not available (error code {:})".format(r.status_code)) + "File is not available (error code {:})".format(r.status_code) + ) # Get file size (bytes) if "content-length" in r.headers: - total_size = r.headers.get('content-length').strip() + total_size = r.headers.get("content-length").strip() total_size = int(total_size) else: # Total size unknown total_size = None @@ -73,17 +74,16 @@ def dl_file(url, output_dir, user=None, headers=None, fm.make_folder(output_dir) try: # Get the filename from the response headers - fname = re.findall( - r"filename=\"(.+)\"", r.headers["Content-Disposition"])[0] + fname = re.findall(r"filename=\"(.+)\"", r.headers["Content-Disposition"])[0] except (KeyError, IndexError): # Or use the last part of download url (removing parameters) - fname = url.split('/')[-1].split('?', 1)[0] + fname = url.split("/")[-1].split("?", 1)[0] # Build full path of output file out_path = fm.join(output_dir, fname) # Read data and store each chunk - with open(out_path, 'wb') as f: + with open(out_path, "wb") as f: for chunk in r.iter_content(chunk_size=chunk_size): if chunk: # filter out keep-alive new chunks f.write(chunk) @@ -93,8 +93,11 @@ def dl_file(url, output_dir, user=None, headers=None, done = int((50 * dl) / total_size) if sys.stdout.isatty() is True: # Provide real-time updates (if stdout is a tty) - sys.stdout.write("\r[{:}{:}] {:}/{:}".format( - '=' * done, ' ' * (50-done), dl, total_size)) + sys.stdout.write( + "\r[{:}{:}] {:}/{:}".format( + "=" * done, " " * (50 - done), dl, total_size + ) + ) sys.stdout.flush() sys.stdout.write("\nFile stored in `{:}`\n".format(out_path)) @@ -107,8 +110,15 @@ def dl_file(url, output_dir, user=None, headers=None, return out_path -def dl_file_gitlab(repo_url, file_path, output_dir, branch='master', - token=None, chunk_size=1024, md5_digest=None): +def dl_file_gitlab( + repo_url, + file_path, + output_dir, + branch="master", + token=None, + chunk_size=1024, + md5_digest=None, +): """Download file from a gitlab.com repository and store in output_dir. Parameters @@ -135,33 +145,36 @@ def dl_file_gitlab(repo_url, file_path, output_dir, branch='master', """ # Url of Repository files API, to be populated later - api_url = 'https://gitlab.com/api/v4/projects/' \ - '{:}/repository/files/{:}/raw?ref={:}' + api_url = ( + "https://gitlab.com/api/v4/projects/" "{:}/repository/files/{:}/raw?ref={:}" + ) # Decode the repository url by removing 'gitlab.com' prefix if defined # To make urlparse work correctly, we should add a '//gitlab.com/' prefix - if repo_url.startswith('gitlab.com'): # Handle 'gitlab.com/REPO' case - repo_url = '//' + repo_url + if repo_url.startswith("gitlab.com"): # Handle 'gitlab.com/REPO' case + repo_url = "//" + repo_url if not repo_url.startswith( - ('https://gitlab.com', 'http://gitlab.com', '//gitlab.com')): + ("https://gitlab.com", "http://gitlab.com", "//gitlab.com") + ): # Handle the '/REPO/' case by stripping the first slash (if any) - repo_url = '//gitlab.com/' + repo_url.lstrip('/') + repo_url = "//gitlab.com/" + repo_url.lstrip("/") # Strip last slash (if any) and parse - repo_url_parsed = parse.urlparse(repo_url.rstrip('/')) + repo_url_parsed = parse.urlparse(repo_url.rstrip("/")) # Remove the first slash always left by urlparse and encode - repo_url_encoded = parse.quote(repo_url_parsed.path[1:], safe='') + repo_url_encoded = parse.quote(repo_url_parsed.path[1:], safe="") # Strip the first slash (if any) and encode the file path - file_path_encoded = parse.quote(file_path.lstrip('/'), safe='') + file_path_encoded = parse.quote(file_path.lstrip("/"), safe="") # Build the final download url url = api_url.format(repo_url_encoded, file_path_encoded, branch) # Pass the private token as a request's header if defined - headers = {'PRIVATE-TOKEN': token} if token is not None else None + headers = {"PRIVATE-TOKEN": token} if token is not None else None - return dl_file(url, output_dir, headers=headers, - chunk_size=chunk_size, md5_digest=md5_digest) + return dl_file( + url, output_dir, headers=headers, chunk_size=chunk_size, md5_digest=md5_digest + ) def md5(fname, blocksize=65536): @@ -181,7 +194,7 @@ def md5(fname, blocksize=65536): """ hash_md5 = hashlib.md5() - with open(fname, mode='rb') as f: + with open(fname, mode="rb") as f: for chunk in iter(lambda: f.read(blocksize), b""): hash_md5.update(chunk) return hash_md5.hexdigest() diff --git a/src/secml/utils/mixed_utils.py b/src/secml/utils/mixed_utils.py index 9fb64f87..fc457dd2 100644 --- a/src/secml/utils/mixed_utils.py +++ b/src/secml/utils/mixed_utils.py @@ -6,7 +6,7 @@ """ -__all__ = ['AverageMeter', 'OrderedFlexibleClass', 'check_is_fitted'] +__all__ = ["AverageMeter", "OrderedFlexibleClass", "check_is_fitted"] class AverageMeter: @@ -24,13 +24,14 @@ class AverageMeter: Number of seen values. """ + def __init__(self): self.reset() def reset(self): - self.val = 0. - self.avg = 0. - self.sum = 0. + self.val = 0.0 + self.avg = 0.0 + self.sum = 0.0 self.count = 0 def update(self, val, n=1): @@ -84,7 +85,9 @@ def __init__(self, *items): if len(items) == 0: raise ValueError("class must have at least one attribute.") if not all(isinstance(i, tuple) for i in items): - raise TypeError("each attribute must be specified as a tuple of (key, value).") + raise TypeError( + "each attribute must be specified as a tuple of (key, value)." + ) # List with attributes sequence (this provides the fixed order) self._params = [] # __setattr__ will store the attribute in `_params` and set its value @@ -99,7 +102,7 @@ def attr_order(self): def __setattr__(self, key, value): """Set desired attribute and store the key in `_params`.""" # Register attribute only if new (skip service attribute _params) - if key != '_params' and not hasattr(self, key): + if key != "_params" and not hasattr(self, key): self._params.append(key) # Set attribute value in the standard way super(OrderedFlexibleClass, self).__setattr__(key, value) @@ -145,14 +148,15 @@ def check_is_fitted(obj, attributes, msg=None, check_all=True): if msg is None: msg = "this `{name}` is not trained. Call `.fit()` first." - if not hasattr(obj, 'fit'): + if not hasattr(obj, "fit"): raise TypeError("`{:}` does not implement `.fit()`.".format(obj)) if is_str(attributes): attributes = [attributes] elif not is_list(attributes): raise TypeError( - "the attribute(s) to check must be a string or a list of strings") + "the attribute(s) to check must be a string or a list of strings" + ) condition = any if check_all is True else all diff --git a/src/secml/utils/pickle_utils.py b/src/secml/utils/pickle_utils.py index e6244ffb..19616d49 100644 --- a/src/secml/utils/pickle_utils.py +++ b/src/secml/utils/pickle_utils.py @@ -6,13 +6,14 @@ .. moduleauthor:: Marco Melis """ + import pickle import gzip from secml.utils import fm # Remember to add any new method to following list -__all__ = ['save', 'load'] +__all__ = ["save", "load"] def save(file_path, obj): @@ -44,16 +45,16 @@ def save(file_path, obj): """ # Adding extension to destination file if user forgot about it... file_ext = fm.splitext(file_path)[1] - file_path = file_path + '.gz' if file_ext != '.gz' else file_path + file_path = file_path + ".gz" if file_ext != ".gz" else file_path # open the reference to target file - with gzip.open(file_path, 'wb') as f_ref: + with gzip.open(file_path, "wb") as f_ref: pickle.dump(obj, f_ref, protocol=4) return fm.join(fm.abspath(file_path), fm.split(file_path)[1]) -def load(file_path, encoding='bytes'): +def load(file_path, encoding="bytes"): """Load object from cPickle file. Load a generic gzip compressed python object created by `.save`. @@ -66,6 +67,6 @@ def load(file_path, encoding='bytes'): Encoding to use for loading the file. Default 'bytes'. """ - with gzip.open(file_path, 'rb') as f_ref: + with gzip.open(file_path, "rb") as f_ref: # Loading and returning the object return pickle.load(f_ref, encoding=encoding) diff --git a/src/secml/utils/tests/test_c_log.py b/src/secml/utils/tests/test_c_log.py index 1bf73c3f..132fabee 100644 --- a/src/secml/utils/tests/test_c_log.py +++ b/src/secml/utils/tests/test_c_log.py @@ -8,9 +8,7 @@ class TestCLog(unittest.TestCase): @classmethod def setUpClass(cls): - cls.logger = CLog(logger_id=cls.__name__, - add_stream=True, - level='DEBUG') + cls.logger = CLog(logger_id=cls.__name__, add_stream=True, level="DEBUG") def test_timed_nologging(self): @@ -43,10 +41,11 @@ def test_timed_logging(self): # Test for predefined interval with error with self.assertRaises(TypeError): with self.logger.timer() as t: - time.sleep('test') - self.logger.info("Interval " + str(t.interval) + - " should have been logged anyway") + time.sleep("test") + self.logger.info( + "Interval " + str(t.interval) + " should have been logged anyway" + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/secml/utils/tests/test_download_utils.py b/src/secml/utils/tests/test_download_utils.py index 29ed8659..76a10a20 100644 --- a/src/secml/utils/tests/test_download_utils.py +++ b/src/secml/utils/tests/test_download_utils.py @@ -15,15 +15,15 @@ def setUp(self): # Retrieve the temporary files directory self.tempdir = tempfile.gettempdir() # Url for the mock requests - self.test_url = 'mock://test.utils.download_utils' + self.test_url = "mock://test.utils.download_utils" @requests_mock.Mocker() def test_dlfile(self, m): """Test for `dl_file` standard beahavior.""" # Test for an available text file - url = self.test_url + '/test.txt' - file_content = 'resp' + url = self.test_url + "/test.txt" + file_content = "resp" m.get(url, text=file_content) out_file = dl_file(url, self.tempdir) @@ -32,36 +32,35 @@ def test_dlfile(self, m): self.assertEqual(file_content, f.read()) # Test for an available text file with parameters in the url - url = self.test_url + '/test.txt?id=1&out=45' - file_content = 'resp' + url = self.test_url + "/test.txt?id=1&out=45" + file_content = "resp" m.get(url, text=file_content) out_file = dl_file(url, self.tempdir) # Check if parameters have been correctly removed - self.assertEqual('test.txt', fm.split(out_file)[1]) + self.assertEqual("test.txt", fm.split(out_file)[1]) with open(out_file) as f: self.assertEqual(file_content, f.read()) # Test for an unavailable text file - url = self.test_url + '/test2.txt' - m.get(url, text='Not Found', status_code=404) + url = self.test_url + "/test2.txt" + m.get(url, text="Not Found", status_code=404) with self.assertRaises(RuntimeError) as e: dl_file(url, self.tempdir) - self.assertTrue( - 'File is not available (error code 404)' in str(e.exception)) + self.assertTrue("File is not available (error code 404)" in str(e.exception)) @requests_mock.Mocker() def test_dlfile_content_length(self, m): """Test for `dl_file` beahavior with 'content-length' header.""" # Test for an available text file (with 'content-length' header) - url = self.test_url + '/test.txt' - file_content = 'resp' + url = self.test_url + "/test.txt" + file_content = "resp" - m.get(url, text=file_content, headers={'Content-Length': '4'}) + m.get(url, text=file_content, headers={"Content-Length": "4"}) out_file = dl_file(url, self.tempdir) with open(out_file) as f: @@ -72,19 +71,18 @@ def test_dlfile_headers(self, m): """Test for `dl_file` beahavior with additional headers.""" # Test for an available text file (with 'content-length' header) - url = self.test_url + '/test.txt' - file_content = 'resp' + url = self.test_url + "/test.txt" + file_content = "resp" - m.get(url, text=file_content, request_headers={'TOKEN': 'test'}) + m.get(url, text=file_content, request_headers={"TOKEN": "test"}) - out_file = dl_file(url, self.tempdir, headers={'TOKEN': 'test'}) + out_file = dl_file(url, self.tempdir, headers={"TOKEN": "test"}) with open(out_file) as f: self.assertEqual(file_content, f.read()) # Additional headers should be ignored - out_file = dl_file(url, self.tempdir, - headers={'TOKEN': 'test', 'HEADER2': '2'}) + out_file = dl_file(url, self.tempdir, headers={"TOKEN": "test", "HEADER2": "2"}) with open(out_file) as f: self.assertEqual(file_content, f.read()) @@ -93,15 +91,15 @@ def test_dlfile_headers(self, m): with self.assertRaises(Exception): dl_file(url, self.tempdir) with self.assertRaises(Exception): - dl_file(url, self.tempdir, headers={'TOKEN': '2'}) + dl_file(url, self.tempdir, headers={"TOKEN": "2"}) with self.assertRaises(Exception): - dl_file(url, self.tempdir, headers={'HEADER2': 'test'}) + dl_file(url, self.tempdir, headers={"HEADER2": "test"}) @requests_mock.Mocker() def test_dlfile_content_disposition(self, m): """Test for `dl_file` beahavior with 'Content-Disposition' header.""" - def _test_dlfile(cont_disp, fn='test.txt'): + def _test_dlfile(cont_disp, fn="test.txt"): """ Parameters @@ -112,12 +110,14 @@ def _test_dlfile(cont_disp, fn='test.txt'): Expected filename. Default 'text.txt'. """ - url = self.test_url + '/test.txt' - file_content = 'resp' - - m.get(url, text=file_content, - headers={'Content-Length': '4', - 'Content-Disposition': cont_disp}) + url = self.test_url + "/test.txt" + file_content = "resp" + + m.get( + url, + text=file_content, + headers={"Content-Length": "4", "Content-Disposition": cont_disp}, + ) out_file = dl_file(url, self.tempdir) self.assertEqual(fn, fm.split(out_file)[1]) @@ -133,12 +133,12 @@ def _test_dlfile(cont_disp, fn='test.txt'): # Check for 'content-disposition' filename different from url disp = r'inline; filename="READ.md"; filename*=UTF-8\'\'READ.md' - _test_dlfile(disp, fn='READ.md') + _test_dlfile(disp, fn="READ.md") # Check for a simpler 'content-disposition' content disp = 'filename="READ.md"' - _test_dlfile(disp, fn='READ.md') + _test_dlfile(disp, fn="READ.md") # Check for 'content-disposition' without filename disp = 'inline; test="test"' @@ -148,14 +148,14 @@ def _test_dlfile(cont_disp, fn='test.txt'): def test_dl_file_md5(self): # Fixed long string to write to the file - x = b'abcd' * 10000 + x = b"abcd" * 10000 # Expected digest of the file - md5_test = '3f0f597c3c69ce42e554fdad3adcbeea' + md5_test = "3f0f597c3c69ce42e554fdad3adcbeea" # Generate a temp file to test and write content - tempf = fm.join(self.tempdir, 'test_dl_file_md5') - with open(tempf, 'wb') as fp: + tempf = fm.join(self.tempdir, "test_dl_file_md5") + with open(tempf, "wb") as fp: fp.write(x) md5_digest = md5(fp.name) @@ -167,23 +167,26 @@ def test_dl_file_md5(self): def test_dlfile_gitlab(self, m): """Test for `dl_file_gitlab` standard beahavior.""" - repo = 'secml/test' - file = 'files/test.txt' - branch = 'master' + repo = "secml/test" + file = "files/test.txt" + branch = "master" - api_url = 'https://gitlab.com/api/v4/projects/' \ - '{:}/repository/files/{:}/raw?ref={:}' + api_url = ( + "https://gitlab.com/api/v4/projects/" "{:}/repository/files/{:}/raw?ref={:}" + ) url = api_url.format( - parse.quote(repo, safe=''), - parse.quote(file, safe=''), - branch) - file_content = 'resp' + parse.quote(repo, safe=""), parse.quote(file, safe=""), branch + ) + file_content = "resp" # Mimic the response given by GitLab API disp = r'inline; filename="test.txt"; filename*=UTF-8\'\'test.txt' - m.get(url, text=file_content, headers={'Content-Length': '4', - 'Content-Disposition': disp}) + m.get( + url, + text=file_content, + headers={"Content-Length": "4", "Content-Disposition": disp}, + ) out_file = dl_file_gitlab(repo, file, self.tempdir, branch=branch) @@ -191,29 +194,23 @@ def test_dlfile_gitlab(self, m): self.assertEqual(file_content, f.read()) # Testing multiple similar values for repo and file parameters - dl_file_gitlab( - repo + '/', file, self.tempdir, branch=branch) - dl_file_gitlab( - 'gitlab.com/' + repo, file, self.tempdir, branch=branch) - dl_file_gitlab( - 'gitlab.com/' + repo + '/', file, self.tempdir, branch=branch) - dl_file_gitlab( - 'https://gitlab.com/' + repo, file, self.tempdir, branch=branch) - dl_file_gitlab( - 'http://gitlab.com/' + repo, file, self.tempdir, branch=branch) - dl_file_gitlab( - repo, '/' + file, self.tempdir, branch=branch) + dl_file_gitlab(repo + "/", file, self.tempdir, branch=branch) + dl_file_gitlab("gitlab.com/" + repo, file, self.tempdir, branch=branch) + dl_file_gitlab("gitlab.com/" + repo + "/", file, self.tempdir, branch=branch) + dl_file_gitlab("https://gitlab.com/" + repo, file, self.tempdir, branch=branch) + dl_file_gitlab("http://gitlab.com/" + repo, file, self.tempdir, branch=branch) + dl_file_gitlab(repo, "/" + file, self.tempdir, branch=branch) # Testing wrong inputs with self.assertRaises(requests_mock.NoMockAddress): - dl_file_gitlab(repo, file, self.tempdir, branch='develop') + dl_file_gitlab(repo, file, self.tempdir, branch="develop") with self.assertRaises(requests_mock.NoMockAddress): - dl_file_gitlab(repo, 'test.txt', self.tempdir, branch=branch) + dl_file_gitlab(repo, "test.txt", self.tempdir, branch=branch) with self.assertRaises(requests_mock.NoMockAddress): - dl_file_gitlab('secml/secml', file, self.tempdir, branch=branch) + dl_file_gitlab("secml/secml", file, self.tempdir, branch=branch) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main() diff --git a/src/secml/utils/tests/test_pickle_utils.py b/src/secml/utils/tests/test_pickle_utils.py index f9ac0710..c8f3ad6a 100644 --- a/src/secml/utils/tests/test_pickle_utils.py +++ b/src/secml/utils/tests/test_pickle_utils.py @@ -13,8 +13,9 @@ def test_save_load(self): # Generate a temp file to test import tempfile + tempdir = tempfile.gettempdir() - tempfile = fm.join(tempdir, 'secml_testpickle') + tempfile = fm.join(tempdir, "secml_testpickle") tempfile = pickle_utils.save(tempfile, a) @@ -23,5 +24,5 @@ def test_save_load(self): self.assert_array_equal(a_loaded, a) -if __name__ == '__main__': +if __name__ == "__main__": CUnitTest.main()