diff --git a/doc/DaphneLib/APIRef.md b/doc/DaphneLib/APIRef.md
index ff4994725..a30c1f4de 100644
--- a/doc/DaphneLib/APIRef.md
+++ b/doc/DaphneLib/APIRef.md
@@ -40,7 +40,7 @@ However, as the methods largely map to DaphneDSL built-in functions, you can fin
 **Generating data in DAPHNE:**
 
 - **`fill`**`(arg, rows:int, cols:int) -> Matrix`
-- **`seq`**`(start, end, inc) -> Matrix`
+- **`seq`**`(start, end, inc = 1) -> Matrix`
 - **`rand`**`(rows: int, cols: int, min: Union[float, int] = None, max: Union[float, int] = None, sparsity: Union[float, int] = 0, seed: Union[float, int] = 0) -> Matrix`
 - **`createFrame`**`(columns: List[Matrix], labels: List[str] = None) -> 'Frame'`
 - **`diagMatrix`**`(self, arg: Matrix) -> 'Matrix'`
@@ -148,6 +148,11 @@ In the following, we describe only the latter.
 - **`replace`**`(pattern, replacement)`
 - **`order`**`(colIdxs: List[int], ascs: List[bool], returnIndexes: bool)`
 
+**Data preprocessing:**
+
+- **`oneHot`**`(info: Matrix)`
+- **`bin`**`(numBins: int, Min = None, Max = None)`
+
 **Other matrix operations:**
 
 - **`diagVector`**`()`
diff --git a/src/api/python/daphne/context/daphne_context.py b/src/api/python/daphne/context/daphne_context.py
index e4dddc954..50ddb9398 100644
--- a/src/api/python/daphne/context/daphne_context.py
+++ b/src/api/python/daphne/context/daphne_context.py
@@ -366,7 +366,7 @@ def createFrame(self, columns: List[Matrix], labels:List[str] = None) -> 'Frame'
 
         return Frame(self, 'createFrame', [*columns, *labels])
 
-    def seq(self, start, end, inc) -> Matrix:
+    def seq(self, start, end, inc = 1) -> Matrix:
         named_input_nodes = {'start':start, 'end':end, 'inc':inc}
         return Matrix(self, 'seq', [], named_input_nodes=named_input_nodes)
 
diff --git a/src/api/python/daphne/operator/nodes/matrix.py b/src/api/python/daphne/operator/nodes/matrix.py
index 5e6ed536c..4ea6200d1 100644
--- a/src/api/python/daphne/operator/nodes/matrix.py
+++ b/src/api/python/daphne/operator/nodes/matrix.py
@@ -394,6 +394,31 @@ def outerGt(self, other: 'Matrix') -> 'Matrix':
     def outerGe(self, other: 'Matrix') -> 'Matrix':
         return Matrix(self.daphne_context, 'outerGe', [self, other])
 
+    def oneHot(self, other: 'Matrix') -> 'Matrix':
+        """Create a 'oneHot' operation node on this matrix.
+
+        :param other: matrix passed as the second (info) argument of 'oneHot'
+        :return: Matrix node representing the result
+        """
+        return Matrix(self.daphne_context, 'oneHot', [self, other])
+
+    def bin(self, numBins, Min = None, Max = None) -> 'Matrix':
+        """Create a 'bin' operation node on this matrix.
+
+        :param numBins: forwarded to 'bin' as the number of bins
+        :param Min: optional lower bound; must be given together with Max
+        :param Max: optional upper bound; must be given together with Min
+        :return: Matrix node representing the result
+        :raises RuntimeError: if exactly one of Min and Max is given
+        """
+        if (Min is None) != (Max is None):
+            raise RuntimeError("bin: both min and max should be set, or both should be None")
+        # Test against None, not truthiness: 0 and 0.0 are valid bounds and
+        # must still be forwarded to 'bin'.
+        if Min is None:
+            return Matrix(self.daphne_context, 'bin', [self, numBins])
+        return Matrix(self.daphne_context, 'bin', [self, numBins, Min, Max])
+
     def order(self, colIdxs: List[int], ascs: List[bool], returnIndexes: bool) -> 'Matrix':
         if len(colIdxs) != len(ascs):
             raise RuntimeError("order: the lists given for parameters colIdxs and ascs must have the same length")
diff --git a/test/api/python/DaphneLibTest.cpp b/test/api/python/DaphneLibTest.cpp
index 89407c2b6..0eb9f3992 100644
--- a/test/api/python/DaphneLibTest.cpp
+++ b/test/api/python/DaphneLibTest.cpp
@@ -95,6 +95,7 @@ MAKE_TEST_CASE("matrix_outerbinary")
 MAKE_TEST_CASE("matrix_agg")
 MAKE_TEST_CASE("matrix_reorg")
 MAKE_TEST_CASE("matrix_other")
+MAKE_TEST_CASE("matrix_preprocessing")
 MAKE_TEST_CASE_SCALAR("numpy_matrix_ops")
 MAKE_TEST_CASE_SCALAR("numpy_matrix_ops_extended")
 MAKE_TEST_CASE("numpy_matrix_ops_replace")
diff --git a/test/api/python/matrix_preprocessing.daphne b/test/api/python/matrix_preprocessing.daphne
new file mode 100644
index 000000000..29a67c6a5
--- /dev/null
+++ b/test/api/python/matrix_preprocessing.daphne
@@ -0,0 +1,25 @@
+# Copyright 2023 The DAPHNE Consortium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+arg_m = reshape(seq(-2, 5), 2, 4);
+info_m = [-1, 0, 5, 6](1, 4);
+print(oneHot(arg_m, info_m));
+
+arg_m_2 = reshape(seq(10, 70, 10), 1, 7);
+print(bin(arg_m_2, 3));
+print(bin(arg_m_2, 3, 10, 70));
+print(bin(arg_m_2, 3, 0, 70));
+
+arg_m_3 = t([5.0, 20.0, nan, 40.0, inf, 60.0, 100.0]);
+print(bin(arg_m_3, 3, 10.0, 70.0));
diff --git a/test/api/python/matrix_preprocessing.py b/test/api/python/matrix_preprocessing.py
new file mode 100644
index 000000000..e11f54f2e
--- /dev/null
+++ b/test/api/python/matrix_preprocessing.py
@@ -0,0 +1,30 @@
+# Copyright 2023 The DAPHNE Consortium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from daphne.context.daphne_context import DaphneContext
+
+dc = DaphneContext()
+
+arg_m_1 = dc.seq(-2, 5).reshape(2, 4)
+info_m = dc.seq(-1, 0).rbind(dc.seq(5, 6)).reshape(1, 4)
+arg_m_1.oneHot(info_m).print().compute()
+
+arg_m_2 = dc.seq(10, 70, 10).reshape(1, 7)
+arg_m_2.bin(3).print().compute()
+arg_m_2.bin(3, 10, 70).print().compute()
+arg_m_2.bin(3, 0, 70).print().compute()
+
+arg_m_3 = dc.seq(5.0, 20.0, 15).rbind(dc.fill(math.nan, 1, 1)).rbind(dc.fill(40.0, 1, 1)).rbind(dc.fill(math.inf, 1, 1)).rbind(dc.seq(60.0, 100.0, 40))
+arg_m_3.reshape(1, 7).bin(3, 10.0, 70.0).print().compute()