diff --git a/.gitignore b/.gitignore index 927f7eb..e55789a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea/* +build/* cmake-build-*/* cmake-build-debug/* scripts/*.txt @@ -16,3 +17,5 @@ scripts/benchmarks/*.dat *eps resources/datasets/google-cluster-data/saber-debs-demo.data resources/datasets/lrb/datafile3hours.dat +.DS_Store +/docs/.DS_Store diff --git a/Dockerfile b/Dockerfile index 6e5db3f..b401f9b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,8 @@ FROM ubuntu:bionic +ENV TZ=Etc/UTC +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + RUN apt update && \ apt upgrade -y && \ apt install -y \ @@ -10,7 +13,8 @@ RUN apt update && \ build-essential \ ccache \ flex \ - g++ \ + gcc \ + g++ \ git \ libboost-all-dev \ libbz2-dev \ @@ -33,7 +37,12 @@ RUN apt update && \ pkg-config \ python-dev \ zlib1g-dev \ - wget + wget \ + libaio-dev \ + libibverbs-dev \ + bpfcc-tools \ + sysstat \ + fio RUN cd && \ apt remove --purge --auto-remove cmake && \ @@ -88,7 +97,81 @@ ENV LIBRARY_PATH=$LLVM_HOME/lib:$LIBRARY_PATH ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBRARY_PATH ENV PATH=/usr/lib/ccache:$PATH +RUN cd && \ + apt install -y \ + git \ + gcc \ + g++ \ + autoconf \ + automake \ + asciidoc \ + asciidoctor \ + bash-completion \ + xmlto \ + libtool \ + pkg-config \ + libglib2.0-0 \ + libglib2.0-dev \ + libfabric1 \ + libfabric-dev \ + doxygen \ + graphviz \ + pandoc \ + libncurses5 \ + libkmod2 \ + libkmod-dev \ + libudev-dev \ + uuid-dev \ + libjson-c-dev \ + libkeyutils-dev \ + systemd \ + libsystemd-dev + +RUN cd && \ + git clone https://github.com/pmem/ndctl && \ + cd ndctl && \ + git checkout c7767834871f7ce50a2abe1da946e9e16fb08eda && \ + ./autogen.sh && \ + ./configure CFLAGS='-g -O2' --prefix=/usr/local --sysconfdir=/etc --libdir=/usr/local/lib64 && \ + make -j$(nproc) && \ + make install + +RUN cd && \ + apt install -y \ + autoconf \ + automake \ + pkg-config \ + libglib2.0-dev \ + libfabric-dev \ + pandoc \ + libncurses5-dev + +RUN cd && \ + git clone https://github.com/pmem/pmdk && \ + cd pmdk && \ + git checkout 3bc5b0da5a7a5d5752ad2cb4f5f9bf0edfd47d67 && \ + export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH} && \ + make -j$(nproc) && \ + PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH} make install && \ + echo /usr/local/lib >> /etc/ld.so.conf && \ + echo /usr/local/lib64 >> /etc/ld.so.conf && \ + ldconfig +# echo 'export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}' >> $HOME/.profile && \ +# source $HOME/.profile + +ENV PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH} + +RUN cd && \ + git clone https://github.com/pmem/libpmemobj-cpp.git && \ + cd libpmemobj-cpp && \ + git checkout 9f784bba07b94cd36c9eebeaa88c5df4f05045b2 && \ + mkdir build && \ + cd build && \ + cmake -DTESTS_USE_VALGRIND=OFF .. && \ + make -j$(nproc) && \ + make install + RUN cd && \ git clone https://github.com/lsds/LightSaber.git && \ cd LightSaber && \ - ./scripts/build.sh + ./scripts/build.sh \ No newline at end of file diff --git a/README.md b/README.md index 24aae89..d029f4a 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,44 @@
- +
-# LightSaber [![License](https://img.shields.io/github/license/lsds/LightSaber.svg?branch=master)](https://github.com/lsds/LightSaber/blob/master/LICENCE.md)
+[![License](https://img.shields.io/github/license/lsds/LightSaber.svg?branch=master)](https://github.com/lsds/LightSaber/blob/master/LICENCE.md)
+# Introduction
-LightSaber is a stream processing engine that balances parallelism and incremental processing when
-executing window aggregation queries on multi-core CPUs. LightSaber operates on in-order
-streams of data and achieves up to an order of magnitude higher throughput compared to existing systems.
+As an ever-growing amount of data is acquired and analyzed in real time, stream processing engines have become an essential part of any data processing stack. Given the importance of this class of applications, modern stream processing engines must be designed specifically for efficient execution on multi-core CPUs. However, it is challenging to analyze conceptually infinite data streams with high throughput and low latency while providing fault-tolerance semantics.
+This project offers two systems to help tackle this problem.
-### Getting started
-The `prepare-software.sh` script will guide you through the installation and compilation process of our system locally.
-The script is tested on **Ubuntu 18.04.5 LTS**.
+
+## LightSaber
+
+LightSaber is a stream processing engine that balances parallelism and incremental processing when executing window aggregation queries
+on multi-core CPUs. LightSaber operates on in-order data streams and achieves up to an order of magnitude higher throughput than existing systems.
+
+See application examples and how to configure LightSaber [here](#running-lightsaber).
+
+ +
+ + +## Scabbard +Scabbard is the first single-node SPE that supports exactly-once fault-tolerance semantics despite limited local I/O bandwidth. +It tightly couples the persistence operations with the operator graph through a novel persistent operator graph model and +dynamically reduces the required disk bandwidth at runtime through adaptive data compression. +Scabbard is based on the query execution engine and compiler from LightSaber. + +See application examples and how to configure Scabbard [here](#running-scabbard). + +
+ +
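+To make the adaptive-compression idea concrete, below is a minimal, hypothetical sketch of such a decision (invented for illustration; it is not Scabbard's actual API): when persistence falls behind the disk, the writer trades CPU for bandwidth by switching to a stronger codec.
+```
+#include <cstddef>
+
+// Hypothetical per-batch codec choice: spend more CPU on compression only
+// when the disk write queue has fallen more than one second behind.
+enum class Codec { LightweightRLE, HeavierDict };
+
+Codec chooseCodec(std::size_t queuedBytes, std::size_t diskBytesPerSec) {
+  return (queuedBytes > diskBytesPerSec) ? Codec::HeavierDict
+                                         : Codec::LightweightRLE;
+}
+```
+(Scabbard itself goes further: per the `--adaptive-interval` flag below, it periodically generates new compression functions from collected statistics.)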
+
+## Getting started
+
+The `prepare-software.sh` script will guide you through the installation of our system locally.
+The script is tested on **Ubuntu 18.04.5 LTS**. If an error occurs, you may have to manually
+remove and re-add the symbolic links to the compiler binaries in `/usr/lib/ccache/`.
 ```
 $ git clone https://github.com/lsds/LightSaber.git
@@ -20,7 +47,7 @@ $ ./scripts/prepare-software.sh
 $ ./scripts/build.sh
 ```
 
-Otherwise use the Dockerfile:
+Otherwise, use the Dockerfile:
 ```
 $ git clone https://github.com/lsds/LightSaber.git
 $ cd LightSaber
@@ -28,81 +55,194 @@ $ docker build --tag="lightsaber" .
 $ docker run -ti lightsaber
 ```
 
-### Run unit tests
+### Setting up variables before running the code
+When running a query, the **LightSaber system is used by default**.
+**To enable the features of Scabbard, we have to set the variables defined [here](#scabbard-configuration)**.
+
+Skip the next part if you don't want to change the folder where code/data is stored and you have
+already installed LLVM in the `$HOME` directory.
+
+Before running any query, set the path where files are stored (the default is the `$HOME` directory) in the
+`SystemConf.cpp` file:
+```
+SystemConf::FILE_ROOT_PATH = ...
+```
+and the path for the LLVM/Clang source files in `src/CMakeLists` (the default is the `$HOME` directory):
+```
+set(USER_PATH "...")
+```
+
+### Adding new applications
+When compiling in `Release` mode, add the `-UNDEBUG` flag in the `CMakeLists.txt` to enable `assert`:
 ```
-$ ./build/test/unit_tests/unit_tests_run
+target_compile_options(exec ... -UNDEBUG)
+```
+
+### Start with unit tests
+```
+$ ./build/test/unit_tests/ds_unit_tests
+$ ./build/test/unit_tests/internals_unit_tests
+$ ./build/test/unit_tests/operators_unit_tests
 ```
 
+## Running LightSaber
+
 ### Running a microbenchmark (e.g., Projection)
 ```
 $ ./build/test/benchmarks/microbenchmarks/TestProjection
 ```
 
-### Running an application benchmark with sample data
+### Running a cluster monitoring application with sample data
 ```
 $ ./build/test/benchmarks/applications/cluster_monitoring
 ```
 
-### How to cite LightSaber
-* **[SIGMOD]** Georgios Theodorakis, Alexandros Koliousis, Peter R. Pietzuch, and Holger Pirk. LightSaber: Efficient Window Aggregation on Multi-core Processors, SIGMOD, 2020
+
+### Running benchmarks from the paper
+You can find the results in `build/test/benchmarks/applications/`.
 ```
-@inproceedings{Theodorakis2020,
- author = {Georgios Theodorakis and Alexandros Koliousis and Peter R. Pietzuch and Holger Pirk},
- title = {{LightSaber: Efficient Window Aggregation on Multi-core Processors}},
- booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data},
- series = {SIGMOD '20},
- year = {2020},
- publisher = {ACM},
- address = {Portland, OR, USA},
-}
+$ cd scripts/lightsaber-bench
+$ ./run-benchmarks-lightsaber.sh
 ```
 
-#### Other related publications
-* **[EDBT]** Georgios Theodorakis, Peter R. Pietzuch, and Holger Pirk. SlideSide: A fast Incremental Stream Processing Algorithm for Multiple Queries, EDBT, 2020
-* **[ADMS]** Georgios Theodorakis, Alexandros Koliousis, Peter R. Pietzuch, and Holger Pirk. Hammer Slide: Work- and CPU-efficient Streaming Window Aggregation, ADMS, 2018 [[code]](https://github.com/grtheod/Hammerslide)
-* **[SIGMOD]** Alexandros Koliousis, Matthias Weidlich, Raul Castro Fernandez, Alexander Wolf, Paolo Costa, and Peter Pietzuch. Saber: Window-Based Hybrid Stream Processing for Heterogeneous Architectures, SIGMOD, 2016
-
-
-### The LightSaber engine
- -
- -#### LightSaber configuration +### LightSaber configuration Variables in **SystemConf.h** configure the LightSaber runtime. Each of them also corresponds to a command-line argument available to all LightSaber applications: ###### --threads _N_ - Sets the number of CPU worker threads (`WORKER_THREADS` variable). The default value is `1`. **CPU worker threads are pinned to physical cores**. The threads are pinned to core ids based on the underlying hardware (e.g., if there are multiple sockets with n cores each, the first n threads are pinned in the first socket and so on). -###### --slots _N_ +###### --batch-size _N_ +Sets the batch size in bytes (`BATCH_SIZE` variable). The default value is `131072`, i.e. 128 KB. + +###### --bundle-size _N_ +Sets the bundle size in bytes (`BUNDLE_SIZE` variable), which is used for generating data in-memory. +It has to be a multiple of the `BATCH_SIZE`. The default value is `131072`, i.e. 128 KB, which is the same as the `BATCH_SIZE`. +###### --slots _N_ Sets the number of intermediate query result slots (`SLOTS` variable). The default value is `256`. ###### --partial-windows _N_ - Sets the maximum number of window fragments in a query task (`PARTIAL_WINDOWS` variable). The default value is `1024`. ###### --circular-size _N_ - Sets the circular buffer size in bytes (`CIRCULAR_BUFFER_SIZE` variable). The default value is `4194304`, i.e. 4 MB. ###### --unbounded-size _N_ - Sets the intermediate result buffer size in bytes (`UNBOUNDED_BUFFER_SIZE` variable). The default value is `524288`, i.e. 512 KB. ###### --hashtable-size _N_ - Hash table size (in number of buckets): hash tables hold partial window aggregate results (`HASH_TABLE_SIZE` variable with the default value 512). ###### --performance-monitor-interval _N_ - -Sets the performance monitor interval, in msec (`PERFORMANCE_MONITOR_INTERVAL` variable). The default value is `1000`, i.e. 1 sec. Controls how often LightSaber prints on standard output performance statistics such as throughput and latency. +Sets the performance monitor interval in msec (`PERFORMANCE_MONITOR_INTERVAL` variable). +The default value is `1000`, i.e. 1 sec. Controls how often LightSaber prints on standard output performance statistics such as throughput and latency. ###### --latency `true`|`false` - Determines whether LightSaber should measure task latency or not (`LATENCY_ON` variable). The default value is `false`. +###### --parallel-merge `true`|`false` +Determines whether LightSaber uses parallel aggregation when merging fragment windows or not (`PARALLEL_MERGE_ON` variable). The default value is `false`. + ###### To enable NUMA-aware scheduling -Set the HAVE_NUMA flag in the respective CMakeLists.txt (e.g., in test/benchmarks/applications/CMakeLists.txt) and recompile the code. \ No newline at end of file +Set the `HAVE_NUMA` flag in the respective CMakeLists.txt (e.g., in `test/benchmarks/applications/CMakeLists.txt`) and recompile the code. + +###### To ingest/output data with TCP + +Set the `TCP_INPUT`/`TCP_OUTPUT` flag in the respective CMakeLists.txt (e.g., in `test/benchmarks/applicationsWithCheckpoints/CMakeLists.txt`) and recompile the code. +Check the `test/benchmarks/applications/RemoteBenchmark` folder for code samples to create TCP sources/sinks. + +###### To ingest/output data with RDMA + +Set the `RDMA_INPUT`/`RDMA_OUTPUT` flag in the respective CMakeLists.txt (e.g., in `test/benchmarks/applicationsWithCheckpoints/CMakeLists.txt`) and recompile the code. 
Check the `test/benchmarks/applications/RemoteBenchmark` folder for code samples to create RDMA sources/sinks.
+
+
+
+## Running Scabbard
+
+### Running a microbenchmark (e.g., Aggregation) with persistent input streams and 1-sec checkpoints
+```
+$ ./build/test/benchmarks/microbenchmarks/TestPersistentAggregation
+```
+
+### Running a cluster monitoring application with persistence using sample data
+```
+$ ./build/test/benchmarks/applicationsWithCheckpoints/cluster_monitoring_checkpoints --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --checkpoint-compression true --persist-input true --lineage true --latency true --threads 1
+```
+
+### Running benchmarks from the paper
+You can find the results in `build/test/benchmarks/applicationsWithCheckpoints/`.
+```
+$ cd scripts/scabbard-bench/paper/
+$ ./run-benchmarks-...-FIG_X.sh
+```
+
+### Scabbard configuration
+
+In addition to [LightSaber's system variables](#lightsaber-configuration), we can configure the Scabbard runtime with variables specific to its fault-tolerance semantics.
+Each of them also corresponds to a command-line argument available to all Scabbard applications:
+
+###### --compression-monitor-interval _N_
+Sets the query compression decision update interval in msec (`COMPRESSION_MONITOR_INTERVAL` variable). The default value is `4000`, i.e. 4 sec.
+
+###### --checkpoint-duration _N_
+Sets the checkpoint interval in msec (`CHECKPOINT_INTERVAL` variable). The default value is `1000`, i.e. 1 sec.
+
+###### --disk-block-size _N_
+Sets the size of blocks on disk in bytes (`BLOCK_SIZE` variable). The default value is `16384`, i.e. 16 KB.
+
+###### --create-merge `true`|`false`
+Determines whether Scabbard generates merge tasks to avoid resource starvation due to asynchronous execution (`CREATE_MERGE_WITH_CHECKPOINTS` variable). The default value is `false`.
+
+###### --checkpoint-compression `true`|`false`
+Determines whether Scabbard compresses data before storing it to disk (`CHECKPOINT_COMPRESSION` variable). The default value is `false`.
+
+###### --persist-input `true`|`false`
+Determines whether Scabbard persists its input streams (`PERSIST_INPUT` variable). The default value is `false`.
+
+###### --lineage `true`|`false`
+Enables the dependency tracking required for exactly-once results (`LINEAGE_ON` variable). The default value is `false`.
+
+###### --adaptive-compression `true`|`false`
+Enables adaptive compression (`ADAPTIVE_COMPRESSION_ON` variable). The default value is `false`.
+
+###### --adaptive-interval _N_
+Sets the interval in msec that triggers the code generation of new compression functions based on collected statistics (`ADAPTIVE_COMPRESSION_INTERVAL` variable). The default value is `4000`, i.e. 4 sec.
+
+###### --recover `true`|`false`
+If set to `true`, Scabbard attempts to recover using previously persisted data (`RECOVER` variable). The default value is `false`.
+
+
+## How to cite Scabbard
+* **[VLDB]** Georgios Theodorakis, Fotios Kounelis, Peter R. Pietzuch, and Holger Pirk. Scabbard: Single-Node Fault-Tolerant Stream Processing, VLDB, 2022
+```
+@inproceedings{Theodorakis2022,
+ author = {Georgios Theodorakis and Fotios Kounelis and Peter R. Pietzuch and Holger Pirk},
+ title = {{Scabbard: Single-Node Fault-Tolerant Stream Processing}},
+ series = {VLDB '22},
+ year = {2022},
+ publisher = {ACM},
+}
+```
+
+## How to cite LightSaber
+* **[SIGMOD]** Georgios Theodorakis, Alexandros Koliousis, Peter R.
Pietzuch, and Holger Pirk. LightSaber: Efficient Window Aggregation on Multi-core Processors, SIGMOD, 2020 +``` +@inproceedings{Theodorakis2020, + author = {Georgios Theodorakis and Alexandros Koliousis and Peter R. Pietzuch and Holger Pirk}, + title = {{LightSaber: Efficient Window Aggregation on Multi-core Processors}}, + booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data}, + series = {SIGMOD '20}, + year = {2020}, + publisher = {ACM}, + address = {Portland, OR, USA}, +} +``` + +### Other related publications +* **[EDBT]** Georgios Theodorakis, Peter R. Pietzuch, and Holger Pirk. SlideSide: A fast Incremental Stream Processing Algorithm for Multiple Queries, EDBT, 2020 +* **[ADMS]** Georgios Theodorakis, Alexandros Koliousis, Peter R. Pietzuch, and Holger Pirk. Hammer Slide: Work- and CPU-efficient Streaming Window Aggregation, ADMS, 2018 [[code]](https://github.com/grtheod/Hammerslide) +* **[SIGMOD]** Alexandros Koliousis, Matthias Weidlich, Raul Castro Fernandez, Alexander Wolf, Paolo Costa, and Peter Pietzuch. Saber: Window-Based Hybrid Stream Processing for Heterogeneous Architectures, SIGMOD, 2016 + + diff --git a/VERSION b/VERSION index 8a9ecc2..7bcd0e3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.1 \ No newline at end of file +0.0.2 \ No newline at end of file diff --git a/docs/images/Scabbard_arch.png b/docs/images/Scabbard_arch.png new file mode 100644 index 0000000..a60b341 Binary files /dev/null and b/docs/images/Scabbard_arch.png differ diff --git a/docs/images/Scabbard_logo.png b/docs/images/Scabbard_logo.png new file mode 100644 index 0000000..c37efba Binary files /dev/null and b/docs/images/Scabbard_logo.png differ diff --git a/resources/datasets/google-cluster-data/google-cluster-data.txt b/resources/datasets/google-cluster-data/google-cluster-data.txt index e384529..2d97a7d 100644 --- a/resources/datasets/google-cluster-data/google-cluster-data.txt +++ b/resources/datasets/google-cluster-data/google-cluster-data.txt @@ -16289,4 +16289,97 @@ 643 6218406404 447 -1 0 472 2 0 0.041 0.021 0.000 0 643 6251706807 25 336051749 1 256 2 2 0.125 0.080 0.000 0 643 515042969 3 -1 5 24 2 0 0.016 0.016 0.000 0 -643 515042969 3 -1 0 24 2 0 0.016 0.016 0.000 \ No newline at end of file +644 515042969 3 -1 0 24 2 0 0.016 0.016 0.000 0 +644 6218406404 853 294816715 2 472 2 0 0.041 0.021 0.000 0 +644 6218406404 853 -1 0 472 2 0 0.041 0.021 0.000 0 +644 6218406404 192 294816715 2 472 2 0 0.041 0.021 0.000 0 +644 6218406404 192 -1 0 472 2 0 0.041 0.021 0.000 0 +644 6114773114 124 329168536 2 356 0 0 0.013 0.008 0.000 0 +644 6114773114 124 -1 0 356 0 0 0.013 0.008 0.000 0 +644 6218406404 232 329168536 2 472 2 0 0.041 0.021 0.000 0 +644 6218406404 232 -1 0 472 2 0 0.041 0.021 0.000 0 +644 6249832520 94 329168536 2 153 0 0 0.056 0.032 0.000 0 +644 6249832520 94 -1 0 153 0 0 0.056 0.032 0.000 0 +644 6251706807 0 329168536 1 256 2 2 0.125 0.080 0.000 0 +644 515042969 1 -1 5 24 2 0 0.016 0.016 0.000 0 +644 515042969 1 -1 0 24 2 0 0.016 0.016 0.000 0 +645 515042969 23 610044506 1 24 2 0 0.016 0.016 0.000 0 +645 6114773114 1850 227459271 1 356 0 0 0.013 0.008 0.000 0 +645 6221861800 1365 8055696 1 356 0 0 0.013 0.016 0.000 0 +645 6221861800 7720 4023910289 1 356 0 0 0.013 0.016 0.000 0 +645 6221861800 9723 988445 1 356 0 0 0.013 0.016 0.000 0 +645 6238340468 1084 711085 1 340 1 0 0.063 0.040 0.000 0 +645 6238340468 2083 603107 1 340 1 0 0.063 0.040 0.000 0 +645 6250345153 1199 257500619 2 501 0 0 0.069 0.012 0.000 0 +645 6250345153 1199 -1 0 501 0 0 
0.069 0.012 0.000 0 +645 6218406404 112 257500619 2 472 2 0 0.041 0.021 0.000 0 +645 6218406404 112 -1 0 472 2 0 0.041 0.021 0.000 0 +645 6251706807 7 257500619 1 256 2 2 0.125 0.080 0.000 0 +645 6238340468 948 854237645 2 340 1 0 0.063 0.040 0.000 0 +645 6238340468 948 -1 0 340 1 0 0.063 0.040 0.000 0 +645 6221861800 14659 854237645 2 356 0 0 0.013 0.016 0.000 0 +645 6221861800 14659 -1 0 356 0 0 0.013 0.016 0.000 0 +645 6218406404 890 854237645 2 472 2 0 0.041 0.021 0.000 0 +645 6218406404 890 -1 0 472 2 0 0.041 0.021 0.000 0 +645 6218406404 1060 854237645 2 472 2 0 0.041 0.021 0.000 0 +645 6218406404 1060 -1 0 472 2 0 0.041 0.021 0.000 0 +645 6218406404 1062 854237645 2 472 2 0 0.041 0.021 0.000 0 +645 6218406404 1062 -1 0 472 2 0 0.041 0.021 0.000 0 +645 6251706807 9 854237645 1 256 2 2 0.125 0.080 0.000 0 +645 4297552690 0 4820156549 3 118 2 9 0.006 0.008 0.000 1 +645 4297552690 0 -1 0 118 2 9 0.006 0.008 0.000 1 +645 515042969 15 -1 5 24 2 0 0.016 0.016 0.000 0 +645 515042969 15 -1 0 24 2 0 0.016 0.016 0.000 0 +645 515042969 1 336055200 1 24 2 0 0.016 0.016 0.000 0 +645 5715747784 17 63676345 1 256 2 0 0.019 0.014 0.001 0 +645 6114773114 124 1095431 1 356 0 0 0.013 0.008 0.000 0 +645 6221861800 694 381139 1 356 0 0 0.013 0.016 0.000 0 +645 6249832520 94 1274930 1 153 0 0 0.056 0.032 0.000 0 +646 515042969 16 -1 5 24 2 0 0.016 0.016 0.000 0 +646 515042969 16 -1 0 24 2 0 0.016 0.016 0.000 0 +646 6251539840 335 38673936 4 328 0 2 0.025 0.025 0.000 1 +646 6221861800 2494 84899412 2 356 0 0 0.013 0.016 0.000 0 +646 6221861800 2494 -1 0 356 0 0 0.013 0.016 0.000 0 +646 6250345153 240 84899412 2 501 0 0 0.069 0.012 0.000 0 +646 6250345153 240 -1 0 501 0 0 0.069 0.012 0.000 0 +646 6114773114 1864 84899412 2 356 0 0 0.013 0.008 0.000 0 +646 6114773114 1864 -1 0 356 0 0 0.013 0.008 0.000 0 +646 6218406404 1020 84899412 2 472 2 0 0.041 0.021 0.000 0 +646 6218406404 1020 -1 0 472 2 0 0.041 0.021 0.000 0 +646 6218406404 268 84899412 2 472 2 0 0.041 0.021 0.000 0 +646 6218406404 268 -1 0 472 2 0 0.041 0.021 0.000 0 +646 6251706807 24 84899412 1 256 2 2 0.125 0.080 0.000 0 +646 6251693920 39 16914033 4 249 0 2 0.063 0.006 0.000 0 +646 4297552690 0 3231094147 1 118 2 9 0.006 0.008 0.000 1 +646 515042969 15 6565184 1 24 2 0 0.016 0.016 0.000 0 +646 6221861800 14659 449615529 1 356 0 0 0.013 0.016 0.000 0 +646 6238340468 948 974071212 1 340 1 0 0.063 0.040 0.000 0 +646 6250345153 1199 4820356727 1 501 0 0 0.069 0.012 0.000 0 +647 5003132532 0 351634303 2 243 2 0 0.125 0.003 0.000 0 +647 5003132532 0 -1 0 243 2 0 0.125 0.003 0.000 0 +647 6221861800 15888 351634303 2 356 0 0 0.013 0.016 0.000 0 +647 6221861800 15888 -1 0 356 0 0 0.013 0.016 0.000 0 +647 6251706807 13 351634303 1 256 2 2 0.125 0.080 0.000 0 +647 515042969 21 -1 5 24 2 0 0.016 0.016 0.000 0 +647 515042969 21 -1 0 24 2 0 0.016 0.016 0.000 0 +647 515042969 13 -1 5 24 2 0 0.016 0.016 0.000 0 +647 515042969 13 -1 0 24 2 0 0.016 0.016 0.000 0 +647 515042969 24 -1 5 24 2 0 0.016 0.016 0.000 0 +647 515042969 24 -1 0 24 2 0 0.016 0.016 0.000 0 +647 515042969 26 -1 5 24 2 0 0.016 0.016 0.000 0 +647 515042969 26 -1 0 24 2 0 0.016 0.016 0.000 0 +647 6251539840 239 6192629659 4 328 0 2 0.025 0.025 0.000 1 +647 515042969 16 927964999 1 24 2 0 0.016 0.016 0.000 0 +647 6114773114 1864 5781854 1 356 0 0 0.013 0.008 0.000 0 +647 6221861800 2494 228617356 1 356 0 0 0.013 0.016 0.000 0 +647 6250345153 240 227418843 1 501 0 0 0.069 0.012 0.000 0 +647 6251693920 45 38707778 4 249 0 2 0.063 0.006 0.000 0 +647 6221861800 7406 336051749 2 356 0 0 0.013 0.016 
0.000 0
+647 6221861800 7406 -1 0 356 0 0 0.013 0.016 0.000 0
+647 6250345153 513 336051749 2 501 0 0 0.069 0.012 0.000 0
+647 6250345153 513 -1 0 501 0 0 0.069 0.012 0.000 0
+647 6218406404 447 336051749 2 472 2 0 0.041 0.021 0.000 0
+647 6218406404 447 -1 0 472 2 0 0.041 0.021 0.000 0
+647 6251706807 25 336051749 1 256 2 2 0.125 0.080 0.000 0
+647 515042969 3 -1 5 24 2 0 0.016 0.016 0.000 0
+647 515042969 3 -1 0 24 2 0 0.016 0.016 0.000 0
\ No newline at end of file
diff --git a/resources/datasets/smartgrid/smartgrid-data.txt b/resources/datasets/smartgrid/smartgrid-data.txt
index 19c4a49..a51c576 100644
--- a/resources/datasets/smartgrid/smartgrid-data.txt
+++ b/resources/datasets/smartgrid/smartgrid-data.txt
@@ -33625,4 +33625,4 @@
 1377986484 0.000 1 13 7 4
 1377986484 3.216 0 13 7 4
 1377986484 0.000 1 2 7 4
-1377986484 3.216 0
\ No newline at end of file
+1377986484 3.216 0 2 7 4
\ No newline at end of file
diff --git a/scripts/build.sh b/scripts/build.sh
index f11e06e..5598b26 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -8,6 +8,7 @@ cd build
 #cmake .. -DCMAKE_BUILD_TYPE=Debug -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
 cmake .. -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
 make -j$(nproc)
+cd ..
 
 # Run example
 #cd $HOME/LightSaber/build/benchmarks/microbenchmarks
diff --git a/scripts/lightsaber-bench/run-benchmarks-lightsaber.sh b/scripts/lightsaber-bench/run-benchmarks-lightsaber.sh
new file mode 100755
index 0000000..a69b1ce
--- /dev/null
+++ b/scripts/lightsaber-bench/run-benchmarks-lightsaber.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+
+echo "Start running benchmarks for LightSaber"
+
+allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+iterations=(1 2 3 4 5)
+t10=(10)
+t14=(14)
+t15=(15)
+
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the code files are generated.
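+# Hedged addition (not in the original script; the binary name is taken from
+# the loops below): fail fast if the benchmarks were never built, instead of
+# silently appending empty results to ls_bench_res.txt.
+if [ ! -x ./cluster_monitoring_checkpoints ]; then
+  echo "Benchmark binaries not found in $path. Run scripts/build.sh first. Exiting..."
+  exit 1
+fi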
+echo $PWD + +echo "CM1" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "CM2" >> ls_bench_res.txt +for t in ${t14[@]}; # 8 +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 \ + --query 2 --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "SG1" >> ls_bench_res.txt +for t in ${t10[@]}; #5 +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \ + --bundle-size 1048576 --slots 128 --latency true --threads $t >> ls_bench_res.txt +# --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 \ +# --bundle-size 524288 --slots 128 --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "SG2" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --create-merge true --parallel-merge true --latency true --threads $t >> ls_bench_res.txt + done +done + +# increase the circular-size or decouple memory buffering from storage +echo "SG3" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 3 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --create-merge true --parallel-merge true --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "LRB1" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --create-merge true --parallel-merge true --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "LRB2" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --create-merge true --parallel-merge true --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "LRB3" >> ls_bench_res.txt +for t in ${t14[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 3 \ + --create-merge true --parallel-merge true --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "YSB" >> ls_bench_res.txt +for t in ${t15[@]}; #10 +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints --circular-size 16777216 --slots 128 --batch-size 1048576 \ + --bundle-size 1048576 --latency true --threads $t >> ls_bench_res.txt + done +done + +echo "NBQ5" >> ls_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \ + --unbounded-size 262144 --latency true --parallel-merge true --threads 
$t >> ls_bench_res.txt + done +done + +echo "All done..." \ No newline at end of file diff --git a/scripts/prepare-software.sh b/scripts/prepare-software.sh index 9b6f664..5cebb74 100755 --- a/scripts/prepare-software.sh +++ b/scripts/prepare-software.sh @@ -30,9 +30,20 @@ sudo apt-get install libboost-all-dev # TBB sudo apt-get install libtbb-dev +# KAIO +sudo apt-get install libaio-dev + +# IBVerbs +sudo apt install libibverbs-dev +#sudo apt-get install libmlx4–1 infiniband-diags ibutils ibverbs-utils rdmacm-utils + # Used for disk profiling sudo apt install bpfcc-tools # sudo biolatency-bpfcc -D 30 1 +sudo apt install sysstat +# iostat -dx 30 2 (iostat -dx 1) // mpstat 30 1 (cpu) // sar -n DEV 30 1 (network) +sudo apt install fio +# sudo fio --name=writefile --size=1G --filesize=1G --filename=/home/grt17/dat --bs=1M --nrfiles=1 --direct=1 --sync=0 --randrepeat=0 --rw=write --refill_buffers --end_fsync=1 --iodepth=200 --ioengine=libaio # CMake 3.16 sudo apt remove --purge --auto-remove cmake @@ -77,7 +88,7 @@ cmake -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Release \ -DLLVM_TARGETS_TO_BUILD=X86 -G "Unix Makefiles" ../llvm make -j$(nproc) sudo make install -echo 'export LLVM_HOME=$(pwd)' >> $HOME/.profile +echo "export LLVM_HOME=$(pwd)" >> $HOME/.profile echo 'export PATH=$LLVM_HOME/bin:$PATH' >> $HOME/.profile echo 'export LIBRARY_PATH=$LLVM_HOME/lib:$LIBRARY_PATH' >> $HOME/.profile source $HOME/.profile @@ -92,8 +103,11 @@ sudo apt-get install libgtest-dev cd /usr/src/gtest sudo cmake CMakeLists.txt sudo make -j$(nproc) -# which one is the correct? -sudo cp ./lib/*.a /usr/lib +# cd /usr/src/googletest +# sudo cmake CMakeLists.txt +# sudo make -j$(nproc) +# sudo cp googlemock/gtest/*a /usr/lib +# sudo cp ./lib/*.a /usr/lib sudo cp *.a /usr/lib/ sudo mkdir /usr/local/lib/gtest sudo ln -s /usr/lib/libgtest.a /usr/local/lib/gtest/libgtest.a @@ -110,6 +124,48 @@ make -j$(nproc) sudo make install cd +# PMDK +sudo apt install -y git gcc g++ autoconf automake asciidoc asciidoctor bash-completion xmlto libtool pkg-config libglib2.0-0 libglib2.0-dev libfabric1 libfabric-dev doxygen graphviz pandoc libncurses5 libkmod2 libkmod-dev libudev-dev uuid-dev libjson-c-dev libkeyutils-dev +git clone https://github.com/pmem/ndctl +cd ndctl +git checkout c7767834871f7ce50a2abe1da946e9e16fb08eda +sudo ./autogen.sh +sudo ./configure CFLAGS='-g -O2' --prefix=/usr/local --sysconfdir=/etc --libdir=/usr/local/lib64 +#sudo ./configure CFLAGS='-g -O2' --prefix=/usr --sysconfdir=/etc --libdir=/usr/lib +sudo make -j$(nproc) +sudo make install +cd + +sudo apt install autoconf automake pkg-config libglib2.0-dev libfabric-dev pandoc libncurses5-dev +git clone https://github.com/pmem/pmdk +cd pmdk +git checkout 3bc5b0da5a7a5d5752ad2cb4f5f9bf0edfd47d67 +export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH} +make -j$(nproc) +sudo PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH} make install +sudo sh -c "echo /usr/local/lib >> /etc/ld.so.conf" +sudo sh -c "echo /usr/local/lib64 >> /etc/ld.so.conf" +sudo ldconfig +cd +# PKG_CONFIG_PATH +echo 'export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}' >> $HOME/.profile +source $HOME/.profile + + +git clone https://github.com/pmem/libpmemobj-cpp.git +cd libpmemobj-cpp +git checkout 9f784bba07b94cd36c9eebeaa88c5df4f05045b2 +mkdir build +cd build +cmake -DTESTS_USE_VALGRIND=OFF .. 
+make -j$(nproc) +sudo make install +cd + +## Set up home directory +# echo "Setting up the home directory in src/utils/SystemConf.cpp" +# sed -i '65s#.*#"'$HOME'"#' $HOME/LightSaber/src/CMakeLists.txt + # Build LightSaber #cd $HOME/LightSaber #mkdir build @@ -121,4 +177,4 @@ cd #cd $HOME/LightSaber/build/test/benchmarks/microbenchmarks/ #./TestProjection -echo "All done..." \ No newline at end of file +echo "All done..." diff --git a/scripts/scabbard-bench/other/run-checkpoint-only-benchmarks-different-confs.sh b/scripts/scabbard-bench/other/run-checkpoint-only-benchmarks-different-confs.sh new file mode 100755 index 0000000..bcbdf01 --- /dev/null +++ b/scripts/scabbard-bench/other/run-checkpoint-only-benchmarks-different-confs.sh @@ -0,0 +1,215 @@ +#!/bin/bash + +echo "Start running benchmarks with checkpoints" + +allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +iterations=(1 2 3 4 5) + +path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/" + +if [ -n "$path" ]; then + echo "The build path is set to $path" +else + echo "Set the application build path. Exiting..." + exit +fi + +cd $path # If the application doesn't run from the build folder, it breaks. This happens because of the where the code files are generated. +echo $PWD + +echo "CM1" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --disk-block-size 65536 --latency true --threads $t >> bench_res.txt + done +done + +echo "CM1-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --latency true --threads $t >> bench_res.txt + done +done + +echo "CM2" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 2 --disk-block-size 131072 --latency true --threads $t >> bench_res.txt + done +done + +echo "CM2-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 \ + --query 2 --checkpoint-duration 1000 --disk-block-size 131072 --latency true --threads $t >> bench_res.txt + done +done + +echo "SG2" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --disk-block-size 4194304 --create-merge true \ + --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + + +echo "SG2-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true \ + --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + +echo "SG2-CH-CMP" >> bench_res.txt +for t in 
${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true \ + --parallel-merge true --latency true --checkpoint-compression true --threads $t >> bench_res.txt + done +done + +echo "LRB1" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --disk-block-size 16777216 \ + --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + +echo "LRB1-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --checkpoint-duration 1000 --disk-block-size 16777216 \ + --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + +echo "LRB1-CH-CMP" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --checkpoint-duration 1000 --disk-block-size 16777216 \ + --create-merge true --parallel-merge true --latency true --checkpoint-compression true --threads $t >> bench_res.txt + done +done + +echo "LRB2" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --disk-block-size 8388608 \ + --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + +echo "LRB2-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --checkpoint-duration 1000 --disk-block-size 8388608 \ + --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt + done +done + +echo "LRB2-CH-CMP" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --checkpoint-duration 1000 --disk-block-size 8388608 \ + --create-merge true --parallel-merge true --latency true --checkpoint-compression true --threads $t >> bench_res.txt + done +done + +echo "YSB" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints \ + --circular-size 33554432 --slots 128 --batch-size 524288 --bundle-size 524288 \ + --disk-block-size 32768 --latency true --threads $t >> bench_res.txt + done +done + +echo "YSB-CH" >> bench_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints \ + --circular-size 33554432 --slots 128 --batch-size 524288 --bundle-size 524288 \ + --latency true --checkpoint-duration 1000 --threads $t >> 
bench_res.txt
+  done
+done
+
+echo "NBQ5-CH" >> bench_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \
+    --unbounded-size 262144 --disk-block-size 1048576 --checkpoint-compression true --persist-input tru \
+    --lineage true --latency true --parallel-merge true --checkpoint-duration 1000 --threads $t >> bench_res.txt
+  done
+done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/other/run-scalability-benchmarks.sh b/scripts/scabbard-bench/other/run-scalability-benchmarks.sh
new file mode 100755
index 0000000..53bee29
--- /dev/null
+++ b/scripts/scabbard-bench/other/run-scalability-benchmarks.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+echo "Start running scalability benchmarks"
+
+allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+iterations=(1 2 3 4 5)
+
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the code files are generated.
+echo $PWD
+
+
+echo "YSB-chk" >> scal_bench_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \
+    --bundle-size 1048576 --disk-block-size 32768 --latency true --checkpoint-compression tru \
+    --persist-input true --lineage true --threads $t >> scal_bench_res.txt
+  done
+done
+
+echo "YSB-ALL-chk" >> scal_bench_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \
+    --bundle-size 1048576 --disk-block-size 32768 --latency true --checkpoint-compression true \
+    --persist-input true --lineage true --threads $t >> scal_bench_res.txt
+  done
+done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-adaptive-benchmarks-FIG12.sh b/scripts/scabbard-bench/paper/run-adaptive-benchmarks-FIG12.sh
new file mode 100755
index 0000000..ba0d148
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-adaptive-benchmarks-FIG12.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+echo "Start running adaptive benchmarks"
+
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the code files are generated.
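+# Optional, hedged addition (not in the original script): the runs below append
+# to adapt_res.txt, so rotate any previous results file first.
+[ -f adapt_res.txt ] && mv adapt_res.txt "adapt_res.$(date +%s).txt"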
+echo $PWD + +# run with adaptive compression +echo "ME1 adaptive" >> adapt_res.txt +./manufacturing_equipment_checkpoints --unbounded-size 32768 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --disk-block-size 32768 --latency tru --checkpoint-compression true --persist-input true --lineage true --threads 10 --adaptive-compression true --adaptive-data true >> adapt_res.txt + +# run without adaptive compression +echo "ME1" >> adapt_res.txt +./manufacturing_equipment_checkpoints --unbounded-size 32768 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --disk-block-size 32768 --latency tru --checkpoint-compression true --persist-input true --lineage true --threads 10 --adaptive-data true >> adapt_res.txt + +echo "All done..." \ No newline at end of file diff --git a/scripts/scabbard-bench/paper/run-aws-gp3-benchmarks-FIG14.sh b/scripts/scabbard-bench/paper/run-aws-gp3-benchmarks-FIG14.sh new file mode 100755 index 0000000..b69778a --- /dev/null +++ b/scripts/scabbard-bench/paper/run-aws-gp3-benchmarks-FIG14.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +echo "Start running benchmarks" + +# These experiments run with gp3 disks (700 MB/s and 16000 IOPS). +allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +iterations=(1 2 3 4 5) +t7=(7) +t8=(8) +t10=(10) +t14=(14) +t15=(15) + +path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/" +#path="/home/ubuntu/tmp/cmake-build-debug-aws/test/benchmarks/applicationsWithCheckpoints/" + +if [ -n "$path" ]; then + echo "The build path is set to $path" +else + echo "Set the application build path. Exiting..." + exit +fi + +cd $path # If the application doesn't run from the build folder, it breaks. This happens because of the where the code files are generated. +echo $PWD + +echo "CM1-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 67108864 --unbounded-size 4194304 --batch-size 2097152 --bundle-size 2097152 \ + --query 1 --checkpoint-duration 1000 --disk-block-size 262144 --checkpoint-compression true \ + --persist-input true --lineage true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "CM2-ALL" >> scb_bench_res.txt +for t in ${t8[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 67108864 --unbounded-size 4194304 --batch-size 2097152 --bundle-size 2097152 \ + --query 2 --checkpoint-duration 1000 --disk-block-size 131072 --latency true --checkpoint-compression true \ + --checkpoint-duration 1000 --persist-input true --lineage true --threads $t >> scb_bench_res.txt + done +done + +echo "SG1-Simple" >> scb_bench_res.txt +for t in ${t7[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \ + --bundle-size 1048576 --slots 128 --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "SG1-ALL" >> scb_bench_res.txt +for t in ${t7[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \ + --bundle-size 1048576 --slots 128 --latency true --checkpoint-compression true \ + --disk-block-size 131072 --persist-input true --lineage true \ + --checkpoint-duration 1000 --threads $t >> scb_bench_res.txt + done +done + +echo "SG2-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 
--hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true --persist-input true \ + --checkpoint-compression true --lineage true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +# increase the circular-size or decouple memory buffering from storage +echo "SG3-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 3 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true --persist-input true \ + --checkpoint-compression true --lineage tru --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "LRB1-ALL-CMP" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --checkpoint-duration 1000 --disk-block-size 16777216 --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "LRB2-ALL-CMP" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --checkpoint-duration 1000 --disk-block-size 8388608 \ + --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "LRB3-ALL-CMP" >> scb_bench_res.txt +for t in ${t14[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 3 \ + --disk-block-size 8388608 --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "YSB-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints --circular-size 67108864 --slots 128 --batch-size 2097152 \ + --bundle-size 2097152 --disk-block-size 32768 --latency true --checkpoint-compression true \ + --persist-input true --lineage true --checkpoint-duration 1000 --threads $t >> scb_bench_res.txt + done +done + +echo "All done..." \ No newline at end of file diff --git a/scripts/scabbard-bench/paper/run-benchmarks-FIG7.sh b/scripts/scabbard-bench/paper/run-benchmarks-FIG7.sh new file mode 100755 index 0000000..071cfb2 --- /dev/null +++ b/scripts/scabbard-bench/paper/run-benchmarks-FIG7.sh @@ -0,0 +1,199 @@ +#!/bin/bash + +echo "Start running benchmarks" + +allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +iterations=(1 2 3 4 5) +t12=(12) +t14=(14) +t15=(15) + +path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/" + +if [ -n "$path" ]; then + echo "The build path is set to $path" +else + echo "Set the application build path. Exiting..." 
+ exit +fi + +cd $path # If the application doesn't run from the build folder, it breaks. This happens because of the where the code files are generated. +echo $PWD + +echo "CM1-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --checkpoint-compression true \ + --persist-input true --lineage true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "CM2-ALL" >> scb_bench_res.txt +for t in ${t14[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 \ + --query 2 --checkpoint-duration 1000 --disk-block-size 131072 --latency true --checkpoint-compression true \ + --checkpoint-duration 1000 --persist-input true --lineage true --threads $t >> scb_bench_res.txt + done +done + +echo "SG1-ALL" >> scb_bench_res.txt +for t in ${t12[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \ + --bundle-size 1048576 --slots 128 --latency true --checkpoint-compression true \ + --disk-block-size 131072 --persist-input true --lineage true \ + --checkpoint-duration 1000 --threads $t >> scb_bench_res.txt +# --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 \ +# --bundle-size 524288 --slots 128 --latency true --checkpoint-compression true \ +# --disk-block-size 131072 --persist-input true --lineage true \ + done +done + +echo "SG2-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true --persist-input true \ + --checkpoint-compression true --lineage true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +# increase the circular-size or decouple memory buffering from storage +echo "SG3-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 3 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \ + --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \ + --checkpoint-duration 1000 --disk-block-size 4194304 --create-merge true --persist-input true \ + --checkpoint-compression true --lineage tru --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +#echo "LRB1-ALL" >> scb_bench_res.txt +#for t in ${t15[@]}; +#do +# for it in ${iterations[@]}; +# do +# ./linear_road_benchmark_checkpoints \ +# --unbounded-size 8388608 --circular-size 16777216 \ +# --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ +# --checkpoint-duration 1000 --disk-block-size 16777216 --persist-input true \ +# --checkpoint-compression tru --lineage true \ +# --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt +# done +#done + +echo "LRB1-ALL-CMP" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + 
--batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --checkpoint-duration 1000 --disk-block-size 16777216 --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +#echo "LRB2-ALL" >> scb_bench_res.txt +#for t in ${t15[@]}; +#do +# for it in ${iterations[@]}; +# do +# ./linear_road_benchmark_checkpoints \ +# --unbounded-size 16777216 --circular-size 16777216 \ +# --batch-size 262144 --bundle-size 262144 --query 2 \ +# --checkpoint-duration 1000 --disk-block-size 8388608 \ +# --persist-input true \ +# --checkpoint-compression tru --lineage true \ +# --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt +# done +#done + +echo "LRB2-ALL-CMP" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --checkpoint-duration 1000 --disk-block-size 8388608 \ + --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +#echo "LRB3-ALL" >> scb_bench_res.txt +#for t in ${t14[@]}; +#do +# for it in ${iterations[@]}; +# do +# ./linear_road_benchmark_checkpoints \ +# --unbounded-size 16777216 --circular-size 16777216 \ +# --batch-size 262144 --bundle-size 262144 --query 3 \ +# --disk-block-size 8388608 \ +# --persist-input true \ +# --checkpoint-compression tru --lineage true \ +# --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt +# done +#done + +echo "LRB3-ALL-CMP" >> scb_bench_res.txt +for t in ${t14[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 3 \ + --disk-block-size 8388608 --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge true --latency true --threads $t >> scb_bench_res.txt + done +done + +echo "YSB-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \ + --bundle-size 1048576 --disk-block-size 32768 --latency true --checkpoint-compression true \ + --persist-input true --lineage true --checkpoint-duration 1000 --threads $t >> scb_bench_res.txt + done +done + +echo "NBQ5-ALL" >> scb_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \ + --unbounded-size 262144 --disk-block-size 1048576 --checkpoint-compression true --persist-input true \ + --lineage true --latency true --parallel-merge true --checkpoint-duration 1000 --threads $t >> scb_bench_res.txt + done +done + +echo "All done..." 
\ No newline at end of file diff --git a/scripts/scabbard-bench/paper/run-breakdown-benchmarks-FIG11.sh b/scripts/scabbard-bench/paper/run-breakdown-benchmarks-FIG11.sh new file mode 100755 index 0000000..23d62fe --- /dev/null +++ b/scripts/scabbard-bench/paper/run-breakdown-benchmarks-FIG11.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +echo "Start running benchmarks" + +allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +iterations=(1 2 3 4 5) +t12=(12) +t14=(14) +t15=(15) + +path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/" + +if [ -n "$path" ]; then + echo "The build path is set to $path" +else + echo "Set the application build path. Exiting..." + exit +fi + +cd $path # If the application doesn't run from the build folder, it breaks. This happens because of the where the code files are generated. +echo $PWD + + + +#1) no-opt +echo "CM1-NO-OPT" >> breakdown_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --checkpoint-compression tru \ + --persist-input true --lineage true --latency true --threads $t >> breakdown_bench_res.txt + done +done +#2) only delayed persistence +# use noCompressInput function and --checkpoint-compression true +#3) only compression (lossless) +# use onlyCompressInputLossless function and --checkpoint-compression true +#4) both => lossless floats +# use compressGenInput function and --checkpoint-compression true +#5) => lossy floats +# use compressInput function and --checkpoint-compression true +#6) no-disk => --checkpoint-compression true or false +# use -DNO_DISK flag when compiling +echo "CM1-OPTS" >> breakdown_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \ + --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --checkpoint-compression true \ + --persist-input true --lineage true --latency true --threads $t >> breakdown_bench_res.txt + done +done + +echo "CM2-ALL" >> breakdown_bench_res.txt +for t in ${t14[@]}; +do + for it in ${iterations[@]}; + do + ./cluster_monitoring_checkpoints \ + --circular-size 16777216 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 \ + --query 2 --checkpoint-duration 1000 --disk-block-size 131072 --latency true --checkpoint-compression true \ + --checkpoint-duration 1000 --persist-input true --lineage true --threads $t >> breakdown_bench_res.txt + done +done + +echo "SG1-ALL" >> breakdown_bench_res.txt +for t in ${t12[@]}; +do + for it in ${iterations[@]}; + do + ./smartgrid_checkpoints \ + --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \ + --bundle-size 1048576 --slots 128 --latency true --checkpoint-compression true \ + --disk-block-size 131072 --persist-input true --lineage true \ + --checkpoint-duration 1000 --threads $t >> breakdown_bench_res.txt +# --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 \ +# --bundle-size 524288 --slots 128 --latency true --checkpoint-compression true \ +# --disk-block-size 131072 --persist-input true --lineage true \ + done +done + +echo "YSB-ALL" >> breakdown_bench_res.txt +for t in ${t15[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \ + 
+echo "YSB-ALL" >> breakdown_bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \
+    --bundle-size 1048576 --disk-block-size 32768 --latency true --checkpoint-compression true \
+    --persist-input true --lineage true --checkpoint-duration 1000 --threads $t >> breakdown_bench_res.txt
+  done
+done
+
+#echo "NBQ5-ALL" >> breakdown_bench_res.txt
+#for t in ${t15[@]};
+#do
+#  for it in ${iterations[@]};
+#  do
+#    ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \
+#    --unbounded-size 262144 --disk-block-size 1048576 --checkpoint-compression true --persist-input true \
+#    --lineage true --latency true --parallel-merge true --checkpoint-duration 1000 --threads $t >> breakdown_bench_res.txt
+#  done
+#done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-checkpoint-only-benchmarks-TABLE4.sh b/scripts/scabbard-bench/paper/run-checkpoint-only-benchmarks-TABLE4.sh
new file mode 100755
index 0000000..2368948
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-checkpoint-only-benchmarks-TABLE4.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+echo "Start running checkpointing benchmarks"
+
+allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+iterations=(1 2 3 4 5)
+t12=(12)
+t14=(14)
+t15=(15)
+
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the generated code files are placed.
+echo $PWD
+
+echo "CM1" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./cluster_monitoring_checkpoints \
+    --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \
+    --query 1 --disk-block-size 65536 --latency true --checkpoint-duration 1000 --threads $t >> bench_res.txt
+  done
+done
+
+echo "CM2" >> bench_res.txt
+for t in ${t14[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./cluster_monitoring_checkpoints \
+    --circular-size 16777216 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \
+    --query 2 --disk-block-size 131072 --latency true --checkpoint-duration 1000 --threads $t >> bench_res.txt
+  done
+done
+
+echo "SG1" >> bench_res.txt
+for t in ${t12[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./smartgrid_checkpoints \
+    --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \
+    --bundle-size 1048576 --slots 128 --latency true --checkpoint-compression true \
+    --disk-block-size 131072 --persist-input tru --lineage true \
+    --checkpoint-duration 1000 --threads $t >> bench_res.txt
+#    --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 \
+#    --bundle-size 524288 --slots 128 --latency true --checkpoint-compression true \
+#    --disk-block-size 131072 --persist-input tru --lineage true \
+#    --threads $t >> bench_res.txt
+  done
+done
+
+echo "SG2" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./smartgrid_checkpoints \
+    --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \
+    --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \
+    --disk-block-size 4194304 --create-merge true --checkpoint-duration 1000 \
+    --parallel-merge true --latency true --threads $t >> bench_res.txt
+  done
+done
+
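+# Each echoed label lands in bench_res.txt followed by the 5 appended runs for
+# that query (one per entry of the iterations array). A rough way to inspect a
+# single block afterwards -- a sketch only, since the exact number of output
+# lines per run depends on the engine's log format:
+#   grep -A 20 '^SG2$' bench_res.txt | less
+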
+echo "SG3" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./smartgrid_checkpoints \
+    --query 3 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \
+    --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \
+    --disk-block-size 4194304 --create-merge true --checkpoint-duration 1000 \
+    --parallel-merge true --latency true --threads $t >> bench_res.txt
+  done
+done
+
+echo "LRB1" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./linear_road_benchmark_checkpoints \
+    --unbounded-size 8388608 --circular-size 16777216 \
+    --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \
+    --disk-block-size 16777216 --checkpoint-duration 1000 \
+    --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt
+  done
+done
+
+echo "LRB2" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./linear_road_benchmark_checkpoints \
+    --unbounded-size 16777216 --circular-size 16777216 \
+    --batch-size 262144 --bundle-size 262144 --query 2 \
+    --disk-block-size 8388608 --checkpoint-duration 1000 \
+    --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt
+  done
+done
+
+echo "LRB3" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./linear_road_benchmark_checkpoints \
+    --unbounded-size 16777216 --circular-size 16777216 \
+    --batch-size 262144 --bundle-size 262144 --query 3 \
+    --disk-block-size 8388608 --checkpoint-duration 1000 \
+    --create-merge true --parallel-merge true --latency true --threads $t >> bench_res.txt
+  done
+done
+
+
+echo "YSB" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./yahoo_benchmark_checkpoints \
+    --circular-size 33554432 --slots 128 --batch-size 524288 --bundle-size 524288 \
+    --disk-block-size 32768 --latency true --checkpoint-duration 1000 --threads $t >> bench_res.txt
+  done
+done
+
+echo "NBQ5-ALL" >> bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \
+    --unbounded-size 262144 --disk-block-size 1048576 --checkpoint-compression true --persist-input tru \
+    --lineage true --latency true --parallel-merge true --checkpoint-duration 1000 --threads $t >> bench_res.txt
+  done
+done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-flink-benchmarks-FIG9.sh b/scripts/scabbard-bench/paper/run-flink-benchmarks-FIG9.sh
new file mode 100755
index 0000000..7bfd1b8
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-flink-benchmarks-FIG9.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+echo "Start running scalability benchmarks"
+
+batchSize=(524288 1048576 2097152 4194304)
+iterations=(1 2 3 4 5)
+buffers=(1 2 4 8)
+
+path="$HOME/LightSaber/build/test/benchmarks/kafka-flink/"
+#path="/tmp/tmp.0lki8nQd4R/cmake-build-debug/test/benchmarks/kafka-flink/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the generated code files are placed.
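+# The first loop below measures the Flink-style baseline with checkpoints
+# enabled; the second additionally passes --use-kafka true, so it assumes a
+# reachable Kafka broker has already been set up for the kafka-flink benchmarks.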
+echo $PWD
+
+echo "YSB" >> flink_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    for buf in ${buffers[@]};
+    do
+      ./yahoo_benchmark_flink --disk-block-size $b --threads 16 --latency true --disk-buffer $buf --use-checkpoints true >> flink_bench_res.txt
+    done
+  done
+done
+
+echo "YSB-Kafka" >> flink_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    for buf in ${buffers[@]};
+    do
+      ./yahoo_benchmark_flink --disk-block-size $b --threads 16 --latency true --disk-buffer $buf --use-checkpoints true --use-kafka true >> flink_bench_res.txt
+    done
+  done
+done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-ingestion-only-benchmarks-FIG8.sh b/scripts/scabbard-bench/paper/run-ingestion-only-benchmarks-FIG8.sh
new file mode 100755
index 0000000..3eb1c8d
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-ingestion-only-benchmarks-FIG8.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+echo "Start running ingestion benchmarks"
+
+allThreads=(7 8 9 10 14 15)
+empty=()
+iterations=(1 2 3 4 5)
+
+# Before running the following experiments, set the m_doProcessing variable in OperatorKernel to false
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the generated code files are placed.
+echo $PWD
+
+echo "CM1-ALL" >> ingestion_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./cluster_monitoring_checkpoints \
+    --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \
+    --query 1 --disk-block-size 65536 --checkpoint-compression true \
+    --persist-input true --lineage true --latency true --threads $t >> ingestion_res.txt
+  done
+done
+
+echo "CM2-ALL" >> ingestion_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./cluster_monitoring_checkpoints \
+    --circular-size 16777216 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 \
+    --query 2 --disk-block-size 131072 --latency true --checkpoint-compression true \
+    --persist-input true --lineage true --threads $t >> ingestion_res.txt
+  done
+done
+
+echo "SG1-ALL" >> ingestion_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./smartgrid_checkpoints \
+    --query 1 --unbounded-size 262144 --batch-size 1048576 --circular-size 33554432 \
+    --bundle-size 1048576 --slots 128 --latency true --checkpoint-compression true \
+    --disk-block-size 131072 --persist-input true --lineage true \
+    --threads $t >> ingestion_res.txt
+#    --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 \
+#    --bundle-size 524288 --slots 128 --latency true --checkpoint-compression true \
+#    --disk-block-size 131072 --persist-input true --lineage true \
+#    --threads $t >> ingestion_res.txt
+  done
+done
+
+echo "SG2-ALL" >> ingestion_res.txt
+for t in ${allThreads[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./smartgrid_checkpoints \
+    --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 \
+    --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 \
+    --disk-block-size 4194304 --create-merge true --persist-input true \
+    --checkpoint-compression true --lineage true --parallel-merge tru --latency true --threads $t >> ingestion_res.txt
done +done + +echo "LRB1-ALL" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --disk-block-size 16777216 --persist-input true \ + --checkpoint-compression tru --lineage true \ + --create-merge true --parallel-merge tru --latency true --threads $t >> ingestion_res.txt + done +done + +echo "LRB1-ALL-CMP" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 8388608 --circular-size 16777216 \ + --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 \ + --disk-block-size 16777216 --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge tru --latency true --threads $t >> ingestion_res.txt + done +done + +echo "LRB2-ALL" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --disk-block-size 8388608 \ + --persist-input true \ + --checkpoint-compression tru --lineage true \ + --create-merge true --parallel-merge tru --latency true --threads $t >> ingestion_res.txt + done +done + +echo "LRB2-ALL-CMP" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./linear_road_benchmark_checkpoints \ + --unbounded-size 16777216 --circular-size 16777216 \ + --batch-size 262144 --bundle-size 262144 --query 2 \ + --disk-block-size 8388608 \ + --persist-input true \ + --checkpoint-compression true --lineage true \ + --create-merge true --parallel-merge tru --latency true --threads $t >> ingestion_res.txt + done +done + +echo "YSB-ALL" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./yahoo_benchmark_checkpoints --circular-size 33554432 --slots 128 --batch-size 1048576 \ + --bundle-size 1048576 --disk-block-size 32768 --latency true --checkpoint-compression true \ + --persist-input true --lineage true --threads $t >> ingestion_res.txt + done +done + +echo "NBQ5-CH" >> ingestion_res.txt +for t in ${allThreads[@]}; +do + for it in ${iterations[@]}; + do + ./nexmark_checkpoints --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 \ + --unbounded-size 262144 --disk-block-size 1048576 --checkpoint-compression true --persist-input true \ + --lineage true --latency true --parallel-merge true --threads $t >> ingestion_res.txt + done +done + +echo "All done..." \ No newline at end of file diff --git a/scripts/scabbard-bench/paper/run-kafka-benchmarks-FIG9.sh b/scripts/scabbard-bench/paper/run-kafka-benchmarks-FIG9.sh new file mode 100755 index 0000000..b323d2f --- /dev/null +++ b/scripts/scabbard-bench/paper/run-kafka-benchmarks-FIG9.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +echo "Start running scalability benchmarks" + +batchSize=(524288 1048576 2097152 4194304) +iterations=(1 2 3 4 5) + +path="$HOME/LightSaber/build/test/benchmarks/kafka-flink/" +#path="/tmp/tmp.0lki8nQd4R/cmake-build-debug/test/benchmarks/kafka-flink/" + +if [ -n "$path" ]; then + echo "The build path is set to $path" +else + echo "Set the application build path. Exiting..." + exit +fi + +cd $path # If the application doesn't run from the build folder, it breaks. 
This happens because of where the generated code files are placed.
+echo $PWD
+
+echo "CM1" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 4 --query 0 --latency true --disk-buffer 16 --batch-size $b --bundle-size $b >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 4 --query 0 --latency true --disk-buffer 2 --batch-size $b --bundle-size $b >> kafka_bench_res.txt
+  done
+done
+
+echo "CM2" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  bb=$((2*b))
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 10 --query 1 --latency true --disk-buffer 32 --bundle-size $bb --batch-size $bb >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 10 --query 1 --latency true --disk-buffer 4 --bundle-size $bb --batch-size $bb >> kafka_bench_res.txt
+  done
+done
+
+echo "SG1" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 1 --query 2 --latency true --disk-buffer 32 >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 1 --query 2 --latency true --disk-buffer 2 >> kafka_bench_res.txt
+  done
+done
+
+echo "SG2" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  bb=$((8*b))
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 16 --query 3 --latency true --bundle-size $bb --batch-size $bb >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 16 --query 3 --latency true --bundle-size $bb --batch-size $bb --disk-buffer 2 >> kafka_bench_res.txt
+  done
+done
+
+echo "LRB1" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  bb=$((2*b))
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 16 --query 5 --latency true --disk-buffer 16 --batch-size $bb --bundle-size $bb >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 16 --query 5 --latency true --disk-buffer 2 --batch-size $bb --bundle-size $bb >> kafka_bench_res.txt
+  done
+done
+
+echo "LRB2" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  bb=$((8*b))
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 16 --query 6 --latency true --disk-buffer 8 --batch-size $bb --bundle-size $bb >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 16 --query 6 --latency true --disk-buffer 2 --batch-size $bb --bundle-size $bb >> kafka_bench_res.txt
+  done
+done
+
+echo "YSB" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 16 --query 8 --latency true >> kafka_bench_res.txt
+    #./kafka_benchmarks --disk-block-size $b --threads 16 --query 8 --latency true --disk-buffer 1 >> kafka_bench_res.txt
+  done
+done
+
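+# The bundle/batch sizes above are derived from the disk-block size (bb=2*b or
+# bb=8*b), so larger blocks are fed proportionally larger batches; e.g. with
+# b=524288 the SG2 run uses --batch-size 4194304 (= 8 * 524288).
+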
+echo "NXB5" >> kafka_bench_res.txt
+for b in ${batchSize[@]};
+do
+  for it in ${iterations[@]};
+  do
+    ./kafka_benchmarks --disk-block-size $b --threads 16 --query 10 --latency true --disk-buffer 16 >> kafka_bench_res.txt
+  done
+done
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-rdma-benchmarks-FIG13.sh b/scripts/scabbard-bench/paper/run-rdma-benchmarks-FIG13.sh
new file mode 100755
index 0000000..7311ace
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-rdma-benchmarks-FIG13.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+echo "Start running benchmarks"
+
+allThreads=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+iterations=(1) # 2 3 4 5)
+t12=(12)
+t14=(14)
+t15=(15)
+
+# Disable SMT: sudo nano /sys/devices/system/cpu/smt/control (set it to off)
+# cat /sys/devices/system/cpu/smt/control
+# install pcm https://software.opensuse.org/download/package?package=pcm&project=home%3Aopcm
+# 1) Login as root
+# 2) Execute the command 'modprobe msr'
+
+# qperf (server)
+# qperf -t 10 rc_bw
+# qperf -t 10 --rem_id mlx5_1 rc_bw
+# server: iperf -s
+# client: iperf -c 10.0.0.40
+
+# ibdev2netdev : check status
+# sudo ifconfig ib0 10.0.0.30/24 up
+# sudo ifconfig ib1 11.0.0.31/24 up
+
+# If running on the wallaby machine, use the second socket => change the core mapping in Utils.cpp
+# and unset the -DHAVE_NUMA flag in src/CMakeLists.txt
+# use args.push_back("--gcc-toolchain=/usr/local/gcc/7.5.0"); in OperatorJit.cpp => https://stackoverflow.com/questions/40486053/selecting-a-specific-libstdc-version-with-clang
+
+#path="$HOME/LightSaber/cmake-build-debug/test/benchmarks/applicationsWithCheckpoints/"
+path="/tmp/tmp.Ogl5pzLSii/cmake-build-debug-wallaby/test/benchmarks/applicationsWithCheckpoints/"
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the generated code files are placed.
+echo $PWD
+
+echo "CM1" >> remote_scb_bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    echo "Starting Sink"
+    nohup ssh -t kea04 '/tmp/tmp.VuzIsR9Kxl/cmake-build-debug-kea04/test/benchmarks/applications/remoteRDMASink --batch-size 1048576 --bundle-size 1048576' &
+
+    echo "Starting app"
+    nohup bash -c "./cluster_monitoring_checkpoints \
+    --circular-size 33554432 --unbounded-size 524288 --batch-size 524288 --bundle-size 524288 \
+    --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --checkpoint-compression true \
+    --persist-input true --lineage true --latency true --threads 15 >> remote_scb_bench_res.txt" &
+
+    echo "Starting Source"
+    nohup ssh -t kea03 '/tmp/tmp.42rD7Z5vpA/cmake-build-debug-kea03/test/benchmarks/applications/remoteRDMASource --batch-size 1048576 --bundle-size 1048576 --query 0' &
+  done
+done
+
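+# The sink (kea04), the local app, and the source (kea03) are launched
+# asynchronously via nohup. If the source comes up before the sink and app are
+# ready to accept connections, stagger the launches, e.g. (a sketch; tune the
+# delay to your cluster):
+#   sleep 5
+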
+echo "YSB" >> remote_scb_bench_res.txt
+for t in ${t15[@]};
+do
+  for it in ${iterations[@]};
+  do
+    echo "Starting Sink"
+    nohup ssh -t kea04 '/tmp/tmp.VuzIsR9Kxl/cmake-build-debug-kea04/test/benchmarks/applications/remoteRDMASink --batch-size 1048576 --bundle-size 1048576' &
+
+    echo "Starting app"
+    nohup bash -c " ./yahoo_benchmark_checkpoints \
+    --circular-size 33554432 --slots 128 --batch-size 1048576 --bundle-size 1048576 \
+    --disk-block-size 32768 --latency true --checkpoint-compression true --persist-input true --lineage true \
+    --checkpoint-duration 1000 --threads 15 >> remote_scb_bench_res.txt" &
+
+    echo "Starting Source"
+    nohup ssh -t kea03 '/tmp/tmp.42rD7Z5vpA/cmake-build-debug-kea03/test/benchmarks/applications/remoteRDMASource --batch-size 1048576 --bundle-size 1048576 --query 8' &
+  done
+done
+
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/scabbard-bench/paper/run-recovery-benchmarks-FIG10.sh b/scripts/scabbard-bench/paper/run-recovery-benchmarks-FIG10.sh
new file mode 100755
index 0000000..a479eb4
--- /dev/null
+++ b/scripts/scabbard-bench/paper/run-recovery-benchmarks-FIG10.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+echo "Start running recovery benchmarks"
+
+# Set SystemConf::PERFORMANCE_MONITOR_INTERVAL = 100 or 10
+# Adjust the throughput manually in test/benchmarks/applications/BenchmarkQuery.h by setting the sleep time on line 156
+# Set the condition on line 610 of LRB1.cpp to false => if (true) {...}
+# Increase the sleep times below if the recovering application can't acquire the locks of the pmem files
+path="$HOME/LightSaber/build/test/benchmarks/applicationsWithCheckpoints/"
+#path="/home/ubuntu/tmp/cmake-build-debug-aws/test/benchmarks/applicationsWithCheckpoints/"
+#path=/tmp/tmp.0lki8nQd4R/cmake-build-debug/test/benchmarks/applicationsWithCheckpoints
+
+if [ -n "$path" ]; then
+  echo "The build path is set to $path"
+else
+  echo "Set the application build path. Exiting..."
+  exit
+fi
+
+cd $path # If the application doesn't run from the build folder, it breaks. This happens because of where the generated code files are placed.
+echo $PWD
+
+bash -c "exec -a LRBRecover ./linear_road_benchmark_checkpoints --unbounded-size 8388608 --circular-size 16777216 --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 --checkpoint-duration 1000 --disk-block-size 16777216 --persist-input true --checkpoint-compression true --lineage true --create-merge true --parallel-merge true --latency true --threads 15 --ingestion 300 --performance-monitor-interval 100 >> recovery.txt" &
+
+sleep 7
+pkill -f LRBRecover
+sleep 0.07
+
+
+bash -c "exec -a LRBRecover ./linear_road_benchmark_checkpoints --unbounded-size 8388608 --circular-size 16777216 --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 --checkpoint-duration 1000 --disk-block-size 16777216 --persist-input true --checkpoint-compression true --lineage true --create-merge true --parallel-merge true --latency true --recover true --threads 15 --ingestion 300 --performance-monitor-interval 100 >> recovery.txt" &
+sleep 15
+pkill -f LRBRecover
+
+echo "All done..."
\ No newline at end of file
diff --git a/scripts/utils/memcheck.sh b/scripts/utils/memcheck.sh
new file mode 100644
index 0000000..bbe06f1
--- /dev/null
+++ b/scripts/utils/memcheck.sh
@@ -0,0 +1,2 @@
+# pass a process name as a parameter
+while true; do ps -eo rss,pid,euser,args:100 --sort %mem | grep -v grep | grep -i "$1" | awk '{printf $1/1024 "MB"; $1=""; print }' ; sleep 0.5s ; done
diff --git a/scripts/utils/run_with_huge_pages.sh b/scripts/utils/run_with_huge_pages.sh
new file mode 100644
index 0000000..18ba8cd
--- /dev/null
+++ b/scripts/utils/run_with_huge_pages.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# taken from https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog
+if ! [ $(id -u) = 0 ]; then
+  echo "The script needs to be run as root."
>&2 + exit 1 +fi + +if [ $SUDO_USER ]; then + real_user=$SUDO_USER +else + real_user=$(whoami) +fi + +origval=$(sudo cat /sys/kernel/mm/transparent_hugepage/enabled) +sudo -u $real_user echo $origval +set -e +function cleanup { + echo "Restoring hugepages to madvise" + echo "madvise" > /sys/kernel/mm/transparent_hugepage/enabled +} +trap cleanup EXIT + +for mode in "always" ; do # "never + sudo -u $real_user echo "mode: " $mode + echo $mode > /sys/kernel/mm/transparent_hugepage/enabled + echo $(sudo cat /sys/kernel/mm/transparent_hugepage/enabled) + $@ +done +echo "Done." + +# echo 2048 | sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages +# cat /proc/meminfo | grep Huge \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 52cb646..2323e5a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -46,7 +46,7 @@ target_compile_options(operatorJITLib PRIVATE -Wall -Wextra -O3 -march=native) SET(HEADERS utils/Utils.h - buffers/NUMACircularQueryBuffer.h + buffers/NumaCircularQueryBuffer.h buffers/CircularQueryBuffer.h buffers/QueryBuffer.h buffers/PartialWindowResults.h @@ -75,6 +75,8 @@ SET(HEADERS cql/predicates/ComparisonPredicate.h cql/predicates/ANDPredicate.h cql/predicates/ORPredicate.h + dispatcher/ITaskDispatcher.h + dispatcher/JoinTaskDispatcher.h dispatcher/TaskDispatcher.h monitors/LatencyMonitor.h monitors/Measurement.h @@ -96,7 +98,16 @@ SET(HEADERS utils/WindowDefinition.h ) SET(CPP_FILES + checkpoint/FileBackedCheckpointCoordinator.cpp + checkpoint/BlockManager.cpp + checkpoint/LineageGraph.cpp + cql/expressions/Expression.cpp + dispatcher/ITaskDispatcher.cpp + dispatcher/JoinTaskDispatcher.cpp dispatcher/TaskDispatcher.cpp + compression/CompressionCodeGenUtils.cpp + compression/CompressionStatistics.cpp + monitors/CompressionMonitor.cpp monitors/PerformanceMonitor.cpp monitors/LatencyMonitor.cpp monitors/Measurement.cpp @@ -111,13 +122,25 @@ SET(CPP_FILES utils/SystemConf.cpp utils/Utils.cpp ) +SET(RDMA_CPP_FILES + RDMA/infinity/core/Context.cpp + RDMA/infinity/memory/Atomic.cpp + RDMA/infinity/memory/Buffer.cpp + RDMA/infinity/memory/Region.cpp + RDMA/infinity/memory/RegionToken.cpp + RDMA/infinity/memory/RegisteredMemory.cpp + RDMA/infinity/queues/QueuePair.cpp + RDMA/infinity/queues/QueuePairFactory.cpp + RDMA/infinity/requests/RequestToken.cpp + RDMA/infinity/utils/Address.cpp + ) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma") -SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUMA" ) +SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUMA") #--gcc-toolchain=/usr/local/gcc/7.5.0 #-mcx16 -add_executable(LightSaber ${HEADERS} ${CPP_FILES} main.cpp) +add_executable(LightSaber ${HEADERS} ${CPP_FILES} ${RDMA_CPP_FILES} main.cpp) target_compile_options(LightSaber PRIVATE -O3 -march=native) @@ -146,11 +169,25 @@ set_target_properties(operatorJITLib PROPERTIES CXX_EXTENSIONS OFF ) +#set(USER_PATH "/media/george/DATA") +#set(USER_PATH "/home/grt17") +set(USER_PATH $ENV{HOME}) +set(ENV{LLVM_HOME} "${USER_PATH}/llvm-project/build") +message(STATUS "LLVM_HOME: $ENV{LLVM_HOME}") +set(ENV{PATH} "$ENV{LLVM_HOME}/bin:$ENV{PATH}") +message(STATUS "PATH: $ENV{PATH}") +set(ENV{LIBRARY_PATH} "$ENV{LLVM_HOME}/lib:$ENV{LIBRARY_PATH}") +message(STATUS "LIBRARY_PATH: $ENV{LIBRARY_PATH}") + # add LLVM libraries, set LLVM_* variables +set(LLVM_DIR "$ENV{LLVM_HOME}/lib/cmake/llvm") find_package(LLVM 9.0 REQUIRED PATHS ${LLVM_DIR}) message(STATUS "LLVM_DIR: ${LLVM_DIR}") 
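+# NOTE: the block above hard-codes USER_PATH to $ENV{HOME} and expects an LLVM 9
+# build tree under $HOME/llvm-project/build (see the find_package(LLVM 9.0 REQUIRED)
+# call above); adjust USER_PATH if your llvm-project checkout lives elsewhere.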
message(STATUS "LLVM_PACKAGE_VERSION: ${LLVM_PACKAGE_VERSION}") +set(LLVM_BUILD_BINARY_DIR $ENV{LLVM_HOME}) +message(STATUS "LLVM_BUILD_BINARY_DIR: ${LLVM_BUILD_BINARY_DIR}") + # add Clang libraries include(${LLVM_BUILD_BINARY_DIR}/lib/cmake/clang/ClangTargets.cmake) @@ -164,6 +201,10 @@ if (USE_LLD) target_link_libraries(operatorJITLib PRIVATE "-fuse-ld=lld") endif () +set(LLVM_BUILD_MAIN_SRC_DIR "${USER_PATH}/llvm-project/llvm") +message(STATUS "LLVM_BUILD_MAIN_SRC_DIR: ${LLVM_BUILD_MAIN_SRC_DIR}") + + # find Clang source directory if (EXISTS ${LLVM_BUILD_MAIN_SRC_DIR}/tools/clang) set(CLANG_SRC_DIR_PREFIX tools) # sources in-tree (default build) diff --git a/src/RDMA/infinity/core/Configuration.h b/src/RDMA/infinity/core/Configuration.h new file mode 100644 index 0000000..09974e2 --- /dev/null +++ b/src/RDMA/infinity/core/Configuration.h @@ -0,0 +1,54 @@ +/** + * Core - Configuration + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef CORE_CONFIGURATION_H_ +#define CORE_CONFIGURATION_H_ + +#include + +namespace infinity { +namespace core { + +class Configuration { + +public: + + /** + * Queue length settings + */ + + static const uint32_t SEND_COMPLETION_QUEUE_LENGTH = 8 * 1024; //16351; // Must be less than MAX_CQE + + static const uint32_t RECV_COMPLETION_QUEUE_LENGTH = 16351; // Must be less than MAX_CQE + + static const uint32_t SHARED_RECV_QUEUE_LENGTH = 16351; // Must be less than MAX_SRQ_WR + + static const uint32_t MAX_NUMBER_OF_OUTSTANDING_REQUESTS = 16351; // Must be less than (MAX_QP_WR * MAX_QP) + // Since we use one single shared receive queue, + // this number should be less than MAX_SRQ_WR + + static const uint32_t MAX_NUMBER_OF_SGE_ELEMENTS = 1; // Must be less than MAX_SGE + +public: + + /** + * System settings + */ + + static const uint32_t PAGE_SIZE = 4096; // Memory regions will be page aligned by the Infinity library + + static const uint32_t MAX_CONNECTION_USER_DATA_SIZE = 1024; // Size of the user data which can be transmitted when establishing a connection + + static constexpr const char* DEFAULT_IB_DEVICE = "ib0"; // Default name of IB device + +}; + +} /* namespace core */ +} /* namespace infinity */ + +#endif /* CORE_CONFIGURATION_H_ */ diff --git a/src/RDMA/infinity/core/Context.cpp b/src/RDMA/infinity/core/Context.cpp new file mode 100644 index 0000000..b2ac92c --- /dev/null +++ b/src/RDMA/infinity/core/Context.cpp @@ -0,0 +1,224 @@ +/** + * Core - Context + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "Context.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +namespace infinity { +namespace core { + +/******************************* + * Context + ******************************/ + +Context::Context(uint16_t device, uint16_t devicePort) { + + // Get IB device list + int32_t numberOfInstalledDevices = 0; + ibv_device **ibvDeviceList = ibv_get_device_list(&numberOfInstalledDevices); + INFINITY_ASSERT(numberOfInstalledDevices > 0, "[INFINITY][CORE][CONTEXT] No InfiniBand devices found.\n"); + INFINITY_ASSERT(device < numberOfInstalledDevices, "[INFINITY][CORE][CONTEXT] Requested device %d not found. 
There are %d devices available.\n", + device, numberOfInstalledDevices); + INFINITY_ASSERT(ibvDeviceList != NULL, "[INFINITY][CORE][CONTEXT] Device list was NULL.\n"); + + // Get IB device + this->ibvDevice = ibvDeviceList[device]; + INFINITY_ASSERT(this->ibvDevice != NULL, "[INFINITY][CORE][CONTEXT] Requested device %d was NULL.\n", device); + + // Open IB device and allocate protection domain + this->ibvContext = ibv_open_device(this->ibvDevice); + INFINITY_ASSERT(this->ibvContext != NULL, "[INFINITY][CORE][CONTEXT] Could not open device %d.\n", device); + this->ibvProtectionDomain = ibv_alloc_pd(this->ibvContext); + INFINITY_ASSERT(this->ibvProtectionDomain != NULL, "[INFINITY][CORE][CONTEXT] Could not allocate protection domain.\n"); + + // Get the LID + ibv_port_attr portAttributes; + ibv_query_port(this->ibvContext, devicePort, &portAttributes); + this->ibvLocalDeviceId = portAttributes.lid; + this->ibvDevicePort = devicePort; + + // Allocate completion queues + this->ibvSendCompletionQueue = ibv_create_cq(this->ibvContext, MAX(Configuration::SEND_COMPLETION_QUEUE_LENGTH, 1), NULL, NULL, 0); + this->ibvReceiveCompletionQueue = ibv_create_cq(this->ibvContext, MAX(Configuration::RECV_COMPLETION_QUEUE_LENGTH, 1), NULL, NULL, 0); + + // Allocate shared receive queue + ibv_srq_init_attr sia; + memset(&sia, 0, sizeof(ibv_srq_init_attr)); + sia.srq_context = this->ibvContext; + sia.attr.max_wr = MAX(Configuration::SHARED_RECV_QUEUE_LENGTH, 1); + sia.attr.max_sge = 1; + this->ibvSharedReceiveQueue = ibv_create_srq(this->ibvProtectionDomain, &sia); + INFINITY_ASSERT(this->ibvSharedReceiveQueue != NULL, "[INFINITY][CORE][CONTEXT] Could not allocate shared receive queue.\n"); + + // Create a default request token + defaultRequestToken = new infinity::requests::RequestToken(this); + defaultAtomic = new infinity::memory::Atomic(this); + +} + +Context::~Context() { + + // Delete default token + delete defaultRequestToken; + delete defaultAtomic; + + // Destroy shared receive queue + int returnValue = ibv_destroy_srq(this->ibvSharedReceiveQueue); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Could not delete shared receive queue\n"); + + // Destroy completion queues + returnValue = ibv_destroy_cq(this->ibvSendCompletionQueue); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Could not delete send completion queue\n"); + returnValue = ibv_destroy_cq(this->ibvReceiveCompletionQueue); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Could not delete receive completion queue\n"); + + // Destroy protection domain + returnValue = ibv_dealloc_pd(this->ibvProtectionDomain); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Could not delete protection domain\n"); + + // Close device + returnValue = ibv_close_device(this->ibvContext); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Could not close device\n"); + +} + +void Context::postReceiveBuffer(infinity::memory::Buffer* buffer) { + + INFINITY_ASSERT(buffer->getSizeInBytes() <= std::numeric_limits::max(), + "[INFINITY][CORE][CONTEXT] Cannot post receive buffer which is larger than max(uint32_t).\n"); + + // Create scatter-getter + ibv_sge isge; + memset(&isge, 0, sizeof(ibv_sge)); + isge.addr = buffer->getAddress(); + isge.length = static_cast(buffer->getSizeInBytes()); + isge.lkey = buffer->getLocalKey(); + + // Create work request + ibv_recv_wr wr; + memset(&wr, 0, sizeof(ibv_recv_wr)); + wr.wr_id = reinterpret_cast(buffer); + wr.next = NULL; + wr.sg_list = &isge; + wr.num_sge = 1; 
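+  // Note: wr_id carries the Buffer pointer through the completion queue;
+  // receive() casts wc.wr_id back to infinity::memory::Buffer* to identify
+  // which posted buffer a completion belongs to.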
+ + // Post buffer to shared receive queue + ibv_recv_wr *badwr; + uint32_t returnValue = ibv_post_srq_recv(this->ibvSharedReceiveQueue, &wr, &badwr); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][CORE][CONTEXT] Cannot post buffer to receive queue.\n"); + +} + +bool Context::receive(receive_element_t* receiveElement) { + + return receive(&(receiveElement->buffer), &(receiveElement->bytesWritten), &(receiveElement->immediateValue), &(receiveElement->immediateValueValid), &(receiveElement->queuePair)); + +} + +bool Context::receive(infinity::memory::Buffer** buffer, uint32_t *bytesWritten, uint32_t *immediateValue, bool *immediateValueValid, infinity::queues::QueuePair **queuePair) { + + ibv_wc wc; + if (ibv_poll_cq(this->ibvReceiveCompletionQueue, 1, &wc) > 0) { + + if(wc.opcode == IBV_WC_RECV) { + *(buffer) = reinterpret_cast(wc.wr_id); + *(bytesWritten) = wc.byte_len; + } else if (wc.opcode == IBV_WC_RECV_RDMA_WITH_IMM) { + *(buffer) = NULL; + *(bytesWritten) = wc.byte_len; + infinity::memory::Buffer* receiveBuffer = reinterpret_cast(wc.wr_id); + this->postReceiveBuffer(receiveBuffer); + } + + if(wc.wc_flags & IBV_WC_WITH_IMM) { + *(immediateValue) = ntohl(wc.imm_data); + *(immediateValueValid) = true; + } else { + *(immediateValue) = 0; + *(immediateValueValid) = false; + } + + if(queuePair != NULL) { + *(queuePair) = queuePairMap.at(wc.qp_num); + } + + return true; + } + + return false; + +} + +bool Context::pollSendCompletionQueue() { + + ibv_wc wc; + if (ibv_poll_cq(this->ibvSendCompletionQueue, 1, &wc) > 0) { + + infinity::requests::RequestToken * request = reinterpret_cast(wc.wr_id); + if (request != NULL) { + request->setCompleted(wc.status == IBV_WC_SUCCESS); + } + + if (wc.status == IBV_WC_SUCCESS) { + INFINITY_DEBUG("[INFINITY][CORE][CONTEXT] Request completed (id %lu).\n", wc.wr_id); + } else { + INFINITY_DEBUG("[INFINITY][CORE][CONTEXT] Request failed (id %lu).\n", wc.wr_id); + } + return true; + } + + return false; + +} + +void Context::registerQueuePair(infinity::queues::QueuePair* queuePair) { + this->queuePairMap.insert({queuePair->getQueuePairNumber(), queuePair}); +} + +ibv_context* Context::getInfiniBandContext() { + return this->ibvContext; +} + +uint16_t Context::getLocalDeviceId() { + return this->ibvLocalDeviceId; +} + +uint16_t Context::getDevicePort() { + return this->ibvDevicePort; +} + +ibv_pd* Context::getProtectionDomain() { + return this->ibvProtectionDomain; +} + +ibv_cq* Context::getSendCompletionQueue() { + return this->ibvSendCompletionQueue; +} + +ibv_cq* Context::getReceiveCompletionQueue() { + return this->ibvReceiveCompletionQueue; +} + +ibv_srq* Context::getSharedReceiveQueue() { + return this->ibvSharedReceiveQueue; +} + +} /* namespace core */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/core/Context.h b/src/RDMA/infinity/core/Context.h new file mode 100644 index 0000000..2627fb0 --- /dev/null +++ b/src/RDMA/infinity/core/Context.h @@ -0,0 +1,166 @@ +/** + * Core - Context + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef CORE_CONTEXT_H_ +#define CORE_CONTEXT_H_ + +#include +#include +#include +#include + +namespace infinity { +namespace memory { +class Region; +class Buffer; +class Atomic; +class RegisteredMemory; +} +} + +namespace infinity { +namespace queues { +class QueuePair; +class QueuePairFactory; +} +} + +namespace infinity { +namespace requests { +class RequestToken; +} +} + +namespace infinity { +namespace core { + +typedef struct { + infinity::memory::Buffer *buffer; + 
uint32_t bytesWritten; + uint32_t immediateValue; + bool immediateValueValid; + infinity::queues::QueuePair *queuePair; +} receive_element_t; + +class Context { + + friend class infinity::memory::Region; + friend class infinity::memory::Buffer; + friend class infinity::memory::Atomic; + friend class infinity::memory::RegisteredMemory; + friend class infinity::queues::QueuePair; + friend class infinity::queues::QueuePairFactory; + friend class infinity::requests::RequestToken; + +public: + + /** + * Constructors + */ + Context(uint16_t device = 0, uint16_t devicePort = 1); + + /** + * Destructor + */ + ~Context(); + +public: + + /** + * Check if receive operation completed + */ + bool receive(receive_element_t *receiveElement); + bool receive(infinity::memory::Buffer **buffer, uint32_t *bytesWritten, uint32_t *immediateValue, bool *immediateValueValid, infinity::queues::QueuePair **queuePair = NULL); + + /** + * Post a new buffer for receiving messages + */ + void postReceiveBuffer(infinity::memory::Buffer *buffer); + +public: + + infinity::requests::RequestToken * defaultRequestToken; + infinity::memory::Atomic * defaultAtomic; + +protected: + + /** + * Returns ibVerbs context + */ + ibv_context * getInfiniBandContext(); + + /** + * Returns local device id + */ + uint16_t getLocalDeviceId(); + + /** + * Returns device port + */ + uint16_t getDevicePort(); + + /** + * Returns ibVerbs protection domain + */ + ibv_pd * getProtectionDomain(); + +protected: + + /** + * Check if send operation completed + */ + bool pollSendCompletionQueue(); + + /** + * Returns ibVerbs completion queue for sending + */ + ibv_cq * getSendCompletionQueue(); + + /** + * Returns ibVerbs completion queue for receiving + */ + ibv_cq * getReceiveCompletionQueue(); + + /** + * Returns ibVerbs shared receive queue + */ + ibv_srq * getSharedReceiveQueue(); + +protected: + + /** + * IB context and protection domain + */ + ibv_context *ibvContext; + ibv_pd *ibvProtectionDomain; + + /** + * Local device id and port + */ + ibv_device *ibvDevice; + uint16_t ibvLocalDeviceId; + uint16_t ibvDevicePort; + + /** + * IB send and receive completion queues + */ + ibv_cq *ibvSendCompletionQueue; + ibv_cq *ibvReceiveCompletionQueue; + ibv_srq *ibvSharedReceiveQueue; + +protected: + + void registerQueuePair(infinity::queues::QueuePair *queuePair); + std::unordered_map queuePairMap; + +}; + +} /* namespace core */ +} /* namespace infinity */ + +#endif /* CORE_CONTEXT_H_ */ diff --git a/src/RDMA/infinity/infinity.h b/src/RDMA/infinity/infinity.h new file mode 100644 index 0000000..31d44aa --- /dev/null +++ b/src/RDMA/infinity/infinity.h @@ -0,0 +1,26 @@ +/** + * Infinity - A C++ RDMA library for InfiniBand + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef INFINITY_H_ +#define INFINITY_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* INFINITY_H_ */ diff --git a/src/RDMA/infinity/memory/Atomic.cpp b/src/RDMA/infinity/memory/Atomic.cpp new file mode 100644 index 0000000..5c22e8a --- /dev/null +++ b/src/RDMA/infinity/memory/Atomic.cpp @@ -0,0 +1,51 @@ +/* + * Memory - Atomic + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "Atomic.h" + +#include + +namespace infinity { +namespace memory { + +Atomic::Atomic(infinity::core::Context* context) { + + this->context = context; + this->sizeInBytes = sizeof(uint64_t); + this->memoryRegionType = 
RegionType::ATOMIC; + + this->value = 0; + this->data = &value; + + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), &(this->value), this->sizeInBytes, + IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); + + +} + +uint64_t infinity::memory::Atomic::getValue() { + + return this->value; + +} + +void infinity::memory::Atomic::setValueNonAtomic(uint64_t value) { + + this->value = value; + +} + + +Atomic::~Atomic() { + + ibv_dereg_mr(this->ibvMemoryRegion); + +} + +} /* namespace memory */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/memory/Atomic.h b/src/RDMA/infinity/memory/Atomic.h new file mode 100644 index 0000000..d6912a3 --- /dev/null +++ b/src/RDMA/infinity/memory/Atomic.h @@ -0,0 +1,42 @@ +/* + * Memory - Atomic + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef MEMORY_ATOMIC_H_ +#define MEMORY_ATOMIC_H_ + +#include +#include +#include + +namespace infinity { +namespace memory { + +class Atomic : public Region { + +public: + + Atomic(infinity::core::Context *context); + virtual ~Atomic(); + +public: + + uint64_t getValue(); + + void setValueNonAtomic(uint64_t value); + +protected: + + uint64_t value; + + +}; + +} /* namespace memory */ +} /* namespace infinity */ + +#endif /* MEMORY_ATOMIC_H_ */ diff --git a/src/RDMA/infinity/memory/Buffer.cpp b/src/RDMA/infinity/memory/Buffer.cpp new file mode 100644 index 0000000..1b12d1f --- /dev/null +++ b/src/RDMA/infinity/memory/Buffer.cpp @@ -0,0 +1,112 @@ +/* + * Memory - Buffer + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "Buffer.h" + +#include +#include +#include +#include + +#define MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace infinity { +namespace memory { + +Buffer::Buffer(infinity::core::Context* context, uint64_t sizeInBytes) { + + this->context = context; + this->sizeInBytes = sizeInBytes; + this->memoryRegionType = RegionType::BUFFER; + + int res = posix_memalign(&(this->data), infinity::core::Configuration::PAGE_SIZE, sizeInBytes); + INFINITY_ASSERT(res == 0, "[INFINITY][MEMORY][BUFFER] Cannot allocate and align buffer.\n"); + + memset(this->data, 0, sizeInBytes); + + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), this->data, this->sizeInBytes, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); + INFINITY_ASSERT(this->ibvMemoryRegion != NULL, "[INFINITY][MEMORY][BUFFER] Registration failed.\n"); + + this->memoryAllocated = true; + this->memoryRegistered = true; + +} + +Buffer::Buffer(infinity::core::Context* context, infinity::memory::RegisteredMemory* memory, uint64_t offset, uint64_t sizeInBytes) { + + this->context = context; + this->sizeInBytes = sizeInBytes; + this->memoryRegionType = RegionType::BUFFER; + + this->data = reinterpret_cast(memory->getData()) + offset; + this->ibvMemoryRegion = memory->getRegion(); + + this->memoryAllocated = false; + this->memoryRegistered = false; + +} + +Buffer::Buffer(infinity::core::Context *context, void *memory, uint64_t sizeInBytes) { + + this->context = context; + this->sizeInBytes = sizeInBytes; + this->memoryRegionType = RegionType::BUFFER; + + this->data = memory; + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), this->data, this->sizeInBytes, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); + INFINITY_ASSERT(this->ibvMemoryRegion != NULL, "[INFINITY][MEMORY][BUFFER] Registration failed.\n"); 
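+  // This constructor wraps caller-provided memory: the region is registered
+  // here but never owned, so ~Buffer() deregisters it without freeing the data.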
+ + this->memoryAllocated = false; + this->memoryRegistered = true; + +} + +Buffer::~Buffer() { + + if (this->memoryRegistered) { + ibv_dereg_mr(this->ibvMemoryRegion); + } + if (this->memoryAllocated) { + free(this->data); + } + +} + +void* Buffer::getData() { + return reinterpret_cast(this->getAddress()); +} + +void Buffer::resize(uint64_t newSize, void* newData) { + + void *oldData = this->data; + uint32_t oldSize = this->sizeInBytes; + + if (newData == NULL) { + newData = this->data; + } + + if (oldData != newData) { + uint64_t copySize = MIN(newSize, oldSize); + memcpy(newData, oldData, copySize); + } + + if (memoryRegistered) { + ibv_dereg_mr(this->ibvMemoryRegion); + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), newData, newSize, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); + this->data = newData; + this->sizeInBytes = newSize; + } else { + INFINITY_ASSERT(false, "[INFINITY][MEMORY][BUFFER] You can only resize memory which has registered by this buffer.\n"); + } +} + +} /* namespace memory */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/memory/Buffer.h b/src/RDMA/infinity/memory/Buffer.h new file mode 100644 index 0000000..c9bd3f2 --- /dev/null +++ b/src/RDMA/infinity/memory/Buffer.h @@ -0,0 +1,44 @@ +/* + * Memory - Buffer + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef MEMORY_BUFFER_H_ +#define MEMORY_BUFFER_H_ + +#include +#include +#include + +namespace infinity { +namespace memory { + +class Buffer : public Region { + +public: + + Buffer(infinity::core::Context *context, uint64_t sizeInBytes); + Buffer(infinity::core::Context *context, infinity::memory::RegisteredMemory *memory, uint64_t offset, uint64_t sizeInBytes); + Buffer(infinity::core::Context *context, void *memory, uint64_t sizeInBytes); + ~Buffer(); + +public: + + void * getData(); + void resize(uint64_t newSize, void *newData = NULL); + +protected: + + bool memoryRegistered; + bool memoryAllocated; + + +}; + +} /* namespace memory */ +} /* namespace infinity */ + +#endif /* MEMORY_BUFFER_H_ */ diff --git a/src/RDMA/infinity/memory/Region.cpp b/src/RDMA/infinity/memory/Region.cpp new file mode 100644 index 0000000..929e105 --- /dev/null +++ b/src/RDMA/infinity/memory/Region.cpp @@ -0,0 +1,62 @@ +/* + * Memory - Region + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include +#include + +#include "Buffer.h" + +namespace infinity { +namespace memory { + +Region::~Region() { + // To be overwritten in sub class +} + +RegionToken* Region::createRegionToken() { + return new RegionToken(this, getMemoryRegionType(), getSizeInBytes(), getAddress(), getLocalKey(), getRemoteKey()); +} + +RegionToken * Region::createRegionToken(uint64_t offset) { + return new RegionToken(this, getMemoryRegionType(), getRemainingSizeInBytes(offset), getAddressWithOffset(offset), getLocalKey(), getRemoteKey()); +} + +RegionToken * Region::createRegionToken(uint64_t offset, uint64_t size) { + return new RegionToken(this, getMemoryRegionType(), size, getAddressWithOffset(offset), getLocalKey(), getRemoteKey()); +} + +RegionType Region::getMemoryRegionType() { + return this->memoryRegionType; +} + +uint64_t Region::getSizeInBytes() { + return this->sizeInBytes; +} + +uint64_t Region::getRemainingSizeInBytes(uint64_t offset) { + return this->sizeInBytes - offset; +} + +uint64_t Region::getAddress() { + return reinterpret_cast(this->data); +} + +uint64_t 
Region::getAddressWithOffset(uint64_t offset) { + return reinterpret_cast(this->data) + offset; +} + +uint32_t Region::getLocalKey() { + return this->ibvMemoryRegion->lkey; +} + +uint32_t Region::getRemoteKey() { + return this->ibvMemoryRegion->rkey; +} + +} /* namespace memory */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/memory/Region.h b/src/RDMA/infinity/memory/Region.h new file mode 100644 index 0000000..5191077 --- /dev/null +++ b/src/RDMA/infinity/memory/Region.h @@ -0,0 +1,58 @@ +/* + * Memory - Region + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef MEMORY_REGION_H_ +#define MEMORY_REGION_H_ + +#include +#include +#include +#include + +namespace infinity { +namespace memory { + +class RegionToken; + +class Region { + +public: + + virtual ~Region(); + + RegionToken * createRegionToken(); + RegionToken * createRegionToken(uint64_t offset); + RegionToken * createRegionToken(uint64_t offset, uint64_t size); + +public: + + RegionType getMemoryRegionType(); + uint64_t getSizeInBytes(); + uint64_t getRemainingSizeInBytes(uint64_t offset); + uint64_t getAddress(); + uint64_t getAddressWithOffset(uint64_t offset); + uint32_t getLocalKey(); + uint32_t getRemoteKey(); + +protected: + + infinity::core::Context* context; + RegionType memoryRegionType; + ibv_mr *ibvMemoryRegion; + +protected: + + void * data; + uint64_t sizeInBytes; + +}; + +} /* namespace memory */ +} /* namespace infinity */ + +#endif /* MEMORY_REGION_H_ */ diff --git a/src/RDMA/infinity/memory/RegionToken.cpp b/src/RDMA/infinity/memory/RegionToken.cpp new file mode 100644 index 0000000..190054e --- /dev/null +++ b/src/RDMA/infinity/memory/RegionToken.cpp @@ -0,0 +1,72 @@ +/* + * Memory - Region Token + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include + +namespace infinity { +namespace memory { + +RegionToken::RegionToken() : + memoryRegion (NULL), + memoryRegionType (UNKNOWN), + sizeInBytes(0), + address(0), + localKey(0), + remoteKey(0) { + + // Nothing to do here + +} + +RegionToken::RegionToken(Region *memoryRegion, RegionType memoryRegionType, uint64_t sizeInBytes, uint64_t address, uint32_t localKey, uint32_t remoteKey) : + memoryRegion (memoryRegion), + memoryRegionType (memoryRegionType), + sizeInBytes(sizeInBytes), + address(address), + localKey(localKey), + remoteKey(remoteKey) { + + // Nothing to do here + +} + +Region* RegionToken::getMemoryRegion() { + return memoryRegion; +} + +RegionType RegionToken::getMemoryRegionType() { + return this->memoryRegionType; +} + +uint64_t RegionToken::getSizeInBytes() { + return this->sizeInBytes; +} + +uint64_t RegionToken::getRemainingSizeInBytes(uint64_t offset) { + return this->sizeInBytes-offset; +} + +uint64_t RegionToken::getAddress() { + return address; +} + +uint64_t RegionToken::getAddressWithOffset(uint64_t offset) { + return address + offset; +} + +uint32_t RegionToken::getLocalKey() { + return this->localKey; +} + +uint32_t RegionToken::getRemoteKey() { + return this->remoteKey; +} + + +} /* namespace memory */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/memory/RegionToken.h b/src/RDMA/infinity/memory/RegionToken.h new file mode 100644 index 0000000..b84dbc3 --- /dev/null +++ b/src/RDMA/infinity/memory/RegionToken.h @@ -0,0 +1,51 @@ +/* + * Memory - Region Token + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef MEMORY_REGIONTOKEN_H_ +#define MEMORY_REGIONTOKEN_H_ + +#include +#include 
+#include + +namespace infinity { +namespace memory { + +class RegionToken { + +public: + + RegionToken(); + RegionToken(Region *memoryRegion, RegionType memoryRegionType, uint64_t sizeInBytes, uint64_t address, uint32_t localKey, uint32_t remoteKey); + +public: + + Region * getMemoryRegion(); + RegionType getMemoryRegionType(); + uint64_t getSizeInBytes(); + uint64_t getRemainingSizeInBytes(uint64_t offset); + uint64_t getAddress(); + uint64_t getAddressWithOffset(uint64_t offset); + uint32_t getLocalKey(); + uint32_t getRemoteKey(); + +protected: + + Region *memoryRegion; + const RegionType memoryRegionType; + const uint64_t sizeInBytes; + const uint64_t address; + const uint32_t localKey; + const uint32_t remoteKey; + +}; + +} /* namespace memory */ +} /* namespace infinity */ + +#endif /* MEMORY_REGIONTOKEN_H_ */ diff --git a/src/RDMA/infinity/memory/RegionType.h b/src/RDMA/infinity/memory/RegionType.h new file mode 100644 index 0000000..2f1ca9e --- /dev/null +++ b/src/RDMA/infinity/memory/RegionType.h @@ -0,0 +1,20 @@ +/* + * Memory - Region Type + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef MEMORY_REGIONTYPE_H_ +#define MEMORY_REGIONTYPE_H_ + +namespace infinity { +namespace memory { + +enum RegionType {BUFFER, ATOMIC, UNKNOWN}; + +} /* namespace memory */ +} /* namespace infinity */ + +#endif /* MEMORY_REGIONTYPE_H_ */ diff --git a/src/RDMA/infinity/memory/RegisteredMemory.cpp b/src/RDMA/infinity/memory/RegisteredMemory.cpp new file mode 100644 index 0000000..2ed7a7d --- /dev/null +++ b/src/RDMA/infinity/memory/RegisteredMemory.cpp @@ -0,0 +1,78 @@ +/* + * Memory - Registered Memory + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "RegisteredMemory.h" + +#include +#include +#include +#include + +namespace infinity { +namespace memory { + +RegisteredMemory::RegisteredMemory(infinity::core::Context* context, uint64_t sizeInBytes) { + + this->context = context; + this->sizeInBytes = sizeInBytes; + this->memoryAllocated = true; + + int res = posix_memalign(&(this->data), infinity::core::Configuration::PAGE_SIZE, sizeInBytes); + INFINITY_ASSERT(res == 0, "[INFINITY][MEMORY][REGISTERED] Cannot allocate and align buffer.\n"); + + memset(this->data, 0, sizeInBytes); + + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), this->data, this->sizeInBytes, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); + INFINITY_ASSERT(this->ibvMemoryRegion != NULL, "[INFINITY][MEMORY][REGISTERED] Registration failed.\n"); +} + +RegisteredMemory::RegisteredMemory(infinity::core::Context* context, void *data, uint64_t sizeInBytes) { + + this->context = context; + this->sizeInBytes = sizeInBytes; + this->memoryAllocated = false; + + this->data = data; + + this->ibvMemoryRegion = ibv_reg_mr(this->context->getProtectionDomain(), this->data, this->sizeInBytes, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); + INFINITY_ASSERT(this->ibvMemoryRegion != NULL, "[INFINITY][MEMORY][REGISTERED] Registration failed.\n"); +} + + +RegisteredMemory::~RegisteredMemory() { + + ibv_dereg_mr(this->ibvMemoryRegion); + + if(this->memoryAllocated) { + free(this->data); + } + +} + +void* RegisteredMemory::getData() { + + return this->data; + +} + +uint64_t RegisteredMemory::getSizeInBytes() { + + return this->sizeInBytes; + +} + +ibv_mr* RegisteredMemory::getRegion() { + + return this->ibvMemoryRegion; + +} + +} /* namespace pool */ +} /* namespace ivory 
*/ diff --git a/src/RDMA/infinity/memory/RegisteredMemory.h b/src/RDMA/infinity/memory/RegisteredMemory.h new file mode 100644 index 0000000..b2d6fb3 --- /dev/null +++ b/src/RDMA/infinity/memory/RegisteredMemory.h @@ -0,0 +1,50 @@ +/* + * Memory - Registered Memory + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef INFINITY_MEMORY_REGISTEREDMEMORY_H_ +#define INFINITY_MEMORY_REGISTEREDMEMORY_H_ + +#include + +namespace infinity { +namespace memory { + +class RegisteredMemory { + +public: + + RegisteredMemory(infinity::core::Context *context, uint64_t sizeInBytes); + RegisteredMemory(infinity::core::Context *context, void *data, uint64_t sizeInBytes); + ~RegisteredMemory(); + + void * getData(); + + uint64_t getSizeInBytes(); + + ibv_mr * getRegion(); + + +protected: + + infinity::core::Context* context; + + void *data; + uint64_t sizeInBytes; + + ibv_mr *ibvMemoryRegion; + +protected: + + bool memoryAllocated; + +}; + +} /* namespace infinity */ +} /* namespace memory */ + +#endif /* INFINITY_MEMORY_REGISTEREDMEMORY_H_ */ diff --git a/src/RDMA/infinity/queues/QueuePair.cpp b/src/RDMA/infinity/queues/QueuePair.cpp new file mode 100644 index 0000000..9626214 --- /dev/null +++ b/src/RDMA/infinity/queues/QueuePair.cpp @@ -0,0 +1,589 @@ +/** + * Queues - Queue Pair + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "QueuePair.h" + +#include +#include +#include +#include + +#include +#include + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +namespace infinity { +namespace queues { + +int OperationFlags::ibvFlags() { + int flags = 0; + if (fenced) { + flags |= IBV_SEND_FENCE; + } + if (signaled) { + flags |= IBV_SEND_SIGNALED; + } + if (inlined) { + flags |= IBV_SEND_INLINE; + } + return flags; +} + +QueuePair::QueuePair(infinity::core::Context* context) : + context(context) { + + ibv_qp_init_attr qpInitAttributes; + memset(&qpInitAttributes, 0, sizeof(qpInitAttributes)); + + qpInitAttributes.send_cq = context->getSendCompletionQueue(); + qpInitAttributes.recv_cq = context->getReceiveCompletionQueue(); + qpInitAttributes.srq = context->getSharedReceiveQueue(); + qpInitAttributes.cap.max_send_wr = MAX(infinity::core::Configuration::SEND_COMPLETION_QUEUE_LENGTH, 1); + qpInitAttributes.cap.max_send_sge = infinity::core::Configuration::MAX_NUMBER_OF_SGE_ELEMENTS; + qpInitAttributes.cap.max_recv_wr = MAX(infinity::core::Configuration::RECV_COMPLETION_QUEUE_LENGTH, 1); + qpInitAttributes.cap.max_recv_sge = infinity::core::Configuration::MAX_NUMBER_OF_SGE_ELEMENTS; + qpInitAttributes.qp_type = IBV_QPT_RC; + qpInitAttributes.sq_sig_all = 0; + + this->ibvQueuePair = ibv_create_qp(context->getProtectionDomain(), &(qpInitAttributes)); + INFINITY_ASSERT(this->ibvQueuePair != NULL, "[INFINITY][QUEUES][QUEUEPAIR] Cannot create queue pair.\n"); + + ibv_qp_attr qpAttributes; + memset(&qpAttributes, 0, sizeof(qpAttributes)); + + qpAttributes.qp_state = IBV_QPS_INIT; + qpAttributes.pkey_index = 0; + qpAttributes.port_num = context->getDevicePort(); + qpAttributes.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + + int32_t returnValue = ibv_modify_qp(this->ibvQueuePair, &(qpAttributes), IBV_QP_STATE | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS | IBV_QP_PKEY_INDEX); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Cannot transition to INIT state.\n"); + + std::random_device randomGenerator; + std::uniform_int_distribution range(0, 1<<24); + 
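+  // Draw a random initial packet sequence number (PSN): the peer installs it
+  // as rq_psn in activate(), while the local send queue starts from it (sq_psn).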
this->sequenceNumber = range(randomGenerator); + + this->userData = NULL; + this->userDataSize = 0; +} + +QueuePair::~QueuePair() { + + int32_t returnValue = ibv_destroy_qp(this->ibvQueuePair); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Cannot delete queue pair.\n"); + + if (this->userData != NULL && this->userDataSize != 0) { + free(this->userData); + this->userDataSize = 0; + } + +} + +void QueuePair::activate(uint16_t remoteDeviceId, uint32_t remoteQueuePairNumber, uint32_t remoteSequenceNumber) { + + ibv_qp_attr qpAttributes; + memset(&(qpAttributes), 0, sizeof(qpAttributes)); + + qpAttributes.qp_state = IBV_QPS_RTR; + qpAttributes.path_mtu = IBV_MTU_4096; + qpAttributes.dest_qp_num = remoteQueuePairNumber; + qpAttributes.rq_psn = remoteSequenceNumber; + qpAttributes.max_dest_rd_atomic = 1; + qpAttributes.min_rnr_timer = 12; + qpAttributes.ah_attr.is_global = 0; + qpAttributes.ah_attr.dlid = remoteDeviceId; + qpAttributes.ah_attr.sl = 0; + qpAttributes.ah_attr.src_path_bits = 0; + qpAttributes.ah_attr.port_num = context->getDevicePort(); + + int32_t returnValue = ibv_modify_qp(this->ibvQueuePair, &qpAttributes, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Cannot transition to RTR state.\n"); + + qpAttributes.qp_state = IBV_QPS_RTS; + qpAttributes.timeout = 14; + qpAttributes.retry_cnt = 7; + qpAttributes.rnr_retry = 7; + qpAttributes.sq_psn = this->getSequenceNumber(); + qpAttributes.max_rd_atomic = 1; + + returnValue = ibv_modify_qp(this->ibvQueuePair, &qpAttributes, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Cannot transition to RTS state.\n"); + +} + +void QueuePair::setRemoteUserData(void* userData, uint32_t userDataSize) { + if (userDataSize > 0) { + this->userData = new char[userDataSize]; + memcpy(this->userData, userData, userDataSize); + this->userDataSize = userDataSize; + } +} + +uint16_t QueuePair::getLocalDeviceId() { + return this->context->getLocalDeviceId(); +} + +uint32_t QueuePair::getQueuePairNumber() { + return this->ibvQueuePair->qp_num; +} + +uint32_t QueuePair::getSequenceNumber() { + return this->sequenceNumber; +} + +void QueuePair::send(infinity::memory::Buffer* buffer, infinity::requests::RequestToken *requestToken) { + send(buffer, 0, buffer->getSizeInBytes(), OperationFlags(), requestToken); +} + +void QueuePair::send(infinity::memory::Buffer* buffer, uint32_t sizeInBytes, infinity::requests::RequestToken *requestToken) { + send(buffer, 0, sizeInBytes, OperationFlags(), requestToken); +} + +void QueuePair::send(infinity::memory::Buffer* buffer, uint64_t localOffset, uint32_t sizeInBytes, OperationFlags send_flags, + infinity::requests::RequestToken *requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(buffer); + } + + struct ibv_sge sgElement; + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + memset(&sgElement, 0, sizeof(ibv_sge)); + sgElement.addr = buffer->getAddress() + localOffset; + sgElement.length = sizeInBytes; + sgElement.lkey = buffer->getLocalKey(); + + INFINITY_ASSERT(sizeInBytes <= buffer->getRemainingSizeInBytes(localOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while creating scatter-getter element.\n"); + + memset(&workRequest, 0, 
sizeof(ibv_send_wr));
+	workRequest.wr_id = reinterpret_cast<uint64_t>(requestToken);
+	workRequest.sg_list = &sgElement;
+	workRequest.num_sge = 1;
+	workRequest.opcode = IBV_WR_SEND;
+	workRequest.send_flags = send_flags.ibvFlags();
+	if (requestToken != NULL) {
+		workRequest.send_flags |= IBV_SEND_SIGNALED;
+	}
+
+	int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest);
+
+	INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting send request failed. %s.\n", strerror(errno));
+
+	INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Send request created (id %lu).\n", workRequest.wr_id);
+
+}
+
+void QueuePair::sendWithImmediate(infinity::memory::Buffer* buffer, uint64_t localOffset, uint32_t sizeInBytes, uint32_t immediateValue,
+		OperationFlags send_flags, infinity::requests::RequestToken* requestToken) {
+
+	if (requestToken != NULL) {
+		requestToken->reset();
+		requestToken->setRegion(buffer);
+		requestToken->setImmediateValue(immediateValue);
+	}
+
+	struct ibv_sge sgElement;
+	struct ibv_send_wr workRequest;
+	struct ibv_send_wr *badWorkRequest;
+
+	memset(&sgElement, 0, sizeof(ibv_sge));
+	sgElement.addr = buffer->getAddress() + localOffset;
+	sgElement.length = sizeInBytes;
+	sgElement.lkey = buffer->getLocalKey();
+
+	INFINITY_ASSERT(sizeInBytes <= buffer->getRemainingSizeInBytes(localOffset),
+			"[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while creating scatter-gather element.\n");
+
+	memset(&workRequest, 0, sizeof(ibv_send_wr));
+	workRequest.wr_id = reinterpret_cast<uint64_t>(requestToken);
+	workRequest.sg_list = &sgElement;
+	workRequest.num_sge = 1;
+	workRequest.opcode = IBV_WR_SEND_WITH_IMM;
+	workRequest.imm_data = htonl(immediateValue);
+	workRequest.send_flags = send_flags.ibvFlags();
+	if (requestToken != NULL) {
+		workRequest.send_flags |= IBV_SEND_SIGNALED;
+	}
+
+	int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest);
+
+	INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting send request failed. %s.\n", strerror(errno));
+
+	INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Send request created (id %lu).\n", workRequest.wr_id);
+
+}
+
+void QueuePair::write(infinity::memory::Buffer* buffer, infinity::memory::RegionToken* destination, infinity::requests::RequestToken *requestToken) {
+	write(buffer, 0, destination, 0, buffer->getSizeInBytes(), OperationFlags(), requestToken);
+	INFINITY_ASSERT(buffer->getSizeInBytes() <= ((uint64_t) UINT32_MAX), "[INFINITY][QUEUES][QUEUEPAIR] Request must be smaller than or equal to UINT32_MAX bytes. This memory region is larger. Please explicitly indicate the size of the data to transfer.\n");
+}
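+// Illustration (not part of the original sources): OperationFlags maps its
+// three booleans onto IBV_SEND_* bits via ibvFlags(). A caller might combine
+// them as follows, assuming 'context', a connected QueuePair* 'qp', and a
+// registered Buffer* 'buffer' already exist:
+//
+//   infinity::queues::OperationFlags flags;
+//   flags.inlined = true;    // IBV_SEND_INLINE: payload is copied into the WQE
+//   flags.signaled = true;   // IBV_SEND_SIGNALED even without a RequestToken
+//   flags.fenced = false;    // no IBV_SEND_FENCE ordering barrier needed here
+//
+//   infinity::requests::RequestToken token(&context);
+//   qp->send(buffer, /*localOffset=*/0, /*sizeInBytes=*/64, flags, &token);
+//   token.waitUntilCompleted();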
+
+void QueuePair::write(infinity::memory::Buffer* buffer, infinity::memory::RegionToken* destination, uint32_t sizeInBytes,
+		infinity::requests::RequestToken *requestToken) {
+	write(buffer, 0, destination, 0, sizeInBytes, OperationFlags(), requestToken);
+}
+
+void QueuePair::write(infinity::memory::Buffer* buffer, uint64_t localOffset, infinity::memory::RegionToken* destination, uint64_t remoteOffset,
+		uint32_t sizeInBytes, OperationFlags send_flags, infinity::requests::RequestToken *requestToken) {
+
+	if (requestToken != NULL) {
+		requestToken->reset();
+		requestToken->setRegion(buffer);
+	}
+
+	struct ibv_sge sgElement;
+	struct ibv_send_wr workRequest;
+	struct ibv_send_wr *badWorkRequest;
+
+	memset(&sgElement, 0, sizeof(ibv_sge));
+	sgElement.addr = buffer->getAddress() + localOffset;
+	sgElement.length = sizeInBytes;
+	sgElement.lkey = buffer->getLocalKey();
+
+	INFINITY_ASSERT(sizeInBytes <= buffer->getRemainingSizeInBytes(localOffset),
+			"[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while creating scatter-gather element.\n");
+
+	memset(&workRequest, 0, sizeof(ibv_send_wr));
+	workRequest.wr_id = reinterpret_cast<uint64_t>(requestToken);
+	workRequest.sg_list = &sgElement;
+	workRequest.num_sge = 1;
+	workRequest.opcode = IBV_WR_RDMA_WRITE;
+	workRequest.send_flags = send_flags.ibvFlags();
+	if (requestToken != NULL) {
+		workRequest.send_flags |= IBV_SEND_SIGNALED;
+	}
+	workRequest.wr.rdma.remote_addr = destination->getAddress() + remoteOffset;
+	workRequest.wr.rdma.rkey = destination->getRemoteKey();
+
+	INFINITY_ASSERT(sizeInBytes <= destination->getRemainingSizeInBytes(remoteOffset),
+			"[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while writing to remote memory.\n");
+
+	int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest);
+
+	INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting write request failed. 
%s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Write request created (id %lu).\n", workRequest.wr_id); + +} + +void QueuePair::writeWithImmediate(infinity::memory::Buffer* buffer, uint64_t localOffset, infinity::memory::RegionToken* destination, uint64_t remoteOffset, + uint32_t sizeInBytes, uint32_t immediateValue, OperationFlags send_flags, infinity::requests::RequestToken* requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(buffer); + requestToken->setImmediateValue(immediateValue); + } + + struct ibv_sge sgElement; + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + memset(&sgElement, 0, sizeof(ibv_sge)); + sgElement.addr = buffer->getAddress() + localOffset; + sgElement.length = sizeInBytes; + sgElement.lkey = buffer->getLocalKey(); + + INFINITY_ASSERT(sizeInBytes <= buffer->getRemainingSizeInBytes(localOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while creating scatter-getter element.\n"); + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = &sgElement; + workRequest.num_sge = 1; + workRequest.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; + workRequest.imm_data = htonl(immediateValue); + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.rdma.remote_addr = destination->getAddress() + remoteOffset; + workRequest.wr.rdma.rkey = destination->getRemoteKey(); + + INFINITY_ASSERT(sizeInBytes <= destination->getRemainingSizeInBytes(remoteOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while writing to remote memory.\n"); + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting write request failed. 
%s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Write request created (id %lu).\n", workRequest.wr_id); + +} + +void QueuePair::multiWrite(infinity::memory::Buffer** buffers, uint32_t* sizesInBytes, uint64_t* localOffsets, uint32_t numberOfElements, + infinity::memory::RegionToken* destination, uint64_t remoteOffset, OperationFlags send_flags, infinity::requests::RequestToken* requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(buffers[0]); + } + + struct ibv_sge *sgElements = (ibv_sge *) calloc(numberOfElements, sizeof(ibv_sge)); + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + INFINITY_ASSERT(numberOfElements <= infinity::core::Configuration::MAX_NUMBER_OF_SGE_ELEMENTS, "[INFINITY][QUEUES][QUEUEPAIR] Request contains too many SGE.\n"); + + uint32_t totalSizeInBytes = 0; + for (uint32_t i = 0; i < numberOfElements; ++i) { + if (localOffsets != NULL) { + sgElements[i].addr = buffers[i]->getAddress() + localOffsets[i]; + } else { + sgElements[i].addr = buffers[i]->getAddress(); + } + if (sizesInBytes != NULL) { + sgElements[i].length = sizesInBytes[i]; + } else { + sgElements[i].length = buffers[i]->getSizeInBytes(); + } + totalSizeInBytes += sgElements[i].length; + sgElements[i].lkey = buffers[i]->getLocalKey(); + } + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = sgElements; + workRequest.num_sge = numberOfElements; + workRequest.opcode = IBV_WR_RDMA_WRITE; + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.rdma.remote_addr = destination->getAddress() + remoteOffset; + workRequest.wr.rdma.rkey = destination->getRemoteKey(); + + INFINITY_ASSERT(totalSizeInBytes <= destination->getRemainingSizeInBytes(remoteOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while writing to remote memory.\n"); + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting write request failed. 
%s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Multi-Write request created (id %lu).\n", workRequest.wr_id); +} + +void QueuePair::multiWriteWithImmediate(infinity::memory::Buffer** buffers, uint32_t* sizesInBytes, uint64_t* localOffsets, uint32_t numberOfElements, + infinity::memory::RegionToken* destination, uint64_t remoteOffset, uint32_t immediateValue, OperationFlags send_flags, infinity::requests::RequestToken* requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(buffers[0]); + requestToken->setImmediateValue(immediateValue); + } + + struct ibv_sge *sgElements = (ibv_sge *) calloc(numberOfElements, sizeof(ibv_sge)); + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + INFINITY_ASSERT(numberOfElements <= infinity::core::Configuration::MAX_NUMBER_OF_SGE_ELEMENTS, "[INFINITY][QUEUES][QUEUEPAIR] Request contains too many SGE.\n"); + + uint32_t totalSizeInBytes = 0; + for (uint32_t i = 0; i < numberOfElements; ++i) { + if (localOffsets != NULL) { + sgElements[i].addr = buffers[i]->getAddress() + localOffsets[i]; + } else { + sgElements[i].addr = buffers[i]->getAddress(); + } + if (sizesInBytes != NULL) { + sgElements[i].length = sizesInBytes[i]; + } else { + sgElements[i].length = buffers[i]->getSizeInBytes(); + } + totalSizeInBytes += sgElements[i].length; + sgElements[i].lkey = buffers[i]->getLocalKey(); + } + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = sgElements; + workRequest.num_sge = numberOfElements; + workRequest.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; + workRequest.imm_data = htonl(immediateValue); + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.rdma.remote_addr = destination->getAddress() + remoteOffset; + workRequest.wr.rdma.rkey = destination->getRemoteKey(); + + INFINITY_ASSERT(totalSizeInBytes <= destination->getRemainingSizeInBytes(remoteOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while writing to remote memory.\n"); + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting write request failed. %s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Multi-Write request created (id %lu).\n", workRequest.wr_id); + +} + +void QueuePair::read(infinity::memory::Buffer* buffer, infinity::memory::RegionToken* source, infinity::requests::RequestToken *requestToken) { + read(buffer, 0, source, 0, buffer->getSizeInBytes(), OperationFlags(), requestToken); + INFINITY_ASSERT(buffer->getSizeInBytes() <= ((uint64_t) UINT32_MAX), "[INFINITY][QUEUES][QUEUEPAIR] Request must be smaller or equal to UINT_32_MAX bytes. This memory region is larger. 
Please explicitly indicate the size of the data to transfer.\n"); +} + +void QueuePair::read(infinity::memory::Buffer* buffer, infinity::memory::RegionToken* source, uint32_t sizeInBytes, + infinity::requests::RequestToken *requestToken) { + read(buffer, 0, source, 0, sizeInBytes, OperationFlags(), requestToken); +} + +void QueuePair::read(infinity::memory::Buffer* buffer, uint64_t localOffset, infinity::memory::RegionToken* source, uint64_t remoteOffset, uint32_t sizeInBytes, + OperationFlags send_flags, infinity::requests::RequestToken *requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(buffer); + } + + struct ibv_sge sgElement; + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + memset(&sgElement, 0, sizeof(ibv_sge)); + sgElement.addr = buffer->getAddress() + localOffset; + sgElement.length = sizeInBytes; + sgElement.lkey = buffer->getLocalKey(); + + INFINITY_ASSERT(sizeInBytes <= buffer->getRemainingSizeInBytes(localOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while creating scatter-getter element.\n"); + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = &sgElement; + workRequest.num_sge = 1; + workRequest.opcode = IBV_WR_RDMA_READ; + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.rdma.remote_addr = source->getAddress() + remoteOffset; + workRequest.wr.rdma.rkey = source->getRemoteKey(); + + INFINITY_ASSERT(sizeInBytes <= source->getRemainingSizeInBytes(remoteOffset), + "[INFINITY][QUEUES][QUEUEPAIR] Segmentation fault while reading from remote memory.\n"); + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting read request failed. %s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Read request created (id %lu).\n", workRequest.wr_id); + +} + +void QueuePair::compareAndSwap(infinity::memory::RegionToken* destination, infinity::memory::Atomic* previousValue, uint64_t compare, uint64_t swap, + OperationFlags send_flags, infinity::requests::RequestToken *requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(previousValue); + } + + struct ibv_sge sgElement; + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + memset(&sgElement, 0, sizeof(ibv_sge)); + sgElement.addr = previousValue->getAddress(); + sgElement.length = previousValue->getSizeInBytes(); + sgElement.lkey = previousValue->getLocalKey(); + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = &sgElement; + workRequest.num_sge = 1; + workRequest.opcode = IBV_WR_ATOMIC_CMP_AND_SWP; + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.atomic.remote_addr = destination->getAddress(); + workRequest.wr.atomic.rkey = destination->getRemoteKey(); + workRequest.wr.atomic.compare_add = compare; + workRequest.wr.atomic.swap = swap; + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting cmp-and-swp request failed. 
%s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Cmp-and-swp request created (id %lu).\n", workRequest.wr_id); + +} + +void QueuePair::compareAndSwap(infinity::memory::RegionToken* destination, uint64_t compare, uint64_t swap, infinity::requests::RequestToken *requestToken) { + compareAndSwap(destination, context->defaultAtomic, compare, swap, OperationFlags(), requestToken); +} + +void QueuePair::fetchAndAdd(infinity::memory::RegionToken* destination, uint64_t add, infinity::requests::RequestToken *requestToken) { + fetchAndAdd(destination, context->defaultAtomic, add, OperationFlags(), requestToken); +} + +void QueuePair::fetchAndAdd(infinity::memory::RegionToken* destination, infinity::memory::Atomic* previousValue, uint64_t add, + OperationFlags send_flags, infinity::requests::RequestToken *requestToken) { + + if (requestToken != NULL) { + requestToken->reset(); + requestToken->setRegion(previousValue); + } + + struct ibv_sge sgElement; + struct ibv_send_wr workRequest; + struct ibv_send_wr *badWorkRequest; + + memset(&sgElement, 0, sizeof(ibv_sge)); + sgElement.addr = previousValue->getAddress(); + sgElement.length = previousValue->getSizeInBytes(); + sgElement.lkey = previousValue->getLocalKey(); + + memset(&workRequest, 0, sizeof(ibv_send_wr)); + workRequest.wr_id = reinterpret_cast(requestToken); + workRequest.sg_list = &sgElement; + workRequest.num_sge = 1; + workRequest.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD; + workRequest.send_flags = send_flags.ibvFlags(); + if (requestToken != NULL) { + workRequest.send_flags |= IBV_SEND_SIGNALED; + } + workRequest.wr.atomic.remote_addr = destination->getAddress(); + workRequest.wr.atomic.rkey = destination->getRemoteKey(); + workRequest.wr.atomic.compare_add = add; + + int returnValue = ibv_post_send(this->ibvQueuePair, &workRequest, &badWorkRequest); + + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][QUEUEPAIR] Posting fetch-add request failed. %s.\n", strerror(errno)); + + INFINITY_DEBUG("[INFINITY][QUEUES][QUEUEPAIR] Fetch-add request created (id %lu).\n", workRequest.wr_id); + +} + + + +bool QueuePair::hasUserData() { + return (this->userData != NULL && this->userDataSize != 0); +} + +uint32_t QueuePair::getUserDataSize() { + return this->userDataSize; +} + +void* QueuePair::getUserData() { + return this->userData; +} + +} /* namespace queues */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/queues/QueuePair.h b/src/RDMA/infinity/queues/QueuePair.h new file mode 100644 index 0000000..ff6b148 --- /dev/null +++ b/src/RDMA/infinity/queues/QueuePair.h @@ -0,0 +1,157 @@ +/** + * Queues - Queue Pair + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef QUEUES_QUEUEPAIR_H_ +#define QUEUES_QUEUEPAIR_H_ + +#include +#include +#include +#include +#include +#include + +namespace infinity { +namespace queues { +class QueuePairFactory; +} +} + +namespace infinity { +namespace queues { + +class OperationFlags { + +public: + bool fenced; + bool signaled; + bool inlined; + + OperationFlags() : fenced(false), signaled(false), inlined(false) { }; + + /** + * Turn the bools into a bit field. 
+ */ + int ibvFlags(); +}; + +class QueuePair { + + friend class infinity::queues::QueuePairFactory; + +public: + + /** + * Constructor + */ + QueuePair(infinity::core::Context *context); + + /** + * Destructor + */ + ~QueuePair(); + +protected: + + /** + * Activation methods + */ + + void activate(uint16_t remoteDeviceId, uint32_t remoteQueuePairNumber, uint32_t remoteSequenceNumber); + void setRemoteUserData(void *userData, uint32_t userDataSize); + +public: + + /** + * User data received during connection setup + */ + + bool hasUserData(); + uint32_t getUserDataSize(); + void * getUserData(); + +public: + + /** + * Queue pair information + */ + + uint16_t getLocalDeviceId(); + uint32_t getQueuePairNumber(); + uint32_t getSequenceNumber(); + +public: + + /** + * Buffer operations + */ + + void send(infinity::memory::Buffer *buffer, infinity::requests::RequestToken *requestToken = NULL); + void send(infinity::memory::Buffer *buffer, uint32_t sizeInBytes, infinity::requests::RequestToken *requestToken = NULL); + void send(infinity::memory::Buffer *buffer, uint64_t localOffset, uint32_t sizeInBytes, OperationFlags flags, + infinity::requests::RequestToken *requestToken = NULL); + + void write(infinity::memory::Buffer *buffer, infinity::memory::RegionToken *destination, infinity::requests::RequestToken *requestToken = NULL); + void write(infinity::memory::Buffer *buffer, infinity::memory::RegionToken *destination, uint32_t sizeInBytes, + infinity::requests::RequestToken *requestToken = NULL); + void write(infinity::memory::Buffer *buffer, uint64_t localOffset, infinity::memory::RegionToken *destination, uint64_t remoteOffset, uint32_t sizeInBytes, + OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + + void read(infinity::memory::Buffer *buffer, infinity::memory::RegionToken *source, infinity::requests::RequestToken *requestToken = NULL); + void read(infinity::memory::Buffer *buffer, infinity::memory::RegionToken *source, uint32_t sizeInBytes, infinity::requests::RequestToken *requestToken = + NULL); + void read(infinity::memory::Buffer *buffer, uint64_t localOffset, infinity::memory::RegionToken *source, uint64_t remoteOffset, uint32_t sizeInBytes, + OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + +public: + + /** + * Complex buffer operations + */ + + void multiWrite(infinity::memory::Buffer **buffers, uint32_t *sizesInBytes, uint64_t *localOffsets, uint32_t numberOfElements, + infinity::memory::RegionToken *destination, uint64_t remoteOffset, OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + + void sendWithImmediate(infinity::memory::Buffer *buffer, uint64_t localOffset, uint32_t sizeInBytes, uint32_t immediateValue, + OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + + void writeWithImmediate(infinity::memory::Buffer *buffer, uint64_t localOffset, infinity::memory::RegionToken *destination, uint64_t remoteOffset, + uint32_t sizeInBytes, uint32_t immediateValue, OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + + void multiWriteWithImmediate(infinity::memory::Buffer **buffers, uint32_t *sizesInBytes, uint64_t *localOffsets, uint32_t numberOfElements, + infinity::memory::RegionToken *destination, uint64_t remoteOffset, uint32_t immediateValue, OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + +public: + + /** + * Atomic value operations + */ + + void compareAndSwap(infinity::memory::RegionToken *destination, 
uint64_t compare, uint64_t swap, infinity::requests::RequestToken *requestToken = NULL); + void compareAndSwap(infinity::memory::RegionToken *destination, infinity::memory::Atomic *previousValue, uint64_t compare, uint64_t swap, + OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + void fetchAndAdd(infinity::memory::RegionToken *destination, uint64_t add, infinity::requests::RequestToken *requestToken = NULL); + void fetchAndAdd(infinity::memory::RegionToken *destination, infinity::memory::Atomic *previousValue, uint64_t add, + OperationFlags flags, infinity::requests::RequestToken *requestToken = NULL); + +protected: + + infinity::core::Context * const context; + + ibv_qp* ibvQueuePair; + uint32_t sequenceNumber; + + void *userData; + uint32_t userDataSize; + +}; + +} /* namespace queues */ +} /* namespace infinity */ + +#endif /* QUEUES_QUEUEPAIR_H_ */ diff --git a/src/RDMA/infinity/queues/QueuePairFactory.cpp b/src/RDMA/infinity/queues/QueuePairFactory.cpp new file mode 100644 index 0000000..3ef04e0 --- /dev/null +++ b/src/RDMA/infinity/queues/QueuePairFactory.cpp @@ -0,0 +1,183 @@ +/** + * Queues - Queue Pair Factory + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "QueuePairFactory.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace infinity { +namespace queues { + +typedef struct { + + uint16_t localDeviceId; + uint32_t queuePairNumber; + uint32_t sequenceNumber; + uint32_t userDataSize; + char userData[infinity::core::Configuration::MAX_CONNECTION_USER_DATA_SIZE]; + +} serializedQueuePair; + +QueuePairFactory::QueuePairFactory(infinity::core::Context *context) { + + this->context = context; + this->serverSocket = -1; + +} + +QueuePairFactory::~QueuePairFactory() { + + if (serverSocket >= 0) { + close(serverSocket); + } + +} + +void QueuePairFactory::bindToPort(uint16_t port) { + + serverSocket = socket(AF_INET, SOCK_STREAM, 0); + INFINITY_ASSERT(serverSocket >= 0, "[INFINITY][QUEUES][FACTORY] Cannot open server socket.\n"); + + sockaddr_in serverAddress; + memset(&(serverAddress), 0, sizeof(sockaddr_in)); + serverAddress.sin_family = AF_INET; + serverAddress.sin_port = htons(port); + + int32_t enabled = 1; + int32_t returnValue = setsockopt(serverSocket, SOL_SOCKET, SO_REUSEADDR, &enabled, sizeof(enabled)); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][FACTORY] Cannot set socket option to reuse address.\n"); + + returnValue = bind(serverSocket, (sockaddr *) &serverAddress, sizeof(sockaddr_in)); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][FACTORY] Cannot bind to local address and port.\n"); + + returnValue = listen(serverSocket, 128); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][FACTORY] Cannot listen on server socket.\n"); + + char *ipAddressOfDevice = infinity::utils::Address::getIpAddressOfInterface(infinity::core::Configuration::DEFAULT_IB_DEVICE); + INFINITY_DEBUG("[INFINITY][QUEUES][FACTORY] Accepting connections on IP address %s and port %d.\n", ipAddressOfDevice, port); + free(ipAddressOfDevice); + +} + +QueuePair * QueuePairFactory::acceptIncomingConnection(void *userData, uint32_t userDataSizeInBytes) { + + INFINITY_ASSERT(userDataSizeInBytes < infinity::core::Configuration::MAX_CONNECTION_USER_DATA_SIZE, + "[INFINITY][QUEUES][FACTORY] User data size is too large.\n") + + serializedQueuePair *receiveBuffer = (serializedQueuePair*) calloc(1, sizeof(serializedQueuePair)); + serializedQueuePair *sendBuffer = (serializedQueuePair*) 
calloc(1, sizeof(serializedQueuePair)); + + int connectionSocket = accept(this->serverSocket, (sockaddr *) NULL, NULL); + INFINITY_ASSERT(connectionSocket >= 0, "[INFINITY][QUEUES][FACTORY] Cannot open connection socket.\n"); + + int32_t returnValue = recv(connectionSocket, receiveBuffer, sizeof(serializedQueuePair), 0); + INFINITY_ASSERT(returnValue == sizeof(serializedQueuePair), "[INFINITY][QUEUES][FACTORY] Incorrect number of bytes received. Expected %lu. Received %d.\n", + sizeof(serializedQueuePair), returnValue); + + QueuePair *queuePair = new QueuePair(this->context); + + sendBuffer->localDeviceId = queuePair->getLocalDeviceId(); + sendBuffer->queuePairNumber = queuePair->getQueuePairNumber(); + sendBuffer->sequenceNumber = queuePair->getSequenceNumber(); + sendBuffer->userDataSize = userDataSizeInBytes; + memcpy(sendBuffer->userData, userData, userDataSizeInBytes); + + returnValue = send(connectionSocket, sendBuffer, sizeof(serializedQueuePair), 0); + INFINITY_ASSERT(returnValue == sizeof(serializedQueuePair), + "[INFINITY][QUEUES][FACTORY] Incorrect number of bytes transmitted. Expected %lu. Received %d.\n", sizeof(serializedQueuePair), returnValue); + + INFINITY_DEBUG("[INFINITY][QUEUES][FACTORY] Pairing (%u, %u, %u, %u)-(%u, %u, %u, %u)\n", queuePair->getLocalDeviceId(), queuePair->getQueuePairNumber(), + queuePair->getSequenceNumber(), userDataSizeInBytes, receiveBuffer->localDeviceId, receiveBuffer->queuePairNumber, receiveBuffer->sequenceNumber, + receiveBuffer->userDataSize); + + queuePair->activate(receiveBuffer->localDeviceId, receiveBuffer->queuePairNumber, receiveBuffer->sequenceNumber); + queuePair->setRemoteUserData(receiveBuffer->userData, receiveBuffer->userDataSize); + + this->context->registerQueuePair(queuePair); + + close(connectionSocket); + free(receiveBuffer); + free(sendBuffer); + + return queuePair; + +} + +QueuePair * QueuePairFactory::connectToRemoteHost(const char* hostAddress, uint16_t port, void *userData, uint32_t userDataSizeInBytes) { + + INFINITY_ASSERT(userDataSizeInBytes < infinity::core::Configuration::MAX_CONNECTION_USER_DATA_SIZE, + "[INFINITY][QUEUES][FACTORY] User data size is too large.\n") + + serializedQueuePair *receiveBuffer = (serializedQueuePair*) calloc(1, sizeof(serializedQueuePair)); + serializedQueuePair *sendBuffer = (serializedQueuePair*) calloc(1, sizeof(serializedQueuePair)); + + sockaddr_in remoteAddress; + memset(&(remoteAddress), 0, sizeof(sockaddr_in)); + remoteAddress.sin_family = AF_INET; + inet_pton(AF_INET, hostAddress, &(remoteAddress.sin_addr)); + remoteAddress.sin_port = htons(port); + + int connectionSocket = socket(AF_INET, SOCK_STREAM, 0); + INFINITY_ASSERT(connectionSocket >= 0, "[INFINITY][QUEUES][FACTORY] Cannot open connection socket.\n"); + + int returnValue = connect(connectionSocket, (sockaddr *) &(remoteAddress), sizeof(sockaddr_in)); + INFINITY_ASSERT(returnValue == 0, "[INFINITY][QUEUES][FACTORY] Could not connect to server.\n"); + + QueuePair *queuePair = new QueuePair(this->context); + + sendBuffer->localDeviceId = queuePair->getLocalDeviceId(); + sendBuffer->queuePairNumber = queuePair->getQueuePairNumber(); + sendBuffer->sequenceNumber = queuePair->getSequenceNumber(); + sendBuffer->userDataSize = userDataSizeInBytes; + memcpy(sendBuffer->userData, userData, userDataSizeInBytes); + + returnValue = send(connectionSocket, sendBuffer, sizeof(serializedQueuePair), 0); + INFINITY_ASSERT(returnValue == sizeof(serializedQueuePair), + "[INFINITY][QUEUES][FACTORY] Incorrect number of bytes transmitted. 
Expected %lu. Received %d.\n", sizeof(serializedQueuePair), returnValue); + + returnValue = recv(connectionSocket, receiveBuffer, sizeof(serializedQueuePair), 0); + INFINITY_ASSERT(returnValue == sizeof(serializedQueuePair), + "[INFINITY][QUEUES][FACTORY] Incorrect number of bytes received. Expected %lu. Received %d.\n", sizeof(serializedQueuePair), returnValue); + + INFINITY_DEBUG("[INFINITY][QUEUES][FACTORY] Pairing (%u, %u, %u, %u)-(%u, %u, %u, %u)\n", queuePair->getLocalDeviceId(), queuePair->getQueuePairNumber(), + queuePair->getSequenceNumber(), userDataSizeInBytes, receiveBuffer->localDeviceId, receiveBuffer->queuePairNumber, receiveBuffer->sequenceNumber, + receiveBuffer->userDataSize); + + queuePair->activate(receiveBuffer->localDeviceId, receiveBuffer->queuePairNumber, receiveBuffer->sequenceNumber); + queuePair->setRemoteUserData(receiveBuffer->userData, receiveBuffer->userDataSize); + + this->context->registerQueuePair(queuePair); + + close(connectionSocket); + free(receiveBuffer); + free(sendBuffer); + + return queuePair; + +} + +QueuePair* QueuePairFactory::createLoopback(void *userData, uint32_t userDataSizeInBytes) { + + QueuePair *queuePair = new QueuePair(this->context); + queuePair->activate(queuePair->getLocalDeviceId(), queuePair->getQueuePairNumber(), queuePair->getSequenceNumber()); + queuePair->setRemoteUserData(userData, userDataSizeInBytes); + + this->context->registerQueuePair(queuePair); + + return queuePair; + +} + +} /* namespace queues */ +} /* namespace infinity */ diff --git a/src/RDMA/infinity/queues/QueuePairFactory.h b/src/RDMA/infinity/queues/QueuePairFactory.h new file mode 100644 index 0000000..42eb226 --- /dev/null +++ b/src/RDMA/infinity/queues/QueuePairFactory.h @@ -0,0 +1,57 @@ +/** + * Queues - Queue Pair Factory + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#ifndef QUEUES_QUEUEPAIRFACTORY_H_ +#define QUEUES_QUEUEPAIRFACTORY_H_ + +#include +#include +#include +#include + +namespace infinity { +namespace queues { + +class QueuePairFactory { +public: + + QueuePairFactory(infinity::core::Context *context); + ~QueuePairFactory(); + + /** + * Bind to port for listening to incoming connections + */ + void bindToPort(uint16_t port); + + /** + * Accept incoming connection request (passive side) + */ + QueuePair * acceptIncomingConnection(void *userData = NULL, uint32_t userDataSizeInBytes = 0); + + /** + * Connect to remote machine (active side) + */ + QueuePair * connectToRemoteHost(const char* hostAddress, uint16_t port, void *userData = NULL, uint32_t userDataSizeInBytes = 0); + + /** + * Create loopback queue pair + */ + QueuePair * createLoopback(void *userData = NULL, uint32_t userDataSizeInBytes = 0); + +protected: + + infinity::core::Context * context; + + int32_t serverSocket; + +}; + +} /* namespace queues */ +} /* namespace infinity */ + +#endif /* QUEUES_QUEUEPAIRFACTORY_H_ */ diff --git a/src/RDMA/infinity/requests/RequestToken.cpp b/src/RDMA/infinity/requests/RequestToken.cpp new file mode 100644 index 0000000..52f446e --- /dev/null +++ b/src/RDMA/infinity/requests/RequestToken.cpp @@ -0,0 +1,101 @@ +/** + * Requests - Request Token + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include "RequestToken.h" + +namespace infinity { +namespace requests { + +RequestToken::RequestToken(infinity::core::Context *context) : + context(context) { + this->success.store(false); + this->completed.store(false); + this->region = NULL; + this->userData = NULL; + 
this->userDataValid = false;
+	this->userDataSize = 0;
+	this->immediateValue = 0;
+	this->immediateValueValid = false;
+}
+
+void RequestToken::setCompleted(bool success) {
+	this->success.store(success);
+	this->completed.store(true);
+}
+
+bool RequestToken::checkIfCompleted() {
+	if (this->completed.load()) {
+		return true;
+	} else {
+		this->context->pollSendCompletionQueue();
+		return this->completed.load();
+	}
+}
+
+void RequestToken::waitUntilCompleted() {
+	while (!this->completed.load()) {
+		this->context->pollSendCompletionQueue();
+	}
+}
+
+bool RequestToken::wasSuccessful() {
+	return this->success.load();
+}
+
+void RequestToken::reset() {
+	this->success.store(false);
+	this->completed.store(false);
+	this->region = NULL;
+	this->userData = NULL;
+	this->userDataValid = false;
+	this->userDataSize = 0;
+	this->immediateValue = 0;
+	this->immediateValueValid = false;
+}
+
+void RequestToken::setRegion(infinity::memory::Region* region) {
+	this->region = region;
+}
+
+infinity::memory::Region* RequestToken::getRegion() {
+	return this->region;
+}
+
+void RequestToken::setUserData(void* userData, uint32_t userDataSize) {
+	this->userData = userData;
+	this->userDataSize = userDataSize;
+	this->userDataValid = true;
+}
+
+void* RequestToken::getUserData() {
+	return this->userData;
+}
+
+bool RequestToken::hasUserData() {
+	return this->userDataValid;
+}
+
+uint32_t RequestToken::getUserDataSize() {
+	return this->userDataSize;
+}
+
+void RequestToken::setImmediateValue(uint32_t immediateValue) {
+	this->immediateValue = immediateValue;
+	this->immediateValueValid = true;
+}
+
+uint32_t RequestToken::getImmediateValue() {
+	return this->immediateValue;
+}
+
+bool RequestToken::hasImmediateValue() {
+	return this->immediateValueValid;
+}
+
+} /* namespace requests */
+} /* namespace infinity */
diff --git a/src/RDMA/infinity/requests/RequestToken.h b/src/RDMA/infinity/requests/RequestToken.h
new file mode 100644
index 0000000..1aefb6d
--- /dev/null
+++ b/src/RDMA/infinity/requests/RequestToken.h
@@ -0,0 +1,67 @@
+/**
+ * Requests - Request Token
+ *
+ * (c) 2018 Claude Barthels, ETH Zurich
+ * Contact: claudeb@inf.ethz.ch
+ *
+ */
+
+#ifndef REQUESTS_REQUESTTOKEN_H_
+#define REQUESTS_REQUESTTOKEN_H_
+
+#include
+#include
+#include
+
+#include
+
+namespace infinity {
+namespace requests {
+
+class RequestToken {
+
+public:
+
+	RequestToken(infinity::core::Context *context);
+
+	void reset();
+
+	void setRegion(infinity::memory::Region * region);
+	infinity::memory::Region * getRegion();
+
+	void setCompleted(bool success);
+	bool wasSuccessful();
+
+	bool checkIfCompleted();
+	void waitUntilCompleted();
+
+	void setImmediateValue(uint32_t immediateValue);
+	bool hasImmediateValue();
+	uint32_t getImmediateValue();
+
+	void setUserData(void* userData, uint32_t userDataSize);
+	bool hasUserData();
+	void* getUserData();
+	uint32_t getUserDataSize();
+
+protected:
+
+	infinity::core::Context * const context;
+	infinity::memory::Region * region;
+
+	std::atomic<bool> completed;
+	std::atomic<bool> success;
+
+	void *userData;
+	uint32_t userDataSize;
+	bool userDataValid;
+
+	uint32_t immediateValue;
+	bool immediateValueValid;
+
+};
+
+} /* namespace requests */
+} /* namespace infinity */
+
+#endif /* REQUESTS_REQUESTTOKEN_H_ */
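At this point all pieces of the RDMA path are in place: QueuePairFactory performs the out-of-band TCP handshake, QueuePair posts the verbs, and RequestToken tracks completion by polling the send completion queue. A client-side sketch tying them together; it assumes a Buffer(context, size) constructor for registered buffers and that the passive side passed a serialized RegionToken as connection user data, a common pattern that this patch itself does not mandate. Host address and port are placeholders:

// Illustrative includes; exact paths depend on the build setup.
// #include <infinity/core/Context.h>
// #include <infinity/memory/Buffer.h>
// #include <infinity/memory/RegionToken.h>
// #include <infinity/queues/QueuePairFactory.h>
// #include <infinity/requests/RequestToken.h>

int main() {
	infinity::core::Context context;
	infinity::queues::QueuePairFactory factory(&context);

	// TCP handshake: exchanges device id, QP number and PSN, then moves the
	// queue pair through INIT -> RTR -> RTS (see QueuePair::activate above).
	infinity::queues::QueuePair *qp =
			factory.connectToRemoteHost("192.168.0.1", 8011);

	// The server advertised its registered region as connection user data.
	auto *remoteToken =
			reinterpret_cast<infinity::memory::RegionToken *>(qp->getUserData());

	infinity::memory::Buffer local(&context, 4096);   // assumed constructor
	infinity::requests::RequestToken token(&context);

	// One-sided RDMA write; the token is signaled when the WQE completes.
	qp->write(&local, remoteToken, &token);
	token.waitUntilCompleted();

	delete qp;
	return 0;
}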
diff --git a/src/RDMA/infinity/utils/Address.cpp b/src/RDMA/infinity/utils/Address.cpp
new file mode 100644
index 0000000..0dbe56e
--- /dev/null
+++ b/src/RDMA/infinity/utils/Address.cpp
@@ -0,0 +1,57 @@
+/**
+ * Utils - Address
+ *
+ * (c) 2018 Claude Barthels, ETH Zurich
+ * Contact: claudeb@inf.ethz.ch
+ *
+ */
+
+#include "Address.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace infinity {
+namespace utils {
+
+char* Address::getIpAddressOfInterface(const char* interfaceName) {
+
+	struct ifaddrs *ifAddr;
+	struct ifaddrs *ifa;
+	char *ipAddress = (char*) calloc(16, sizeof(char));
+
+	int returnValue = getifaddrs(&ifAddr);
+	INFINITY_ASSERT(returnValue != -1, "[INFINITY][UTILS][ADDRESS] Cannot read interface list.\n");
+
+	for (ifa = ifAddr; ifa != NULL; ifa = ifa->ifa_next) {
+		if (ifa->ifa_addr == NULL) {
+			continue;
+		}
+		if ((ifa->ifa_addr->sa_family == AF_INET) && (strcasecmp(interfaceName, ifa->ifa_name) == 0)) {
+			sprintf(ipAddress, "%s", inet_ntoa(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr));
+			break;
+		}
+	}
+	INFINITY_ASSERT(ifa != NULL, "[INFINITY][UTILS][ADDRESS] Cannot find interface named %s.\n", interfaceName);
+
+	freeifaddrs(ifAddr);
+
+	return ipAddress;
+
+}
+
+uint32_t Address::getIpAddressAsUint32(const char* ipAddress) {
+
+	uint32_t ipAddressNumbers[4];
+	sscanf(ipAddress, "%u.%u.%u.%u", &ipAddressNumbers[3], &ipAddressNumbers[2], &ipAddressNumbers[1], &ipAddressNumbers[0]);
+	uint32_t ipAddressNumber(ipAddressNumbers[0] | ipAddressNumbers[1] << 8 | ipAddressNumbers[2] << 16 | ipAddressNumbers[3] << 24);
+	return ipAddressNumber;
+}
+
+} /* namespace utils */
+} /* namespace infinity */
diff --git a/src/RDMA/infinity/utils/Address.h b/src/RDMA/infinity/utils/Address.h
new file mode 100644
index 0000000..4d4df0c
--- /dev/null
+++ b/src/RDMA/infinity/utils/Address.h
@@ -0,0 +1,29 @@
+/**
+ * Utils - Address
+ *
+ * (c) 2018 Claude Barthels, ETH Zurich
+ * Contact: claudeb@inf.ethz.ch
+ *
+ */
+
+#ifndef UTILS_ADDRESS_H_
+#define UTILS_ADDRESS_H_
+
+#include
+
+namespace infinity {
+namespace utils {
+
+class Address {
+
+public:
+
+	static char * getIpAddressOfInterface(const char *interfaceName);
+	static uint32_t getIpAddressAsUint32(const char *ipAddress);
+
+};
+
+} /* namespace utils */
+} /* namespace infinity */
+
+#endif /* UTILS_ADDRESS_H_ */
diff --git a/src/RDMA/infinity/utils/Debug.h b/src/RDMA/infinity/utils/Debug.h
new file mode 100644
index 0000000..d18c86f
--- /dev/null
+++ b/src/RDMA/infinity/utils/Debug.h
@@ -0,0 +1,27 @@
+/**
+ * Utils - Debug
+ *
+ * (c) 2018 Claude Barthels, ETH Zurich
+ * Contact: claudeb@inf.ethz.ch
+ *
+ */
+
+#ifndef UTILS_DEBUG_H_
+#define UTILS_DEBUG_H_
+
+#include
+#include
+
+#ifdef INFINITY_DEBUG_ON
+	#define INFINITY_DEBUG(X, ...) {fprintf(stdout, X, ##__VA_ARGS__); fflush(stdout);}
+#else
+	#define INFINITY_DEBUG(X, ...) {}
+#endif
+
+#ifdef INFINITY_ASSERT_ON
+	#define INFINITY_ASSERT(B, X, ...) {if(!(B)) {fprintf(stdout, X, ##__VA_ARGS__); fflush(stdout); exit(-1);}}
+#else
+	#define INFINITY_ASSERT(B, X, ...) {}
+#endif
+
+#endif /* UTILS_DEBUG_H_ */
diff --git a/src/buffers/CircularQueryBuffer.h b/src/buffers/CircularQueryBuffer.h
index 372c7e8..8435a85 100644
--- a/src/buffers/CircularQueryBuffer.h
+++ b/src/buffers/CircularQueryBuffer.h
@@ -1,15 +1,16 @@
 #pragma once
 
 #include
-#include
+#include
 #include
 #include
-#include
-#include
+#include
 
-#include "QueryBuffer.h"
+#include "buffers/QueryBuffer.h"
+#include "buffers/UnboundedQueryBufferFactory.h"
 #include "utils/PaddedLong.h"
 #include "utils/SystemConf.h"
+#include "utils/Utils.h"
 
 /*
  * \brief This class implements a non-NUMA-aware circular buffer.
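The hunk below rewrites CircularQueryBuffer::put(); its wrap-around rule is easiest to see in isolation. A simplified standalone sketch of the two-part copy, where the mask-based normalise assumes a power-of-two capacity, as the real buffer does:

#include <cstring>
#include <vector>

// Simplified illustration of the circular put() copy path: when a batch would
// run past the physical end of the buffer, it is split into a tail copy and a
// head copy that wraps to offset 0.
static long circularPut(std::vector<char> &buf, long end, const char *values, long bytes) {
	const long capacity = (long) buf.size();   // power of two in the real class
	const long index = end & (capacity - 1);   // what QueryBuffer::normalise() computes
	if (bytes > capacity - index) {            // copy in two parts
		const long right = capacity - index;   // bytes until the physical end
		std::memcpy(&buf[index], values, right);
		std::memcpy(&buf[0], values + right, bytes - right);
	} else {
		std::memcpy(&buf[index], values, bytes);
	}
	return index;                              // like put(), return the insertion index
}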
@@ -21,44 +22,212 @@ class CircularQueryBuffer : public QueryBuffer { ByteBuffer m_buffer; public: - CircularQueryBuffer(int id, size_t capacity, int tupleSize = 1, bool copyDataOnInsert = true) : - QueryBuffer(id, capacity, false, tupleSize, copyDataOnInsert), m_buffer(m_capacity) {}; + CircularQueryBuffer(int id, size_t capacity, int tupleSize = 1, + bool copyDataOnInsert = true, size_t batchSize = SystemConf::getInstance().BATCH_SIZE, bool clearFiles = true) + : QueryBuffer(id, capacity, false, tupleSize, copyDataOnInsert, batchSize, nullptr, clearFiles), + m_buffer(m_capacity){ + if (SystemConf::getInstance().LINEAGE_ON) { + setupForCheckpoints(nullptr); + } + }; - long put(char *values, long bytes, long latencyMark = -1) override { + + long put(char *values, long bytes, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { if (values == nullptr || bytes <= 0) - throw std::invalid_argument("error: cannot put null to circular buffer"); + throw std::invalid_argument("error: cannot put null to circular buffer " + std::to_string(m_id)); - /* Get the end pointer */ + /* Get the end pointer */ long end = m_endP.m_value.load(std::memory_order_relaxed); + /* Find remaining bytes until the circular buffer wraps */ long wrapPoint = (end + bytes - 1) - m_capacity; if (m_temp.m_value <= wrapPoint) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); if (m_temp.m_value <= wrapPoint) { + //std::cout << "[DBG] Circular Buffer " << std::to_string(m_id) + // << " is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; return -1; } } long index = normalise(end); - if (m_copyDataOnInsert || m_wraps == 0) { // copy only until the buffer is filled once - if (bytes > ((long) m_capacity - index)) { /* Copy in two parts */ + + if (SystemConf::getInstance().LINEAGE_ON) { + auto firstSlotId = index / m_batchSize; + auto endSlotId = (index + bytes) / m_batchSize; + auto slotId = endSlotId; //firstSlotId; + while (slotId <= endSlotId) { + auto normSlot = slotId % m_numberOfSlots; + if (slotId != endSlotId) { + std::lock_guard l (m_slots[normSlot].m_updateLock); + if (!m_slots[normSlot].m_graph) { + auto newGraph = LineageGraphFactory::getInstance().newInstance(); + m_slots[normSlot].setLineageGraph(newGraph); + } + } else { + std::lock_guard l (m_slots[normSlot].m_updateLock); + if (!graph && !m_slots[normSlot].m_graph) { + graph = LineageGraphFactory::getInstance().newInstance(); + } + if (m_slots[normSlot].m_graph && graph) { + m_slots[normSlot].m_graph->mergeGraphs(graph); + } else if (graph) { + m_slots[normSlot].setLineageGraph(graph); + } + } + /*if (!m_slots[normSlot].m_graph) { + throw std::runtime_error( + "error: the lineage graph is not initialized when inserting for slot " + + std::to_string(normSlot)); + }*/ + slotId ++; + } + } + + if (m_copyDataOnInsert || + m_wraps == 0) { // copy only until the buffer is filled once + if (bytes > ((long)m_capacity - index)) { /* Copy in two parts */ long right = m_capacity - index; long left = bytes - (m_capacity - index); std::memcpy(&m_buffer[index], values, (right) * sizeof(char)); - std::memcpy(&m_buffer[0], &values[m_capacity - index], (left) * sizeof(char)); + std::memcpy(&m_buffer[0], &values[m_capacity - index], + (left) * sizeof(char)); } else { std::memcpy(&m_buffer[index], values, (bytes) * sizeof(char)); } } if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { - long *buf = (long *) &m_buffer[index]; - buf[0] = 
Utils::pack(latencyMark, (int) buf[0]); + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); } long p = normalise(end + bytes); - if (p <= index) - m_wraps++; + if (p <= index) m_wraps++; m_endP.m_value.store(end + bytes, std::memory_order_relaxed); + + // debug (); + return index; + } + + long put(std::shared_ptr &input, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + auto values = input->getBuffer().data(); + auto bytes = input->getBuffer().size(); + + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer " + std::to_string(m_id)); + + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint) { + + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint) { + //std::cout << "[DBG] Circular Buffer " << std::to_string(m_id) + // << " is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + return -1; + } + } + + long index = normalise(end); + + if (SystemConf::getInstance().LINEAGE_ON) { + throw std::runtime_error("error: lineage not supported during insertion"); + } + + // always copy + if (bytes > ((long)m_capacity - index)) { /* Copy in two parts */ + long right = m_capacity - index; + long left = bytes - (m_capacity - index); + std::memcpy(&m_buffer[index], values, (right) * sizeof(char)); + std::memcpy(&m_buffer[0], &values[m_capacity - index], + (left) * sizeof(char)); + } else { + std::memcpy(&m_buffer[index], values, (bytes) * sizeof(char)); + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); + } + + long p = normalise(end + bytes); + if (p <= index) m_wraps++; + m_endP.m_value.store(end + bytes, std::memory_order_relaxed); + + // free UnboundedQueryBuffer + UnboundedQueryBufferFactory::getInstance().freeNB(input->getBufferId(), input); + + // debug (); + return index; + } + + long put(void *val, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + auto *values = (infinity::core::receive_element_t *) val; + if (!values) { + throw std::runtime_error("error: values is not set"); + } + + auto bytes = values->buffer->getSizeInBytes(); + if (bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer " + std::to_string(m_id)); + + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint) { + + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint) { + //std::cout << "[DBG] Circular Buffer " << std::to_string(m_id) + // << " is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + return -1; + } + } + + long index = normalise(end); + + if (SystemConf::getInstance().LINEAGE_ON) { + throw std::runtime_error("error: lineage not supported during insertion"); + } + + // always copy + if (bytes > ((long)m_capacity - index)) { /* Copy in two parts */ + long right = m_capacity - index; + long left = bytes - (m_capacity - index); + 
std::memcpy(&m_buffer[index], values->buffer->getData(), (right) * sizeof(char)); + std::memcpy(&m_buffer[0], (char*)values->buffer->getData() + (m_capacity - index), + (left) * sizeof(char)); + } else { + size_t idx = 0; + if (m_filter) { + m_filterFP((char*) values->buffer->getData(), 0, (int) bytes, &m_buffer[index], 0, reinterpret_cast(idx)); + } else { + std::memcpy(&m_buffer[index], values->buffer->getData(), (bytes) * sizeof(char)); + } + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); + } + + long p = normalise(end + bytes); + if (p <= index) m_wraps++; + m_endP.m_value.store(end + bytes, std::memory_order_relaxed); + + // + RDMABufferPool::getInstance().free(values); + // debug (); return index; } @@ -75,14 +244,15 @@ class CircularQueryBuffer : public QueryBuffer { bytes = offset - index + 1; m_bytesProcessed.fetch_add(bytes, std::memory_order_relaxed); - m_tuplesProcessed.fetch_add((bytes / (size_t) m_tupleSize), std::memory_order_relaxed); + m_tuplesProcessed.fetch_add((bytes / (size_t)m_tupleSize), + std::memory_order_relaxed); m_tasksProcessed.fetch_add(1, std::memory_order_relaxed); /* Set new start pointer */ m_startP.m_value.store(_start + bytes, std::memory_order_relaxed); } - void free(long offset) override { + void free(long offset, bool isPersistent = false) override { long _start = m_startP.m_value.load(std::memory_order_relaxed); long index = normalise(_start); long bytes; @@ -92,50 +262,154 @@ class CircularQueryBuffer : public QueryBuffer { else bytes = offset - index + 1; + if (m_filesystem) { + //auto slotId = normalise(index + bytes) / m_batchSize; + //auto oldVal = 3; + //if (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, -1)) { + // throw std::runtime_error("error: failed to free values from slot " + std::to_string(slotId)); + //} + //std::cout << "[DBG] free " + // << offset << " offset of " << slotId << " slotId with " + // << m_slots[slotId].m_slot.load() << " slot " << std::endl; + //if (m_slots[slotId].m_slot.load() != -1 && m_slots[slotId].m_slot.load() != 1 && + // m_slots[slotId].m_slot.load() != 3 && !m_slots[slotId].m_ready) { + // debugSlots(); + // throw std::runtime_error("error: wrong value when freeing slot " + + // std::to_string(slotId) + " with " + + // std::to_string(m_slots[slotId].m_slot.load())); + //} + //m_slots[slotId].reset(); + } + m_bytesProcessed.fetch_add(bytes, std::memory_order_relaxed); - m_tuplesProcessed.fetch_add((bytes / (size_t) m_tupleSize), std::memory_order_relaxed); + m_tuplesProcessed.fetch_add((bytes / (size_t)m_tupleSize), + std::memory_order_relaxed); m_tasksProcessed.fetch_add(1, std::memory_order_relaxed); /* Set new start pointer */ m_startP.m_value.store(_start + bytes, std::memory_order_relaxed); + //debug (); } - ByteBuffer &getBuffer() override { - return m_buffer; - } + ByteBuffer &getBuffer() override { return m_buffer; } char *getBufferRaw() override { return m_buffer.data(); } size_t getBufferCapacity(int id) override { - (void) id; + (void)id; return m_capacity; } long getLong(size_t index) override { - auto p = (long *) m_buffer.data(); - return p[normalise(index / sizeof(long))]; + auto p = (long *)m_buffer.data(); + return p[normalise(index) / sizeof(size_t)]; } void setLong(size_t index, long value) override { - auto p = (long *) m_buffer.data(); - p[normalise(index / sizeof(long))] = value; + auto p = (long *)m_buffer.data(); + p[normalise(index) / sizeof(size_t)] = value; } - 
void appendBytesTo(int startPos, int endPos, ByteBuffer &outputBuffer) override { + void appendBytesTo(int startPos, int endPos, + ByteBuffer &outputBuffer) override { if (endPos > startPos) { - std::copy(m_buffer.begin() + startPos, m_buffer.begin() + endPos, outputBuffer.begin()); + std::copy(m_buffer.begin() + startPos, m_buffer.begin() + endPos, + outputBuffer.begin()); } else { - std::copy(m_buffer.begin() + startPos, m_buffer.end(), outputBuffer.begin()); - std::copy(m_buffer.begin(), m_buffer.begin() + endPos, outputBuffer.begin() + (m_capacity - startPos)); + std::copy(m_buffer.begin() + startPos, m_buffer.end(), + outputBuffer.begin()); + std::copy(m_buffer.begin(), m_buffer.begin() + endPos, + outputBuffer.begin() + (m_capacity - startPos)); } } void appendBytesTo(int startPos, int endPos, char *output) override { if (endPos > startPos) { - std::memcpy(output, &(m_buffer[startPos]), (endPos - startPos) * sizeof(char)); + std::memcpy(output, m_buffer.data() + startPos, + (endPos - startPos) * sizeof(char)); } else { - std::memcpy(output, &(m_buffer[startPos]), (m_capacity - startPos) * sizeof(char)); - std::memcpy(output + (m_capacity - startPos), &(m_buffer[0]), (endPos) * sizeof(char)); + std::memcpy(output, m_buffer.data() + startPos, + (m_capacity - startPos) * sizeof(char)); + std::memcpy(output + (m_capacity - startPos), m_buffer.data(), + (endPos) * sizeof(char)); + } + } + + void setupForCheckpoints(std::shared_ptr filesystem) override { + m_filesystem = filesystem; + // Initialize the slots + for (size_t slotId = 0; slotId < m_numberOfSlots; ++slotId) { + m_slots[slotId].setId(slotId, m_batchSize, m_buffer.data() + slotId * m_batchSize); + } + } + + int prepareCheckpoint(long freePtr, tbb::concurrent_queue &readySlots, int &firstSlot, int &lastSlot) override { + auto endPtr = m_endP.m_value.load(); + endPtr = normalise(endPtr); + if (freePtr > endPtr) + endPtr += m_capacity; + int numOfSlots = 0; + + firstSlot = -1; + lastSlot = -1; + bool isFirst = true; + //auto first = freePtr; + //std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << freePtr << " freePtr " << endPtr << " endPtr " + // << (endPtr-freePtr)/m_batchSize << " slots " << std::endl; + while (freePtr < endPtr - (long) m_batchSize) { + freePtr += (long) m_batchSize; + auto slotId = normalise(freePtr) / m_batchSize; + + if (isFirst) { + firstSlot = slotId; + isFirst = false; + } + lastSlot = slotId; + //std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << std::to_string(m_batchSize) << " batchSize " + // << std::to_string(first) << " first " << freePtr << " freePtr " + // << endPtr << " endPtr " << slotId << " slotId " << std::endl; + //if (m_slots[slotId].m_slot.load() != 1 && m_slots[slotId].m_slot.load() != 3) { + // debugSlots(); + // throw std::runtime_error("error: wrong value in slot " + + // std::to_string(slotId) + " with " + + // std::to_string(m_slots[slotId].m_slot.load())); + //} + m_slots[slotId].reset(); + m_slots[slotId].setPreviousSlot(3); + m_slots[slotId].m_slot.store(5); + m_slots[slotId].setNumberOfResults(); + readySlots.push(slotId); + numOfSlots++; + } + if (numOfSlots == 0 && freePtr < endPtr) { + freePtr += (long) m_batchSize; + auto slotId = normalise(freePtr) / m_batchSize; + firstSlot = slotId; + lastSlot = slotId; + //std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << std::to_string(m_batchSize) << " 
batchSize " + // << std::to_string(first) << " first " << freePtr << " freePtr " + // << endPtr << " endPtr " << slotId << " slotId " << std::endl; + m_slots[slotId].reset(); + m_slots[slotId].setPreviousSlot(3); + m_slots[slotId].m_slot.store(5); + m_slots[slotId].setNumberOfResults(); + readySlots.push(slotId); + numOfSlots++; + } + return numOfSlots; + } + + void debugSlots(){ + for (auto &slot : m_slots) { + if (slot.m_id == -1) break; + std::cout << slot.m_id << " slotId " << slot.m_slot.load() + << " slot " << std::endl; } } diff --git a/src/buffers/NUMACircularQueryBuffer.h b/src/buffers/NUMACircularQueryBuffer.h index 51c1ec2..77f978f 100644 --- a/src/buffers/NUMACircularQueryBuffer.h +++ b/src/buffers/NUMACircularQueryBuffer.h @@ -1,16 +1,19 @@ #pragma once +#include + #include -#include +#include #include #include -#include -#include +#include -#include "utils/Utils.h" +#include "buffers/NumaBuffer.h" +#include "buffers/QueryBuffer.h" +#include "buffers/UnboundedQueryBufferFactory.h" #include "utils/PaddedLong.h" #include "utils/SystemConf.h" -#include "buffers/QueryBuffer.h" +#include "utils/Utils.h" /* * \brief This class implements a NUMA-aware circular buffer. @@ -20,33 +23,20 @@ * * */ -class NUMACircularQueryBuffer : public QueryBuffer { +class NumaCircularQueryBuffer : public QueryBuffer, public NumaBuffer { private: - const int m_cpusPerNode; - const int m_numaNodes; - const size_t m_maxBufferCapacity; - size_t m_minBufferCapacity; std::vector> m_buffers; - std::vector m_orderedCpus; public: - NUMACircularQueryBuffer(int id, size_t capacity, int tupleSize = 1, bool copyDataOnInsert = true) : - QueryBuffer(id, capacity, true, tupleSize, copyDataOnInsert), -#if defined(HAVE_NUMA) - m_cpusPerNode(Utils::getNumberOfCoresPerSocket()), - m_numaNodes( - (numa_available() < 0) ? 1 : - (int) std::ceil(((double) SystemConf::getInstance().WORKER_THREADS + 1) / m_cpusPerNode)), -#else - m_cpusPerNode(SystemConf::getInstance().THREADS), - m_numaNodes(1), -#endif - m_maxBufferCapacity(Utils::getPowerOfTwo(m_capacity / m_numaNodes)), - m_buffers(m_numaNodes) { + NumaCircularQueryBuffer(int id, size_t capacity, int tupleSize = 1, bool copyDataOnInsert = true, + size_t batchSize = SystemConf::getInstance().BATCH_SIZE, bool clearFiles = true) : + QueryBuffer(id, capacity, true, tupleSize, copyDataOnInsert, batchSize, nullptr, clearFiles), + NumaBuffer(capacity, tupleSize), m_buffers(m_numaNodes) { assert(m_maxBufferCapacity % tupleSize == 0 && "Buffer capacity has to be divisible by the tuple size."); #if defined(HAVE_NUMA) int numa_node = -1; - long bufferSizePerThread = (long) (m_maxBufferCapacity / 8); + long bufferSizePerThread = (long) (m_maxBufferCapacity / m_cpusPerNode); + bufferSizePerThread = Utils::getPowerOfTwo(bufferSizePerThread); size_t totalSize = 0; Utils::getOrderedCores(m_orderedCpus); for (int i = 0; i < m_numaNodes; ++i) { @@ -54,11 +44,15 @@ class NUMACircularQueryBuffer : public QueryBuffer { long bufferSize = (i != m_numaNodes-1) ? 
(long) m_maxBufferCapacity : (long) ((SystemConf::getInstance().WORKER_THREADS - i * m_cpusPerNode + 1) % m_cpusPerNode) * bufferSizePerThread;
-      if (bufferSize == 0)
+      if (m_numaNodes == 1) {
+        bufferSize = m_maxBufferCapacity;
+      }
+      if (bufferSize == 0 || bufferSize > m_maxBufferCapacity)
         bufferSize = m_maxBufferCapacity;
+      bufferSize = Utils::getPowerOfTwo(bufferSize);
       m_buffers[i] = std::make_unique(bufferSize);
       m_buffers[i]->data()[0] = 0;
-      std::cout << "[DBG] Creating ByteBuffer " << i << std::endl;
+      std::cout << "[DBG] Creating ByteBuffer " << i << " with " << bufferSize << " size" << std::endl;
       SystemConf::getInstance().findMemoryNodeForAddress(m_buffers[i].get()->data(), numa_node);
       std::cout << "[DBG] ByteBuffer " << i << " is allocated in " << numa_node << " numa node." << std::endl;
       /*if (numa_node != i) {
@@ -71,16 +65,28 @@ class NUMACircularQueryBuffer : public QueryBuffer {
     }
     setMask(totalSize);
     setCapacity(totalSize);
+    m_numberOfSlots = totalSize/m_batchSize;
 #else
     m_buffers[0] = std::make_unique(m_maxBufferCapacity);
+    m_numberOfSlots = m_maxBufferCapacity/m_batchSize;
 #endif
+
+    m_emptySlots.store(m_numberOfSlots);
+    //m_slots.resize(m_numberOfSlots);
+
+    //if (m_capacity != 1 && m_capacity % m_batchSize != 0)
+    //  throw std::runtime_error("error: the capacity is not a multiple of the slot size");
+
+    if (m_capacity > 2 && m_tupleSize != 1 && SystemConf::getInstance().LINEAGE_ON) {
+      setupForCheckpoints(nullptr);
+    }
  };

-  long put(char *values, long bytes, long latencyMark = -1) override {
+  long put(char *values, long bytes, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override {
     if (values == nullptr || bytes <= 0)
       throw std::invalid_argument("error: cannot put null to circular buffer");
     assert((long) m_maxBufferCapacity > bytes && "Buffer capacity has to be at least "
-        "the number of bytes writter to avoid spilling to more than two buffers");
+        "the number of bytes written to avoid spilling to more than two buffers");

     /* Get the end pointer */
     long end = m_endP.m_value.load(std::memory_order_relaxed);
@@ -94,6 +100,38 @@ class NUMACircularQueryBuffer : public QueryBuffer {
     }

     long index = normaliseNotPowerOfTwo(end);
+
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      auto firstSlotId = index / m_batchSize;
+      auto endSlotId = (index + bytes) / m_batchSize;
+      auto slotId = firstSlotId;
+      while (slotId <= endSlotId) {
+        auto normSlot = slotId % m_numberOfSlots;
+        if (slotId != endSlotId) {
+          if (!m_slots[normSlot].m_graph) {
+            std::lock_guard l (m_slots[normSlot].m_updateLock);
+            auto newGraph = LineageGraphFactory::getInstance().newInstance();
+            m_slots[normSlot].setLineageGraph(newGraph);
+          }
+        } else {
+          if (!graph && !m_slots[normSlot].m_graph) {
+            graph = LineageGraphFactory::getInstance().newInstance();
+          }
+          if (graph) {
+            std::lock_guard l (m_slots[normSlot].m_updateLock);
+            m_slots[normSlot].setLineageGraph(graph);
+          }
+        }
+
+        /*if (!m_slots[normSlot].m_graph) {
+          throw std::runtime_error(
+              "error: the lineage graph is not initialized when inserting for slot " +
+              std::to_string(normSlot));
+        }*/
+        slotId++;
+      }
+    }
+
     if (m_copyDataOnInsert || m_wraps == 0) { // copy only until the buffer is filled once
       int bufferIdx = getBufferIndex(index);
       /* TODO: fix the case when the batch spills to multiple buffers */
@@ -122,6 +160,64 @@
     return index;
   }
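+
+  // Note: a minimal sketch of the claim/wrap arithmetic shared by the put()
+  // variants above and below (hypothetical helper name; m_temp caches the
+  // last observed start pointer, so m_startP is only re-read when the cached
+  // value makes the buffer look full):
+  //
+  //   long tryClaim(long end, long bytes) {
+  //     long wrapPoint = (end + bytes - 1) - m_capacity;  // writing past this laps the consumer
+  //     if (m_temp.m_value <= wrapPoint) {
+  //       m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed);
+  //       if (m_temp.m_value <= wrapPoint) return -1;     // still full: caller backs off and retries
+  //     }
+  //     return normaliseNotPowerOfTwo(end);               // byte offset at which to copy the batch
+  //   }
+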
+  long put(std::shared_ptr &input, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override {
+    auto values = input->getBuffer().data();
+    auto bytes = input->getBuffer().size();
+
+    if (values == nullptr || bytes <= 0)
+      throw std::invalid_argument("error: cannot put null to circular buffer");
+    assert((long) m_maxBufferCapacity > bytes && "Buffer capacity has to be at least "
+        "the number of bytes written to avoid spilling to more than two buffers");
+
+    /* Get the end pointer */
+    long end = m_endP.m_value.load(std::memory_order_relaxed);
+    /* Find remaining bytes until the circular buffer wraps */
+    long wrapPoint = (end + bytes - 1) - m_capacity;
+    if (m_temp.m_value <= wrapPoint) {
+      m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed);
+      if (m_temp.m_value <= wrapPoint) {
+        return -1;
+      }
+    }
+
+    long index = normaliseNotPowerOfTwo(end);
+
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      throw std::runtime_error("error: lineage not supported during insertion");
+    }
+
+    // always copy
+    int bufferIdx = getBufferIndex(index);
+    /* TODO: fix the case when the batch spills to multiple buffers */
+    if (bytes > ((long) m_capacity - index)) { /* Copy in two parts */
+      throw std::runtime_error("error: batches should not be spilled to more than one buffer");
+    } else if (bytes > ((long) ((bufferIdx + 1) * m_maxBufferCapacity) - index)) { /* Copy in two parts */
+      throw std::runtime_error("error: batches should not be spilled to more than one buffer");
+    } else {
+      long normIndex = normaliseIndex(index, bufferIdx);
+      std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], values, (bytes) * sizeof(char));
+    }
+
+    if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) {
+      int bufferIdx = getBufferIndex(index);
+      long normIndex = normaliseIndex(index, bufferIdx);
+      long *buf = (long *) &m_buffers[bufferIdx].get()->data()[normIndex];
+      buf[0] = Utils::pack(latencyMark, (int) buf[0]);
+    }
+
+    //long p = normalise(_end + bytes);
+    long p = normaliseNotPowerOfTwo(end + bytes);
+    if (p <= index)
+      m_wraps++;
+    m_endP.m_value.store(end + bytes, std::memory_order_relaxed);
+
+    // free UnboundedQueryBuffer
+    UnboundedQueryBufferFactory::getInstance().freeNB(input->getBufferId(), input);
+
+    // debug ();
+    return index;
+  }
+
   void free() override {
     long _start = m_startP.m_value.load(std::memory_order_relaxed);
     long offset = normaliseNotPowerOfTwo(SystemConf::getInstance().BATCH_SIZE + _start);
@@ -141,7 +237,7 @@ class NUMACircularQueryBuffer : public QueryBuffer {
     m_startP.m_value.store(_start + bytes, std::memory_order_relaxed);
   }

-  void free(long offset) override {
+  void free(long offset, bool isPersistent = false) override {
     long _start = m_startP.m_value.load(std::memory_order_relaxed);
     long index = normaliseNotPowerOfTwo(_start);
     long bytes;
@@ -159,42 +255,48 @@ class NUMACircularQueryBuffer : public QueryBuffer {
     m_startP.m_value.store(_start + bytes, std::memory_order_relaxed);
   }

-  int getBufferIndex(long index) {
+  int getBufferIndex(const long index) override {
     return (int) (index / m_maxBufferCapacity);
   }

-  long normaliseIndex(long index) {
+  long normaliseIndex(const long index) override {
     return (index % m_maxBufferCapacity);
   }

-  long normaliseIndex(long index, int bufferIdx) {
+  long normaliseIndex(const long index, const int bufferIdx) override {
     //return index % maxBufferCapacity;
     return (bufferIdx != m_numaNodes - 1) ?
(index % m_maxBufferCapacity) : (index % m_minBufferCapacity); } - ByteBuffer &getBuffer(long index) { + ByteBuffer &getBuffer(long index) override { //index = normalise(index); index = normaliseNotPowerOfTwo(index); int bufferIdx = getBufferIndex(index); return getBuffer(bufferIdx); } - ByteBuffer &getBuffer() override { - return *m_buffers[0].get(); + char *getBufferRaw(long index) override { + index = normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + return getBufferRaw(bufferIdx); } - ByteBuffer &getBuffer(int bufferIdx = 0) { + ByteBuffer &getBuffer(int bufferIdx) override { return *m_buffers[bufferIdx].get(); } - char *getBufferRaw(long index) { - index = normaliseNotPowerOfTwo(index); + char *getBufferRaw(int bufferIdx) override { + return m_buffers[bufferIdx].get()->data(); + } + + int geNumaNodeWithPtr(int index) override { + index = (int) normaliseNotPowerOfTwo(index); int bufferIdx = getBufferIndex(index); - return getBufferRaw(bufferIdx); + return bufferIdx; } - char *getBufferRaw(int bufferIdx) { - return m_buffers[bufferIdx].get()->data(); + ByteBuffer &getBuffer() override { + return *m_buffers[0].get(); } char *getBufferRaw() override { @@ -231,15 +333,85 @@ class NUMACircularQueryBuffer : public QueryBuffer { throw std::runtime_error("error: this method is not supported for the NUMA-aware Circular Buffer"); } + void setupForCheckpoints(std::shared_ptr filesystem) override { + if (m_capacity != 1 && m_capacity % m_batchSize != 0) + throw std::runtime_error("error: the capacity is not a multiple of the slot size"); + m_filesystem = filesystem; + // Initialize the slots + for (size_t slotId = 0; slotId < m_numberOfSlots; ++slotId) { + m_slots[slotId].setId(slotId, m_batchSize, getBufferRaw((long)(slotId * m_batchSize))); + } + } + + int prepareCheckpoint(long freePtr, tbb::concurrent_queue &readySlots, int &firstSlot, int &lastSlot) override { + auto endPtr = m_endP.m_value.load(); + endPtr = normaliseNotPowerOfTwo(endPtr); + if (freePtr > endPtr) + endPtr += m_capacity; + int numOfSlots = 0; + + firstSlot = -1; + lastSlot = -1; + bool isFirst = true; + //auto first = freePtr; + //std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << freePtr << " freePtr " << endPtr << " endPtr " + // << (endPtr-freePtr)/m_batchSize << " slots " << std::endl; + while (freePtr < endPtr - (long) m_batchSize) { + freePtr += (long) m_batchSize; + auto slotId = normaliseNotPowerOfTwo(freePtr) / m_batchSize; + + if (isFirst) { + firstSlot = slotId; + isFirst = false; + } + lastSlot = slotId; + //std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << std::to_string(m_batchSize) << " batchSize " + // << std::to_string(first) << " first " << freePtr << " freePtr " + // << endPtr << " endPtr " << slotId << " slotId " << std::endl; + //if (m_slots[slotId].m_slot.load() != 1 && m_slots[slotId].m_slot.load() != 3) { + // debugSlots(); + // throw std::runtime_error("error: wrong value in slot " + + // std::to_string(slotId) + " with " + + // std::to_string(m_slots[slotId].m_slot.load())); + //} + m_slots[slotId].reset(); + m_slots[slotId].setPreviousSlot(3); + m_slots[slotId].m_slot.store(5); + m_slots[slotId].setNumberOfResults(); + readySlots.push(slotId); + numOfSlots++; + } + if (numOfSlots == 0 && freePtr < endPtr) { + freePtr += (long) m_batchSize; + auto slotId = normaliseNotPowerOfTwo(freePtr) / m_batchSize; + firstSlot = slotId; + lastSlot = slotId; + 
//std::cout << "[DBG] preparing checkpoint for circular buffer " + // << std::to_string(m_id) << " with " + // << std::to_string(m_batchSize) << " batchSize " + // << std::to_string(first) << " first " << freePtr << " freePtr " + // << endPtr << " endPtr " << slotId << " slotId " << std::endl; + m_slots[slotId].reset(); + m_slots[slotId].setPreviousSlot(3); + m_slots[slotId].m_slot.store(5); + m_slots[slotId].setNumberOfResults(); + readySlots.push(slotId); + numOfSlots++; + } + return numOfSlots; + } + size_t getBufferCapacity(int id) override { return (id != m_numaNodes - 1) ? m_maxBufferCapacity : m_minBufferCapacity; } - int geNumaNodeWithPtr(int index) { - index = (int) normaliseNotPowerOfTwo(index); - int bufferIdx = getBufferIndex(index); - return bufferIdx; + void fixTimestamps(size_t index, long timestamp, long step, long batchSize) { + throw std::runtime_error("error: this method is not supported for the NUMA-aware Circular Buffer"); } - ~NUMACircularQueryBuffer() override = default; + ~NumaCircularQueryBuffer() override = default; }; \ No newline at end of file diff --git a/src/buffers/NumaBuffer.h b/src/buffers/NumaBuffer.h new file mode 100644 index 0000000..56ab009 --- /dev/null +++ b/src/buffers/NumaBuffer.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "utils/Utils.h" +#include "utils/PaddedLong.h" +#include "utils/SystemConf.h" + +class Query; + +/* + * \brief NumaBuffer is used as a base class for implementing a lock-free numa-aware circular buffer. + * + * */ + +class NumaBuffer { + + protected: + const int m_cpusPerNode; + const int m_numaNodes; + const size_t m_maxBufferCapacity; + size_t m_minBufferCapacity; + std::vector m_orderedCpus; + + public: + NumaBuffer(size_t capacity, int tupleSize = 1) : +#if defined(HAVE_NUMA) + m_cpusPerNode(Utils::getNumberOfCoresPerSocket()), + m_numaNodes( + (numa_available() < 0) ? 1 : + (int) std::ceil(((double) SystemConf::getInstance().WORKER_THREADS + 1) / m_cpusPerNode)), +#else + m_cpusPerNode(SystemConf::getInstance().THREADS), + m_numaNodes(1), +#endif + m_maxBufferCapacity(Utils::getPowerOfTwo(capacity / m_numaNodes)) { + assert(m_maxBufferCapacity % tupleSize == 0 && "Buffer capacity has to be divisible by the tuple size."); + }; + + virtual int getBufferIndex(const long index) { + return (int) (index / m_maxBufferCapacity); + } + + virtual long normaliseIndex(const long index) { + return (index % m_maxBufferCapacity); + } + + virtual long normaliseIndex(const long index, const int bufferIdx) { + //return index % maxBufferCapacity; + return (bufferIdx != m_numaNodes - 1) ? (index % m_maxBufferCapacity) : (index % m_minBufferCapacity); + } + + virtual ByteBuffer &getBuffer(long index) = 0; + + virtual char *getBufferRaw(long index) = 0; + + virtual int geNumaNodeWithPtr(int index) = 0; + + virtual ByteBuffer &getBuffer(int bufferIdx = 0) = 0; + + virtual char *getBufferRaw(int bufferIdx = 0) = 0; + + virtual ~NumaBuffer() = default; +}; \ No newline at end of file diff --git a/src/buffers/PartialWindowResults.h b/src/buffers/PartialWindowResults.h index 1cf368a..3b1d7f7 100644 --- a/src/buffers/PartialWindowResults.h +++ b/src/buffers/PartialWindowResults.h @@ -48,7 +48,8 @@ class PartialWindowResults { //std::cout << "[DBG] Thread " << sched_getcpu() << " in " << node << " numa node." 
<< std::endl; if (type == 1) { for (size_t i = 0; i < m_bufferPtrs.size(); ++i) { - m_bufferPtrs[i] = (char *) _mm_malloc(capacity * sizeof(char), 64); + //m_bufferPtrs[i] = (char *) _mm_malloc(capacity * sizeof(char), 64); + m_bufferPtrs[i] = (char *) _mm_malloc(capacity * sizeof(char), 512); } } //SystemConf::getInstance().findMemoryNodeForAddress(&bufferPtrs[2], node); @@ -81,6 +82,14 @@ class PartialWindowResults { "error: getting a buffer of pointers from partial window buffer of this type is not implemented"); } + char **getBufferPtrsRaw() { + if (m_type == 1) + return m_bufferPtrs.data(); + else + throw std::runtime_error( + "error: getting a buffer of pointers from partial window buffer of this type is not implemented"); + } + size_t getCapacity() { return m_capacity; } @@ -91,6 +100,9 @@ class PartialWindowResults { void clear() { //std::fill(buffer.begin(), buffer.end(), 0); + if (m_type == 0) { + m_position = 0; + } } void reset() { @@ -105,6 +117,7 @@ class PartialWindowResults { void init() { m_count = 0; + m_position = 0; } void nullify() { @@ -116,7 +129,8 @@ class PartialWindowResults { void increment() { if (m_count > (int) m_partialWindows) - throw std::out_of_range("error: partial window result index out of bounds while incrementing the counter"); + throw std::out_of_range("error: partial window result index out of bounds while incrementing the counter (" + + std::to_string(m_count) + " > " + std::to_string(m_partialWindows) + ")"); m_startPointers[m_count] = (int) getPosition(); m_count++; } @@ -125,14 +139,16 @@ class PartialWindowResults { m_count += cnt; if (m_count > (int) m_partialWindows) throw std::out_of_range( - "error: partial window result index out of bounds while incrementing the counter with a value"); + "error: partial window result index out of bounds while incrementing the counter (" + + std::to_string(m_count) + " > " + std::to_string(m_partialWindows) + ")"); } void setCount(int cnt) { m_count = cnt; if (m_count > (int) m_partialWindows) throw std::out_of_range( - "error: partial window result index out of bounds while incrementing the counter with a value"); + "error: partial window result index out of bounds while incrementing the counter with a value (" + + std::to_string(m_count) + " > " + std::to_string(m_partialWindows) + ")"); } int getStartPointer(int idx) { diff --git a/src/buffers/PartialWindowResultsFactory.h b/src/buffers/PartialWindowResultsFactory.h index 60ac7f6..b30b200 100644 --- a/src/buffers/PartialWindowResultsFactory.h +++ b/src/buffers/PartialWindowResultsFactory.h @@ -48,6 +48,13 @@ class PartialWindowResultsFactory { std::make_shared(pid, SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE); } partialWindowResults->init(); + + /*if (partialWindowResults.use_count() != 1) { + std::cout << "warning: the partial result has a reference counter of " + std::to_string(partialWindowResults.use_count()) << std::endl; + //throw std::runtime_error("error: the partial result should have only one reference when we free it: " + + // std::to_string(partialWindowResults.use_count())); + }*/ + return partialWindowResults; } @@ -73,8 +80,14 @@ class PartialWindowResultsFactory { } void free(int pid, std::shared_ptr &partialWindowResults) { - //if (partialWindowResults.use_count() != 1) - // throw std::runtime_error("error: the partial result should have only one reference when we free it: " + std::to_string(partialWindowResults.use_count())); + /*if (partialWindowResults.use_count() != 1) { + std::cout << "warning: the partial result has a reference 
counter of " + std::to_string(partialWindowResults.use_count()) << std::endl;
+      throw std::runtime_error("error: the partial result should have only one reference when we free it: " +
+                               std::to_string(partialWindowResults.use_count()));
+    }*/
+    if (pid >= m_numberOfThreads)
+      throw std::runtime_error("error: attempting to free partial window with pid: " + std::to_string(pid) +
+                               " >= " + std::to_string(m_numberOfThreads));

     if (partialWindowResults->getType() == 0) {
       m_poolSeqMem[pid].push(partialWindowResults);
diff --git a/src/buffers/PersistentCircularQueryBuffer.h b/src/buffers/PersistentCircularQueryBuffer.h
new file mode 100644
index 0000000..03b2322
--- /dev/null
+++ b/src/buffers/PersistentCircularQueryBuffer.h
@@ -0,0 +1,1292 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "PartialWindowResultsFactory.h"
+#include "UnboundedQueryBufferFactory.h"
+#include "buffers/QueryBuffer.h"
+#include "tasks/Task.h"
+#include "tasks/TaskFactory.h"
+#include "tasks/WindowBatchFactory.h"
+#include "utils/Async.h"
+#include "utils/PaddedLong.h"
+#include "utils/Query.h"
+#include "utils/SystemConf.h"
+#include "utils/TupleSchema.h"
+#include "utils/Utils.h"
+#include "utils/WindowDefinition.h"
+
+/*
+ * \brief This class implements a non-NUMA-aware circular buffer
+ * that persists data to disk asynchronously. It is used to implement a P-Stream.
+ *
+ * */
+
+class AckSlotContext : public IAsyncContext {
+ public:
+  AckSlotContext(std::atomic *slot, std::shared_ptr parW = nullptr) : m_slot(slot), m_parW(parW) {}
+
+ protected:
+  Status deepCopyInternal(IAsyncContext *&context_copy) final {
+    return IAsyncContext::deepCopyInternal(*this, context_copy);
+  }
+
+ public:
+  std::atomic *m_slot;
+  std::shared_ptr m_parW;
+};
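+
+// Note: a minimal usage sketch of the context/callback pair above, mirroring
+// the writeAsync() calls later in this file (the template argument of
+// CallbackContext and the buffer/offset names are restored here purely for
+// illustration):
+//
+//   AckSlotContext context{&m_slots[slotId].m_slot};
+//   auto callback = [](IAsyncContext *ctxt, Status result, size_t bytesTransferred) {
+//     CallbackContext<AckSlotContext> context{ctxt};       // reclaims the deep-copied context
+//     auto expected = 0;
+//     context->m_slot->compare_exchange_strong(expected, 1); // 0 -> 1: slot is durable on disk
+//   };
+//   assert(file->writeAsync(src, offset, bytes, callback, context) == Status::Ok);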
+
+class PersistentCircularQueryBuffer : public QueryBuffer {
+ private:
+  struct PMem;
+
+  /// The buffer holding the in-memory data
+  ByteBuffer m_buffer;
+
+  // todo: these have to be persisted
+  std::atomic m_nextFreeSlot;
+  /* Lock protecting the acknowledgment of persisting the input to a file */
+  std::mutex m_ackLock;
+  std::atomic m_nextPersistentSlot;
+
+  // Variables for persisting the file pointers
+  const size_t m_poolSize;
+  const std::string m_layout = "";
+  pmem::obj::pool m_pop;
+  pmem::obj::persistent_ptr m_root;
+  std::string m_pmFileName;
+  file_t *m_pmFile;
+
+  // Variables for persisting asynchronously the actual data
+  std::string m_asyncFileName;
+  file_t *m_asyncFile;
+  FileOptions m_asyncFileOptions;
+
+  std::atomic m_wraps = 0;
+
+  // Used for compression
+  std::vector m_copyBuffers;
+
+  bool m_ready = false;
+  const bool m_debug = false;
+
+ public:
+  PersistentCircularQueryBuffer(int id, size_t capacity, int tupleSize = 1,
+                                bool copyDataOnInsert = true, size_t batchSize = SystemConf::getInstance().BATCH_SIZE,
+                                std::shared_ptr filesystem = nullptr, bool clearFiles = true,
+                                bool unbuffered = true, bool delete_on_close = false)
+      : QueryBuffer(id, capacity, false, tupleSize, copyDataOnInsert, batchSize, filesystem, clearFiles),
+        m_buffer(capacity),
+        m_nextFreeSlot(0),
+        m_nextPersistentSlot(0),
+        m_poolSize(PMEMOBJ_MIN_POOL),
+        m_pmFileName("scabbard/queue_pm_" + std::to_string(id)),
+        m_asyncFileName("scabbard/queue_data_" + std::to_string(id)),
+        m_asyncFileOptions(unbuffered, delete_on_close),
+        m_copyBuffers(SystemConf::getInstance().WORKER_THREADS, ByteBuffer(m_batchSize)) {
+    if (m_capacity % m_batchSize != 0)
+      throw std::runtime_error("error: the capacity is not a multiple of the slot size");
+    if (!(m_numberOfSlots && !(m_numberOfSlots & (m_numberOfSlots - 1)))) {
+      throw std::runtime_error ("error: the number of slots has to be a power of two");
+    }
+    try {
+      if (!m_filesystem) {
+        std::cout << "warning: no filesystem passed to the constructor. "
+                     "Initializing a new filesystem..." << std::endl;
+        m_filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH, SystemConf::getInstance().WORKER_THREADS);
+      }
+
+      Utils::tryCreateDirectory(m_filesystem->getRootPath() + "scabbard");
+      auto pmPath = m_filesystem->getRootPath() + m_pmFileName;
+      if (Utils::fileExists(pmPath.c_str()) != 0) {
+        m_pop = pmem::obj::pool::create(pmPath.c_str(),
+                                        "", m_poolSize, CREATE_MODE_RW);
+        m_root = m_pop.root();
+        pmem::obj::make_persistent_atomic(m_pop, m_root->next);
+        pmem::obj::transaction::run(m_pop, [&] { m_root = m_root->next; });
+        m_previousBlockSize = SystemConf::getInstance().BATCH_SIZE;
+      } else {
+        m_pop = pmem::obj::pool::open(pmPath, "");
+        m_root = m_pop.root();
+        m_root = m_root->next;
+        m_previousBlockSize = m_root->m_blockSize.get_ro();
+      }
+    } catch (const pmem::pool_error &e) {
+      std::cerr << "Exception: " << e.what() << std::endl;
+      return;
+    } catch (const pmem::transaction_error &e) {
+      std::cerr << "Exception: " << e.what() << std::endl;
+      return;
+    }
+
+    m_root->m_blockSize.get_rw() = SystemConf::getInstance().BLOCK_SIZE;
+
+    // Initialize the slots
+    for (size_t slotId = 0; slotId < m_numberOfSlots; ++slotId) {
+      m_slots[slotId].setId(slotId, m_batchSize, nullptr);
+    }
+
+    // Open File handlers
+    m_pmFile = m_filesystem->newFile(m_pmFileName); // do I need this?
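+
+    // Note: the try/catch above is the usual libpmemobj-cpp create-or-open
+    // sequence. A condensed sketch, assuming a hypothetical root type Root
+    // with a persistent `next` field like the PMem struct in this class
+    // (Utils::fileExists() returns non-zero when the file is missing):
+    //
+    //   pmem::obj::pool<Root> pop;
+    //   if (Utils::fileExists(path.c_str()) != 0) {  // first run: create pool + root object
+    //     pop = pmem::obj::pool<Root>::create(path, "layout", PMEMOBJ_MIN_POOL, CREATE_MODE_RW);
+    //     pmem::obj::make_persistent_atomic<Root>(pop, pop.root()->next);
+    //   } else {                                     // restart: reopen and read back state
+    //     pop = pmem::obj::pool<Root>::open(path, "layout");
+    //   }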
+ if (!SystemConf::getInstance().LINEAGE_ON) { + m_asyncFile = m_filesystem->newFile(m_asyncFileName, m_numberOfSlots * m_batchSize); + } + m_ready = true; + }; + + long put(char *values, long bytes, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+" != "+std::to_string(bytes)+")"); + + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Creating insertion task for slot " + + std::to_string(m_nextSlotToWrite) + + " with empty slots " + std::to_string(m_emptySlots.load()-1) << std::endl; + } + createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + } + + long put(std::shared_ptr &values, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + auto bytes = values->getBuffer().size(); + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+" != "+std::to_string(bytes)+")"); + + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Creating insertion task for slot " + + std::to_string(m_nextSlotToWrite) + + " with empty slots " + std::to_string(m_emptySlots.load()-1) << std::endl; + } + createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = 
tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + } + + long put(void *val, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + infinity::core::receive_element_t *values = (infinity::core::receive_element_t *) val; + if (!values) { + throw std::runtime_error("error: values is not set"); + } + + auto bytes = values->buffer->getSizeInBytes(); + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+" != "+std::to_string(bytes)+")"); + + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Creating insertion task for slot " + + std::to_string(m_nextSlotToWrite) + + " with empty slots " + std::to_string(m_emptySlots.load()-1) << std::endl; + } + createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + + } + + void putRows(int pid, char *values, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker writing to slot " + std::to_string(slotId) + + " with " + std::to_string(index) + " startPointer" + + " and " + std::to_string(index+bytes) + " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckSlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW.reset(); + } + }; + + if (m_copyDataOnInsert || wraps == 0) { // copy only until the buffer is filled once + std::memcpy(&m_buffer[index], values, bytes); + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! + m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = &m_buffer[index]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, values, (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, values, 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //std::cout << "[DBG] falling back to the initial compression scheme" << std::endl; + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, values, 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? 
latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[0](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, values, 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + } else { + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker compressing data for slot " + std::to_string(slotId) + + " of " + std::to_string(bytes) + " bytes to " + + std::to_string(diskBytes) + " with " + + std::to_string((double)bytes/(double)diskBytes) + " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + } + + void putRows(int pid, std::shared_ptr &values, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker writing to slot " + std::to_string(slotId) + + " with " + std::to_string(index) + " startPointer" + + " and " + std::to_string(index+bytes) + " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckSlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW.reset(); + } + }; + + if (m_copyDataOnInsert || wraps == 0) { // copy only until the buffer is filled once + std::memcpy(&m_buffer[index], values->getBuffer().data(), bytes); + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! + m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = &m_buffer[index]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, values->getBuffer().data(), (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, values->getBuffer().data(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //std::cout << "[DBG] falling back to the initial compression scheme" << std::endl; + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, values->getBuffer().data(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? 
latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[0](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, values->getBuffer().data(), 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + + // free UnboundedQueryBuffer + UnboundedQueryBufferFactory::getInstance().freeNB(values->getBufferId(), values); + } else { + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker compressing data for slot " + std::to_string(slotId) + + " of " + std::to_string(bytes) + " bytes to " + + std::to_string(diskBytes) + " with " + + std::to_string((double)bytes/(double)diskBytes) + " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + } + + void putRows(int pid, void *val, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + auto *values = (infinity::core::receive_element_t *) val; + + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + /*for (auto ii = 0; ii < bytes; ii+=8) { + __builtin_prefetch(((char*) values->buffer->getData() + ii), 1, 3); + }*/ + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker writing to slot " + std::to_string(slotId) + + " with " + std::to_string(index) + " startPointer" + + " and " + std::to_string(index+bytes) + " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckSlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW.reset(); + } + }; + + if (m_copyDataOnInsert || wraps == 0) { // copy only until the buffer is filled once + //std::memcpy(&m_buffer[index], values->buffer->getData(), bytes); + size_t idx = 0; + if (m_filter) { + m_filterFP((char*) values->buffer->getData(), 0, (int) bytes, &m_buffer[index], 0, reinterpret_cast(idx)); + } else { + std::memcpy(&m_buffer[index], values->buffer->getData(), bytes); + } + } + //if (wraps == 0 && !m_copyDataOnInsert) { + // std::cout << " I am not copying data " << std::endl; + //} + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *)&m_buffer[index]; + buf[0] = Utils::pack(latencyMark, (int)buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! 
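+
+  // Note: for orientation, the slot lifecycle used throughout this file
+  // (states as stored in m_slots[i].m_slot by the surrounding code):
+  //   -1 free  -> 0 claimed in createInsertionTask(), data copied and the
+  //   async write issued -> 1 set by the writeAsync() completion callback
+  //   once the data is durable -> 3 set by tryConsumeNextSlot() when handed
+  //   to a processing task -> -1 again in free(), which bumps m_emptySlots.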
+ m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = &m_buffer[index]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, (char*) values->buffer->getData(), (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, (char*) values->buffer->getData(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //std::cout << "[DBG] falling back to the initial compression scheme" << std::endl; + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, (char*) values->buffer->getData(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[0](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, (char*) values->buffer->getData(), 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + } else { + diskBytes = bytes; + } + // free rdma buffer + RDMABufferPool::getInstance().free(values); + + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Worker compressing data for slot " + std::to_string(slotId) + + " of " + std::to_string(bytes) + " bytes to " + + std::to_string(diskBytes) + " with " + + std::to_string((double)bytes/(double)diskBytes) + " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), + slotId * m_batchSize, diskBytes, callback, + context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + } + + long recover(int &bytes) override { + if (!SystemConf::getInstance().LINEAGE_ON) { + throw std::runtime_error("error: lineage must be enabled for recovery"); + } + if (m_numberOfSlotsToRecover == 0) { + return -1; + } + + if (m_emptySlots <= 0) { + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return 0; + } + + /* Get the slot to write */ + auto slot = getNextSlotToWrite(); + if (slot >= m_numberOfSlots) + m_wraps = 1; + auto slotId = slot % m_numberOfSlots; + auto index = slotId * m_batchSize; + auto end = (m_compress) ? slotId * SystemConf::getInstance().BLOCK_SIZE : slotId * m_batchSize; + long readEnd = m_pop.root()->m_startP.get_ro().load() + end; + long readIndex = normalise(readEnd); + + auto graph = LineageGraphFactory::getInstance().newInstance(); + + m_slots[slotId].setLineageGraph(graph); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_fileStore->getUnsafeFilePtr(m_query->getId(), bufferId, readEnd, m_pop.root()->m_startId.get_ro().load()); + m_slots[slotId].m_fptr = fptr; + + AckSlotContext context{&m_slots[slotId].m_slot}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + }; + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: (during recovery) adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + bytes = (!m_compress) ? m_batchSize : std::min((size_t)m_previousBlockSize, m_batchSize); + if (m_copyDataOnInsert) { + assert(fptr->readAsync(readIndex, &m_buffer[index], bytes, + callback, context) == Status::Ok); + } else { + //std::cout << "reading async data for slot " << slotId << std::endl; + assert(fptr->readAsync(0, &m_buffer[index], bytes, + callback, context) == Status::Ok); + } + + m_slots[slotId].m_memcpyFinished.store(true); + + // reduce number of slots + m_numberOfSlotsToRecover--; + m_emptySlots.fetch_add(-1); + + if (m_debug) { + debug(); + } + + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return 0; + } + + void prepareRecovery() override { + if (!m_copyDataOnInsert) { + if (m_numberOfSlotsToRecover < m_numberOfSlots) { + long readEnd = m_pop.root()->m_startP.get_ro().load(); + long readIndex = normalise(readEnd); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_fileStore->getUnsafeFilePtr(m_query->getId(), bufferId, readEnd, m_pop.root()->m_startId.get_ro().load()); + if (m_compress) { + auto sIndex = m_numberOfSlotsToRecover * m_batchSize; + auto readBytes = std::min((size_t)m_previousBlockSize, m_batchSize); + assert(fptr->readSync(0, m_copyBuffers[0].data(), readBytes) == Status::Ok); + int writePos = 0; + bool copy = false; + auto latency = (SystemConf::getInstance().LATENCY_ON) ? 0 : -1; + m_decompressionFP[m_compPos](0, &m_buffer[sIndex], 0, m_batchSize, m_copyBuffers[0].data(), writePos, SystemConf::getInstance().BLOCK_SIZE, copy, latency); + if (writePos != SystemConf::getInstance().BATCH_SIZE) + throw std::runtime_error("error: the write position is not equal to the batch size after decompression"); + //std::cout << "[DBG] decompressing for slot " << m_numberOfSlotsToRecover << " and index " << sIndex << std::endl; + for (size_t slotId = m_numberOfSlotsToRecover + 1; slotId < m_numberOfSlots; ++slotId) { + auto index = slotId * m_batchSize; + std::memcpy(&m_buffer[index], &m_buffer[sIndex], m_batchSize); + //std::cout << "[DBG] copying for slot " << slotId << " and index " << index << std::endl; + } + } else { + assert(fptr->readSync(0, m_copyBuffers[0].data(), m_batchSize) == Status::Ok); + for (size_t slotId = m_numberOfSlotsToRecover; slotId < m_numberOfSlots; ++slotId) { + auto index = slotId * m_batchSize; + std::memcpy(&m_buffer[index], m_copyBuffers[0].data(), m_batchSize); + } + } + } + m_wraps = 1; + } + + } + + bool tryConsumeNextSlot(long &index, int &length, bool recover = false) { + if (recover) { + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + } + bool found = false; + if (isSlotReady(m_readerSlot)) { + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Creating processing task for slot " + std::to_string(m_readerSlot) + + " with reader slot " + std::to_string(m_readerSlot) << std::endl; + } + m_slots[m_readerSlot].m_slot.store(3); + index = m_readerSlot * m_batchSize; + length = m_batchSize; + m_readerSlot++; + if (m_readerSlot == getNumberOfSlots()) { + m_readerSlot = 0; + } + found = true; + } + return found; + } + + void free() override { + throw std::invalid_argument("error: this operator is not supported yet"); + } + + void free(long offset, bool isPersistent = false) override { + if (SystemConf::getInstance().LINEAGE_ON && !isPersistent) + return; + + long _start = m_startP.m_value.load(std::memory_order_relaxed); + + long index = normalise(_start); + long bytes; + /* Measurements */ + if (offset <= index) + bytes = m_capacity - index + offset + 1; + else + bytes = offset - index + 1; + + m_bytesProcessed.fetch_add(bytes, std::memory_order_relaxed); + m_tuplesProcessed.fetch_add((bytes / (size_t)m_tupleSize), + std::memory_order_relaxed); + m_tasksProcessed.fetch_add(1, std::memory_order_relaxed); + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Freeing slot " + std::to_string(m_nextFreeSlot) + + " with empty slots " + std::to_string(m_emptySlots) << std::endl; + } + + m_slots[m_nextFreeSlot].m_memcpyFinished.store(false); + m_slots[m_nextFreeSlot].m_slot.store(-1); + m_emptySlots.fetch_add(1); + + m_nextFreeSlot++; + if (m_nextFreeSlot == m_numberOfSlots) m_nextFreeSlot = 0; + + /* Set new start pointer */ + m_startP.m_value.store(_start + bytes, std::memory_order_relaxed); + m_root->m_bytesProcessed.get_rw().fetch_add(bytes, + std::memory_order_release); + //m_root->m_startP.get_rw().store(_start + bytes, 
std::memory_order_release); + } + + ByteBuffer &getBuffer() override { return m_buffer; } + + char *getBufferRaw() override { return m_buffer.data(); } + + size_t getBufferCapacity(int id) override { + (void)id; + return m_capacity; + } + + long getLong(size_t index) override { + auto p = (long *)m_buffer.data(); + return p[normalise(index) / sizeof(size_t)]; + } + + void setLong(size_t index, long value) override { + auto p = (long *)m_buffer.data(); + p[normalise(index) / sizeof(size_t)] = value; + } + + void appendBytesTo(int startPos, int endPos, + ByteBuffer &outputBuffer) override { + if (endPos > startPos) { + std::copy(m_buffer.begin() + startPos, m_buffer.begin() + endPos, + outputBuffer.begin()); + } else { + std::copy(m_buffer.begin() + startPos, m_buffer.end(), + outputBuffer.begin()); + std::copy(m_buffer.begin(), m_buffer.begin() + endPos, + outputBuffer.begin() + (m_capacity - startPos)); + } + } + + void appendBytesTo(int startPos, int endPos, char *output) override { + if (endPos > startPos) { + std::memcpy(output, m_buffer.data() + startPos, + (endPos - startPos) * sizeof(char)); + } else { + std::memcpy(output, m_buffer.data() + startPos, + (m_capacity - startPos) * sizeof(char)); + std::memcpy(output + (m_capacity - startPos), m_buffer.data(), + (endPos) * sizeof(char)); + } + } + + void createInsertionTask(char *values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion(values, bytes, slot, latencyMark, retainMark, m_wraps); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + void createInsertionTask(std::shared_ptr &values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion(values, bytes, slot, latencyMark, retainMark, m_wraps); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + void createInsertionTask(void *values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion((void *)values, bytes, slot, latencyMark, retainMark, m_wraps); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + bool isPersistent() override { return true; } + + void updateFileEndPtr(long id) override { + m_root->m_endId.get_rw().store(id); + } + + void updateFileStartPtr(long id, long offset) override { + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " garbage collection: updating the start id to " + std::to_string(id) + + " and the offset to " + std::to_string(offset) + " (prev offset: " + + std::to_string(m_prevFreeOffset) + ")" << std::endl; + } + if (offset < m_prevFreeOffset) { + //throw std::runtime_error("error: trying to free an invalid offset " + + // std::to_string(offset) + " < " + + // std::to_string(m_prevFreeOffset)); + //std::cout << "warning: trying to free an invalid offset in cqbuffer " + + // std::to_string(offset) + " < " + std::to_string(m_prevFreeOffset) << std::endl; + return; + } + m_prevFreeOffset = offset; + + m_root->m_startId.get_rw().store(id); + m_root->m_startP.get_rw().store(offset); + } + + void updateStepAndOffset(long step, long offset) { + m_root->m_step.get_rw().store(step); + m_root->m_offset.get_rw().store(offset); + } + + void getStepAndOffset(long &step, long &offset) { + step = m_root->m_step.get_ro().load(); + offset = m_root->m_startP.get_ro().load(); //m_root->m_offset.get_ro().load(); + } + + size_t getBytesProcessed() override { return m_root->m_bytesProcessed.get_ro(); } + + size_t getUnsafeStartPointer() override { + return m_root->m_startP.get_ro().load(std::memory_order_relaxed); + } + + void incrementUnsafeStartPointer(size_t offset) override { + auto start = m_root->m_startP.get_ro().load(); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + m_fileStore->freePersistent(m_query->getId(), bufferId, start + offset); + } + + size_t getUnsafeEndPointer() override { + return m_root->m_endP.get_ro().load(std::memory_order_relaxed); + } + + size_t getUnsafeRemainingBytes() override { + auto start = m_root->m_startP.get_ro().load(); + auto end = m_root->m_endP.get_ro().load(); + return end-start; + } + + bool isSlotReady(size_t slotId) { + checkSlotNumber(slotId); + return m_slots[slotId].m_slot.load() == 2 && m_slots[slotId].m_memcpyFinished; + } + + void checkSlotNumber(size_t slotId) { + if (slotId >= m_numberOfSlots) + throw std::invalid_argument("error: slotId >= m_numberOfSlots"); + } + + size_t getSlotId(size_t index) { return std::floor(index / m_batchSize); } + + void clearPersistentMemory() { + m_pop.close(); + m_filesystem->eraseFiles(); + } + + ~PersistentCircularQueryBuffer() override { + m_pop.close(); + }; + + private: + void tryToAcknowledge() { + try { + if (!m_ackLock.try_lock()) return; + + while (true) { + m_filesystem->getHandler().tryCompleteMultiple(); + auto slotId = m_nextPersistentSlot.load(); + if (m_slots[slotId].m_slot.load() != 1) { + break; + } + + if (m_debug) { + std::cout << "[DBG] CB " + std::to_string(m_id) + " Acknowledging slot " + std::to_string(slotId) << std::endl; + } + + // m_endP.fetch_add(_4MB); + if (m_numberOfSlotsToFree == 0) { + m_root->m_endP.get_rw().fetch_add(m_batchSize, std::memory_order_release); + } else { + if (m_compress) { + auto index = slotId * m_batchSize; + int writePos = 0; + bool copy = true; + auto latency = (SystemConf::getInstance().LATENCY_ON) ? 0 : -1; + m_decompressionFP[m_compPos](0, &m_buffer[index], 0, m_batchSize, m_copyBuffers[0].data(), writePos, SystemConf::getInstance().BLOCK_SIZE, copy, latency); + if (writePos != SystemConf::getInstance().BATCH_SIZE) + throw std::runtime_error("error: the write position is not equal to the batch size after decompression"); + //std::cout << "[DBG] decompressing for slot " << slotId << " and index " << index << std::endl; + } + m_numberOfSlotsToFree--; + } + m_endP.m_value.fetch_add(m_batchSize, std::memory_order_relaxed); + + m_nextPersistentSlot.fetch_add(1); + m_slots[slotId].m_slot.store(2); + + if (m_nextPersistentSlot.load() == m_numberOfSlots) + m_nextPersistentSlot.store(0); + } + + m_ackLock.unlock(); + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + } + } + + struct PMem { + pmem::obj::p> m_startP; + pmem::obj::p> m_endP; + pmem::obj::p> m_bytesProcessed; + pmem::obj::p> m_step; + pmem::obj::p> m_offset; + pmem::obj::p> m_startId; + pmem::obj::p> m_endId; + pmem::obj::p> m_blockSize; + pmem::obj::persistent_ptr next; + PMem() { + m_startP.get_rw() = 0L; + m_endP.get_rw() = 0L; + m_bytesProcessed.get_rw() = 0L; + m_blockSize.get_rw() = 0L; + }; + + void updateBlockSize(long const& value) { + auto prev_value = m_blockSize.get_ro().load(); + while(prev_value < value && + !m_blockSize.get_rw().compare_exchange_weak(prev_value, value)) + {} + } + + /** Copy constructor is deleted */ + PMem(const PMem &) = delete; + /** Assignment operator is deleted */ + PMem &operator=(const PMem &) = delete; + }; +}; \ No newline at end of file diff --git a/src/buffers/PersistentNumaCircularQueryBuffer.h b/src/buffers/PersistentNumaCircularQueryBuffer.h new file mode 100644 index 0000000..4179f69 --- /dev/null +++ b/src/buffers/PersistentNumaCircularQueryBuffer.h @@ -0,0 +1,1413 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "PartialWindowResultsFactory.h"
+#include "UnboundedQueryBufferFactory.h"
+#include "buffers/NumaBuffer.h"
+#include "buffers/QueryBuffer.h"
+#include "tasks/Task.h"
+#include "tasks/TaskFactory.h"
+#include "tasks/WindowBatchFactory.h"
+#include "utils/Async.h"
+#include "utils/PaddedLong.h"
+#include "utils/Query.h"
+#include "utils/SystemConf.h"
+#include "utils/TupleSchema.h"
+#include "utils/Utils.h"
+#include "utils/WindowDefinition.h"
+
+/*
+ * \brief This class implements a NUMA-aware circular buffer
+ * that persists data to disk asynchronously. It is used to implement a P-Stream.
+ *
+ * */
+
+class AckNUMASlotContext : public IAsyncContext {
+ public:
+  AckNUMASlotContext(std::atomic<int> *slot, std::shared_ptr<PartialWindowResults> parW = nullptr) : m_slot(slot), m_parW(parW) {}
+
+ protected:
+  Status deepCopyInternal(IAsyncContext *&context_copy) final {
+    return IAsyncContext::deepCopyInternal(*this, context_copy);
+  }
+
+ public:
+  std::atomic<int> *m_slot;
+  std::shared_ptr<PartialWindowResults> m_parW;
+};
+
+class PersistentNumaCircularQueryBuffer : public QueryBuffer, public NumaBuffer {
+ private:
+  struct PMem;
+
+  /// The buffers holding the in-memory data
+  std::vector<std::unique_ptr<ByteBuffer>> m_buffers;
+
+  // todo: these have to be persisted
+  std::atomic<size_t> m_nextFreeSlot;
+  /* Lock protecting the acknowledgment of persisting the input to a file */
+  std::mutex m_ackLock;
+  std::atomic<size_t> m_nextPersistentSlot;
+
+  // Variables for persisting the file pointers
+  const size_t m_poolSize;
+  const std::string m_layout = "";
+  pmem::obj::pool<PMem> m_pop;
+  pmem::obj::persistent_ptr<PMem> m_root;
+  std::string m_pmFileName;
+  file_t *m_pmFile;
+
+  // Variables for persisting asynchronously the actual data
+  std::string m_asyncFileName;
+  file_t *m_asyncFile;
+  FileOptions m_asyncFileOptions;
+
+  std::atomic<int> m_wraps = 0;
+
+  // Used for compression
+  std::vector<ByteBuffer> m_copyBuffers;
+
+  bool m_ready = false;
+  const bool m_debug = false;
+
+ public:
+  PersistentNumaCircularQueryBuffer(int id, size_t capacity, int tupleSize = 1, bool copyDataOnInsert = true,
+                                    size_t batchSize = SystemConf::getInstance().BATCH_SIZE,
+                                    std::shared_ptr<disk_t> filesystem = nullptr, bool clearFiles = true,
+                                    bool unbuffered = true, bool delete_on_close = false) :
+      QueryBuffer(id, capacity, true, tupleSize, copyDataOnInsert, batchSize, filesystem, clearFiles),
+      NumaBuffer(capacity, tupleSize),
+      m_buffers(m_numaNodes),
+      m_nextFreeSlot(0),
+      m_nextPersistentSlot(0),
+      m_poolSize(PMEMOBJ_MIN_POOL),
+      m_pmFileName("scabbard/queue_pm_" + std::to_string(id)),
+      m_asyncFileName("scabbard/queue_data_" + std::to_string(id)),
+      m_asyncFileOptions(unbuffered, delete_on_close),
+      m_copyBuffers(SystemConf::getInstance().WORKER_THREADS, ByteBuffer(m_batchSize)) {
+
+    assert(m_maxBufferCapacity % tupleSize == 0 && "Buffer capacity has to be divisible by the tuple size.");
+#if defined(HAVE_NUMA)
+    int numa_node = -1;
+    long bufferSizePerThread = (long) (m_maxBufferCapacity / m_cpusPerNode);
+    bufferSizePerThread = Utils::getPowerOfTwo(bufferSizePerThread);
+    size_t totalSize = 0;
+    Utils::getOrderedCores(m_orderedCpus);
+    for (int i = 0; i < m_numaNodes; ++i) {
+      Utils::bindProcess(m_orderedCpus[i*m_cpusPerNode]);
+      long bufferSize = (i != m_numaNodes-1) ?
(long) m_maxBufferCapacity : + (long) ((SystemConf::getInstance().WORKER_THREADS - i * m_cpusPerNode + 1) % m_cpusPerNode) + * bufferSizePerThread; + if (m_numaNodes == 1) { + bufferSize = m_maxBufferCapacity; + } + if (bufferSize == 0 || bufferSize > m_maxBufferCapacity) + bufferSize = m_maxBufferCapacity; + bufferSize = Utils::getPowerOfTwo(bufferSize); + m_buffers[i] = std::make_unique(bufferSize); + m_buffers[i]->data()[0] = 0; + std::cout << "[DBG] Creating ByteBuffer " << i << " with " << bufferSize << " size" << std::endl; + SystemConf::getInstance().findMemoryNodeForAddress(m_buffers[i].get()->data(), numa_node); + std::cout << "[DBG] ByteBuffer " << i << " is allocated in " << numa_node << " numa node." << std::endl; + /*if (numa_node != i) { + //throw std::runtime_error("ByteBuffers are not properly allocated."); + //numaNodes--; + //break; + }*/ + totalSize += bufferSize; + m_minBufferCapacity = bufferSize; + } + setMask(totalSize); + setCapacity(totalSize); + m_numberOfSlots = totalSize/m_batchSize; +#else + m_buffers[0] = std::make_unique(m_maxBufferCapacity); + m_numberOfSlots = m_maxBufferCapacity/m_batchSize; +#endif + + m_buffers.resize(m_numaNodes); + m_emptySlots.store(m_numberOfSlots); + //m_slots.resize(m_numberOfSlots); + + if (m_capacity % SystemConf::getInstance().BUNDLE_SIZE != 0) + throw std::runtime_error("error: the capacity is not a multiple of the slot size (" + + std::to_string(m_capacity) + " % " + + std::to_string(SystemConf::getInstance().BUNDLE_SIZE) + ")"); + /*if (!(m_numberOfSlots && !(m_numberOfSlots & (m_numberOfSlots - 1)))) { + throw std::runtime_error ("error: the number of slots has to be a power of two"); + }*/ + try { + if (!m_filesystem) { + std::cout << "warning: no filesystem passed to the constructor. " + "Initializing a new filesystem..." << std::endl; + m_filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH, SystemConf::getInstance().WORKER_THREADS); + } + + Utils::tryCreateDirectory(m_filesystem->getRootPath() + "scabbard"); + auto pmPath = m_filesystem->getRootPath() + m_pmFileName; + if (Utils::fileExists(pmPath.c_str()) != 0) { + m_pop = pmem::obj::pool::create(pmPath.c_str(), + "", m_poolSize, CREATE_MODE_RW); + m_root = m_pop.root(); + pmem::obj::make_persistent_atomic(m_pop, m_root->next); + pmem::obj::transaction::run(m_pop, [&] { m_root = m_root->next; }); + m_previousBlockSize = SystemConf::getInstance().BATCH_SIZE; + } else { + m_pop = pmem::obj::pool::open(pmPath, ""); + m_root = m_pop.root(); + m_root = m_root->next; + m_previousBlockSize = m_root->m_blockSize.get_ro(); + } + } catch (const pmem::pool_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } catch (const pmem::transaction_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } + + m_root->m_blockSize.get_rw() = SystemConf::getInstance().BLOCK_SIZE; + + // Initialize the slots + for (size_t slotId = 0; slotId < m_numberOfSlots; ++slotId) { + m_slots[slotId].setId(slotId, m_batchSize, nullptr); + } + + // Open File handlers + m_pmFile = m_filesystem->newFile(m_pmFileName); // do I need this? 
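A minimal sketch of the persistent-pool bootstrap a few lines above, assuming a simplified root record; SimpleRoot, openOrCreatePool, and the access(2) existence check are illustrative stand-ins (the real constructor keeps its metadata in the PMem struct at the end of this class and tests existence with Utils::fileExists):

#include <unistd.h>
#include <string>
#include <libpmemobj++/p.hpp>
#include <libpmemobj++/pool.hpp>

struct SimpleRoot {
  pmem::obj::p<long> blockSize;  // persisted across restarts, like m_blockSize
};

pmem::obj::pool<SimpleRoot> openOrCreatePool(const std::string &path, size_t poolSize) {
  if (access(path.c_str(), F_OK) != 0) {
    // First run: create the pool file and start from a zero-initialized root.
    return pmem::obj::pool<SimpleRoot>::create(path, /*layout=*/"", poolSize);
  }
  // Restart: reopen the pool; the root object still holds the persisted fields.
  return pmem::obj::pool<SimpleRoot>::open(path, /*layout=*/"");
}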
+ if (!SystemConf::getInstance().LINEAGE_ON) { + m_asyncFile = m_filesystem->newFile(m_asyncFileName, m_numberOfSlots * m_batchSize); + } + m_ready = true; + }; + + long put(char *values, long bytes, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+" != "+std::to_string(bytes)+")"); + assert((long) m_maxBufferCapacity > bytes && "Buffer capacity has to be at least " + "the number of bytes writer to avoid spilling to more than two buffers"); + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] Creating insertion task for slot " + << std::to_string(m_nextSlotToWrite) + << " with empty slots " << m_emptySlots.load()-1 << std::endl; + } + createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + } + + long put(std::shared_ptr &values, long latencyMark, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + auto bytes = values->getBuffer().size(); + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+")"); + assert((long) m_maxBufferCapacity > bytes && "Buffer capacity has to be at least " + "the number of bytes writer to avoid spilling to more than two buffers"); + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] Creating insertion task for slot " + << std::to_string(m_nextSlotToWrite) + << " with empty slots " << m_emptySlots.load()-1 << std::endl; + } + 
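The admission check performed near the top of each put() overload reduces to the predicate below; a sketch with simplified names (bufferIsFull is hypothetical), where start and end are the monotonically growing byte counters that are only normalised into the circular buffer when data is copied:

// The write may not overtake the slowest reader by a full lap,
// and one of the fixed-size insertion slots must be free.
bool bufferIsFull(long start, long end, long bytes, long capacity, long emptySlots) {
  long wrapPoint = (end + bytes - 1) - capacity;  // last byte, one lap back
  return start <= wrapPoint || emptySlots <= 0;
}

When the predicate holds, put() drains completed I/O with tryCompleteMultiple(), tries to acknowledge finished slots, and returns -1 so the caller can retry.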
createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + } + + long put(void *val, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) override { + while (!m_ready) + ; + + infinity::core::receive_element_t *values = (infinity::core::receive_element_t *) val; + if (!values) { + throw std::runtime_error("error: values is not set"); + } + + auto bytes = values->buffer->getSizeInBytes(); + + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + if (bytes != m_batchSize) + throw std::invalid_argument("error: the size of the input must be equal to the slot ("+std::to_string(m_batchSize)+")"); + assert((long) m_maxBufferCapacity > bytes && "Buffer capacity has to be at least " + "the number of bytes writer to avoid spilling to more than two buffers"); + /* Get the end pointer */ + long end = m_endP.m_value.load(std::memory_order_relaxed); + + bool hasEmptySlots = (m_emptySlots > 0); + + /* Find remaining bytes until the circular buffer wraps */ + long wrapPoint = (end + bytes - 1) - m_capacity; + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + m_temp.m_value = m_startP.m_value.load(std::memory_order_relaxed); + if (m_temp.m_value <= wrapPoint || !hasEmptySlots) { + // std::cout << "[DBG] Circular Buffer is full with startP " + // << m_startP.m_value.load() << " and endP " + // << m_endP.m_value.load() << std::endl; + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return -1; + } + } + + // create task + if (m_debug) { + std::cout << "[DBG] Creating insertion task for slot " + << std::to_string(m_nextSlotToWrite) + << " with empty slots " << m_emptySlots.load()-1 << std::endl; + } + createInsertionTask(values, bytes, latencyMark, retainMark, graph); + m_emptySlots.fetch_add(-1); + + // try to forward the end pointer + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + //long index = tryConsumeNextSlot(); + + if (m_debug) { + debug(); + } + return 0; + } + + void putRows(int pid, char *values, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + //long index = normaliseNotPowerOfTwo(end); + + if (m_debug) { + std::cout << "[DBG] Worker writing to slot " << std::to_string(slotId) + << " with " << std::to_string(index) << " startPointer" + << " and " << std::to_string(index+bytes) << " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckNUMASlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW->reset(); + } + }; + + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + if (m_copyDataOnInsert || m_wraps == 0) { // copy only until the buffer is filled once + /* TODO: fix the case when the batch spills to multiple buffers */ + if (bytes > ((long) m_capacity - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be spilled to more than one buffer"); + } else if (bytes > ((long) ((bufferIdx + 1) * m_maxBufferCapacity) - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be spilled to more than one buffer"); + } else { + std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], values, (bytes) * sizeof(char)); + } + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *) &m_buffers[bufferIdx].get()->data()[normIndex]; + buf[0] = Utils::pack(latencyMark, (int) buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! 
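The integer that putRows, the I/O callback, tryToAcknowledge(), tryConsumeNextSlot(), and free() trade through each slot encodes a small state machine. The enum below is a sketch of that protocol; the names are ours, the numeric values are the ones used throughout this file:

enum SlotState : int {
  kFree       = -1,  // reusable; set by free()
  kWriting    =  0,  // an insertion task owns the slot and is copying/compressing
  kPersisted  =  1,  // the asynchronous disk write completed (set by the callback)
  kReady      =  2,  // acknowledged in order by tryToAcknowledge(); consumable
  kProcessing =  3   // handed out as a processing task by tryConsumeNextSlot()
};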
+ m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = (char *) &m_buffers[bufferIdx].get()->data()[normIndex]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, values, (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, values, 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, values, 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[m_compPos](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, values, 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + } else { + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] Worker compressing data for slot " << std::to_string(slotId) + << " of " << std::to_string(bytes) << " bytes to " + << std::to_string(diskBytes) << " with " + << std::to_string((double)bytes/(double)diskBytes) << " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + } + + void putRows(int pid, std::shared_ptr &values, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + //long index = normaliseNotPowerOfTwo(end); + + if (m_debug) { + std::cout << "[DBG] Worker writing to slot " << std::to_string(slotId) + << " with " << std::to_string(index) << " startPointer" + << " and " << std::to_string(index+bytes) << " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckNUMASlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW->reset(); + } + }; + + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + if (m_copyDataOnInsert || m_wraps == 0) { // copy only until the buffer is filled once + /* TODO: fix the case when the batch spills to multiple buffers */ + if (bytes > ((long) m_capacity - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be spilled to more than one buffer"); + } else if (bytes > ((long) ((bufferIdx + 1) * m_maxBufferCapacity) - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be 
spilled to more than one buffer"); + } else { + std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], values->getBuffer().data(), (bytes) * sizeof(char)); + } + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *) &m_buffers[bufferIdx].get()->data()[normIndex]; + buf[0] = Utils::pack(latencyMark, (int) buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! + m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = (char *) &m_buffers[bufferIdx].get()->data()[normIndex]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, values->getBuffer().data(), (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, values->getBuffer().data(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, values->getBuffer().data(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? 
latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[m_compPos](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, values->getBuffer().data(), 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + + // free UnboundedQueryBuffer + UnboundedQueryBufferFactory::getInstance().freeNB(values->getBufferId(), values); + } else { + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] Worker compressing data for slot " << std::to_string(slotId) + << " of " << std::to_string(bytes) << " bytes to " + << std::to_string(diskBytes) << " with " + << std::to_string((double)bytes/(double)diskBytes) << " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + } + + void putRows(int pid, void *val, long bytes, size_t slot, long latencyMark, long retainMark, int wraps) override { + auto *values = (infinity::core::receive_element_t *) val; + if (values == nullptr || bytes <= 0) + throw std::invalid_argument("error: cannot put null to circular buffer"); + + auto id = slot; + auto slotId = id % m_numberOfSlots; + //m_slots[slotId].m_slot.store(0); + while (m_slots[slotId].m_slot.load() != 0) { + std::cout << "error: inserting data to slot " << slotId + << " is blocked with oldVal " << m_slots[slotId].m_slot.load() << std::endl; + exit(1); + _mm_pause(); + } + + auto index = slotId * m_batchSize; + //long index = normaliseNotPowerOfTwo(end); + + if (m_debug) { + std::cout << "[DBG] Worker writing to slot " << std::to_string(slotId) + << " with " << std::to_string(index) << " startPointer" + << " and " << std::to_string(index+bytes) << " endPointer " << std::endl; + } + + // check if some async calls have finished + // m_filesystem->getHandler().tryCompleteMultiple(); + // tryToAcknowledge(); + + if (!retainMark) { + if (SystemConf::getInstance().LINEAGE_ON) { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + } + auto oldVal = 0; + m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 1); + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + std::cout << "warning: no retain marker was used" << std::endl; + return; + } + + // async write here + auto copyBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); + AckNUMASlotContext context{&m_slots[slotId].m_slot, copyBuffer}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + if (context->m_parW) { + PartialWindowResultsFactory::getInstance().free(context->m_parW->getThreadId(), context->m_parW); + context->m_parW->reset(); + } + }; + + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + if (m_copyDataOnInsert || m_wraps == 0) { // copy only until the buffer is filled once + /* TODO: fix the case when the batch spills to multiple buffers */ + if (bytes > ((long) m_capacity - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be spilled to more than one buffer"); + } else if (bytes > ((long) ((bufferIdx + 1) * m_maxBufferCapacity) - index)) { /* Copy in two parts */ + throw std::runtime_error("error: batches should not be spilled to more than one buffer"); + } else { + std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], values->buffer->getData(), (bytes) * sizeof(char)); + } + } + if (SystemConf::getInstance().LATENCY_ON && !m_copyDataOnInsert) { + long *buf = (long *) &m_buffers[bufferIdx].get()->data()[normIndex]; + buf[0] = Utils::pack(latencyMark, (int) buf[0]); + } + + // memcpy may succeed after the write on disk in a multi-threaded scenario! 
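When compression is enabled, the block written for a slot is a fixed 128-byte metadata header followed by the compressed payload, and the total is rounded up to a power of two so it suits the unbuffered writes this file issues, as the code just below shows. A sketch of that size computation, where nextPowerOfTwo stands in for the roundOffset(Utils::getPowerOfTwo(...)) pair used above:

#include <cstddef>

static size_t nextPowerOfTwo(size_t v) {
  size_t p = 1;
  while (p < v) p <<= 1;  // smallest power of two >= v
  return p;
}

size_t diskBlockSize(size_t compressedPayloadBytes) {
  const size_t kMetadataSize = 128;  // header reserved ahead of the payload
  return nextPowerOfTwo(compressedPayloadBytes + kMetadataSize);
}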
+ m_slots[slotId].m_memcpyFinished.store(true); + + int diskBytes = 0; + char *diskValues = (char *) &m_buffers[bufferIdx].get()->data()[normIndex]; + bool clear = false; + if (m_compress) { + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON && m_startInstr && id % 128 == 0) { + // results + uint32_t *dVs = nullptr; + double *cVs = nullptr, *mns = nullptr, *mxs = nullptr, *mxDs = nullptr; + m_instrFP(pid, (char*)values->buffer->getData(), (int) bytes, dVs, cVs, mns, mxs, mxDs); + m_compStats->addStatistics(dVs, cVs, mns, mxs, mxDs); + } + int metadataSize = 128; + if (pid >= m_copyBuffers.size()) + throw std::runtime_error("error: invalid pid for data insertions with compression"); + //m_compressionFP[m_compPos](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[m_compPos](pid, (char*)values->buffer->getData(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + // if compression fails, fall back to the initial compression scheme + if (clear) { + //m_compressionFP[0](pid, values, 0, (int) bytes, m_copyBuffers[pid].data() + metadataSize, diskBytes, (int) m_copyBuffers[pid].size(), clear, -1); + m_compressionFP[0](pid, (char*)values->buffer->getData(), 0, (int) bytes, copyBuffer->getBufferRaw() + metadataSize, diskBytes, (int) copyBuffer->getCapacity(), clear, -1); + m_compPos = 0; + } + diskBytes += metadataSize; + latencyMark = (SystemConf::getInstance().LATENCY_ON) ? latencyMark : -1; + if (clear || m_compPos == 0) { + //m_compressionFP[m_compPos](pid, values, 0, -1, m_copyBuffers[pid].data(), diskBytes, (int) m_copyBuffers[pid].size(), clear, latencyMark); + m_compressionFP[0](pid, (char*)values->buffer->getData(), 0, -1, copyBuffer->getBufferRaw(), diskBytes, (int) copyBuffer->getCapacity(), clear, latencyMark); + } + //diskValues = m_copyBuffers[pid].data(); + diskValues = copyBuffer->getBufferRaw(); + + m_storedBytes.fetch_add(diskBytes); + m_storedCounter.fetch_add(1); + //diskBytes = 64 * 1024;//bytes; + + // free rdma buffer + RDMABufferPool::getInstance().free(values); + } else { + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + m_root->updateBlockSize(diskBytes); + + if (m_debug) { + std::cout << "[DBG] Worker compressing data for slot " << std::to_string(slotId) + << " of " << std::to_string(bytes) << " bytes to " + << std::to_string(diskBytes) << " with " + << std::to_string((double)bytes/(double)diskBytes) << " ratio " << std::endl; + } + + if (diskBytes > m_batchSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + + if (!SystemConf::getInstance().LINEAGE_ON) { + assert(m_asyncFile->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } else { + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized for slot " + std::to_string(slotId)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_slots[slotId].m_fptr; //m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + assert(fptr->writeAsync(reinterpret_cast(diskValues), slotId * m_batchSize, diskBytes, callback, context) == Status::Ok); + } + // copyBuffer.reset(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + //tryToAcknowledge(); + + } + + long recover(int &bytes) override { + if (!SystemConf::getInstance().LINEAGE_ON) { + throw std::runtime_error("error: lineage must be enabled for recovery"); + } + if (m_numberOfSlotsToRecover == 0) { + return -1; + } + + if (m_emptySlots <= 0) { + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return 0; + } + + /* Get the slot to write */ + auto slot = getNextSlotToWrite(); + if (slot >= m_numberOfSlots) + m_wraps = 1; + auto slotId = slot % m_numberOfSlots; + auto index = slotId * m_batchSize; + auto end = (m_compress) ? slotId * SystemConf::getInstance().BLOCK_SIZE : slotId * m_batchSize; + long readEnd = m_pop.root()->m_startP.get_ro().load() + end; + long readIndex = normaliseNotPowerOfTwo(readEnd); + + auto graph = LineageGraphFactory::getInstance().newInstance(); + + m_slots[slotId].setLineageGraph(graph); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_fileStore->getUnsafeFilePtr(m_query->getId(), bufferId, readEnd, m_pop.root()->m_startId.get_ro().load()); + m_slots[slotId].m_fptr = fptr; + + AckNUMASlotContext context{&m_slots[slotId].m_slot}; + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + + // std::cout << "[DBG] callback setting the slot status with " + // << bytes_transferred << " bytes_transferred" << std::endl; + // Set the slot status to ready + auto oldVal = 0; + while (!context->m_slot->compare_exchange_weak(oldVal, 1)) { + std::cout << "warning: callback (" << std::this_thread::get_id() + << ") blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + }; + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: (during recovery) adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + bytes = (!m_compress) ? m_batchSize : std::min((size_t)m_previousBlockSize, m_batchSize); + if (m_copyDataOnInsert) { + assert(fptr->readAsync(readIndex, &m_buffers[bufferIdx].get()->data()[normIndex], + bytes, callback, context) == Status::Ok); + } else { + //std::cout << "reading async data for slot " << slotId << std::endl; + assert(fptr->readAsync(0, &m_buffers[bufferIdx].get()->data()[normIndex], + bytes, callback, context) == Status::Ok); + } + + m_slots[slotId].m_memcpyFinished.store(true); + + // reduce number of slots + m_numberOfSlotsToRecover--; + m_emptySlots.fetch_add(-1); + + if (m_debug) { + debug(); + } + + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + return 0; + } + + void prepareRecovery() override { + if (!m_copyDataOnInsert) { + if (m_numberOfSlotsToRecover < m_numberOfSlots) { + long readEnd = m_pop.root()->m_startP.get_ro().load(); + long readIndex = normaliseNotPowerOfTwo(readEnd); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_fileStore->getUnsafeFilePtr(m_query->getId(), bufferId, readEnd, m_pop.root()->m_startId.get_ro().load()); + if (m_compress) { + auto sIndex = m_numberOfSlotsToRecover * m_batchSize; + auto readBytes = std::min((size_t)m_previousBlockSize, m_batchSize); + assert(fptr->readSync(0, m_copyBuffers[0].data(), readBytes) == Status::Ok); + int writePos = 0; + bool copy = false; + auto latency = (SystemConf::getInstance().LATENCY_ON) ? 0 : -1; + int sBufferIdx = getBufferIndex(sIndex); + long sNormIndex = normaliseIndex(sIndex, sBufferIdx); + m_decompressionFP[m_compPos](0, &m_buffers[sBufferIdx].get()->data()[sNormIndex], 0, m_batchSize, m_copyBuffers[0].data(), writePos, SystemConf::getInstance().BLOCK_SIZE, copy, latency); + if (writePos != SystemConf::getInstance().BATCH_SIZE) + throw std::runtime_error("error: the write position is not equal to the batch size after decompression"); + //std::cout << "[DBG] decompressing for slot " << m_numberOfSlotsToRecover << " and index " << sIndex << std::endl; + for (size_t slotId = m_numberOfSlotsToRecover + 1; slotId < m_numberOfSlots; ++slotId) { + auto index = slotId * m_batchSize; + auto bufferIdx = getBufferIndex(index); + auto normIndex = normaliseIndex(index, bufferIdx); + std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], &m_buffers[sBufferIdx].get()->data()[sNormIndex], m_batchSize); + //std::cout << "[DBG] copying for slot " << slotId << " and index " << index << std::endl; + } + } else { + assert(fptr->readSync(0, m_copyBuffers[0].data(), m_batchSize) == Status::Ok); + for (size_t slotId = m_numberOfSlotsToRecover; slotId < m_numberOfSlots; ++slotId) { + auto index = slotId * m_batchSize; + auto bufferIdx = getBufferIndex(index); + auto normIndex = normaliseIndex(index, bufferIdx); + std::memcpy(&m_buffers[bufferIdx].get()->data()[normIndex], m_copyBuffers[0].data(), m_batchSize); + } + } + } + m_wraps = 1; + } + } + + bool tryConsumeNextSlot(long &index, int &length, bool recover = false) { + if (recover) { + m_filesystem->getHandler().tryCompleteMultiple(); + tryToAcknowledge(); + } + bool found = false; + if (isSlotReady(m_readerSlot)) { + if (m_debug) { + std::cout << "[DBG] Creating processing task for slot " << std::to_string(m_readerSlot) << std::endl; + } + m_slots[m_readerSlot].m_slot.store(3); + index = m_readerSlot * m_batchSize; + length = m_batchSize; + m_readerSlot++; + if (m_readerSlot == getNumberOfSlots()) { + m_readerSlot = 0; + } + found = true; + } + return found; + } + + void free() override { + throw std::invalid_argument("error: this operator is not supported yet"); + } + + void free(long offset, bool isPersistent = false) override { + if (SystemConf::getInstance().LINEAGE_ON && !isPersistent) { + // std::cout << "non-freeing offset " + std::to_string(offset) << std::endl; + return; + } + // std::cout << "freeing offset " + std::to_string(offset) << std::endl; + + long _start = m_startP.m_value.load(std::memory_order_relaxed); + long index = normaliseNotPowerOfTwo(_start); + long bytes; + /* Measurements */ + if (offset <= index) + bytes = (long) (m_capacity - index + offset + 1); + else + bytes = offset - index + 1; + + m_bytesProcessed.fetch_add(bytes, std::memory_order_relaxed); + m_tuplesProcessed.fetch_add((bytes / (size_t) m_tupleSize), std::memory_order_relaxed); + m_tasksProcessed.fetch_add(1, std::memory_order_relaxed); + + if (m_debug) { + std::cout << "[DBG] Freeing slot " + std::to_string(m_nextFreeSlot) + + " with empty slots " + std::to_string(m_emptySlots) << std::endl; + } 
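The byte accounting near the top of free() handles the wrap-around case explicitly; a sketch with simplified names (freedBytes is hypothetical), where index is the normalised start pointer and offset is the normalised position up to which data is released:

long freedBytes(long index, long offset, long capacity) {
  // offset <= index means the freed region wraps past the end of the buffer:
  // count the tail [index, capacity) plus the head [0, offset].
  return (offset <= index) ? (capacity - index + offset + 1)
                           : (offset - index + 1);
}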
+ + m_slots[m_nextFreeSlot].m_memcpyFinished.store(false); + m_slots[m_nextFreeSlot].m_slot.store(-1); + m_emptySlots.fetch_add(1); + + m_nextFreeSlot++; + if (m_nextFreeSlot == m_numberOfSlots) m_nextFreeSlot = 0; + + /* Set new start pointer */ + m_startP.m_value.store(_start + bytes, std::memory_order_relaxed); + m_root->m_bytesProcessed.get_rw().fetch_add(bytes, + std::memory_order_release); + //m_root->m_startP.get_rw().store(_start + bytes, std::memory_order_release); + } + + int getBufferIndex(const long index) override { + return (int) (index / m_maxBufferCapacity); + } + + long normaliseIndex(const long index) override { + return (index % m_maxBufferCapacity); + } + + long normaliseIndex(const long index, const int bufferIdx) override { + //return index % maxBufferCapacity; + return (bufferIdx != m_numaNodes - 1) ? (index % m_maxBufferCapacity) : (index % m_minBufferCapacity); + } + + ByteBuffer &getBuffer(long index) override { + //index = normalise(index); + index = normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + return getBuffer(bufferIdx); + } + + char *getBufferRaw(long index) override { + index = normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + return getBufferRaw(bufferIdx); + } + + ByteBuffer &getBuffer(int bufferIdx) override { + return *m_buffers[bufferIdx].get(); + } + + char *getBufferRaw(int bufferIdx) override { + return m_buffers[bufferIdx].get()->data(); + } + + int geNumaNodeWithPtr(int index) override { + index = (int) normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + return bufferIdx; + } + + ByteBuffer &getBuffer() override { + return *m_buffers[0].get(); + } + + char *getBufferRaw() override { + return m_buffers[0].get()->data(); + } + + long getLong(size_t index) override { + index = normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + auto p = (long *) m_buffers[bufferIdx].get()->data(); + return p[normIndex / sizeof(long)]; + } + + void setLong(size_t index, long value) override { + index = normaliseNotPowerOfTwo(index); + int bufferIdx = getBufferIndex(index); + long normIndex = normaliseIndex(index, bufferIdx); + auto p = (long *) m_buffers[bufferIdx].get()->data(); + p[normIndex / sizeof(long)] = value; + } + + void appendBytesTo(int startPos, int endPos, ByteBuffer &outputBuffer) override { + (void) startPos; + (void) endPos; + (void) outputBuffer; + throw std::runtime_error("error: this method is not supported for the NUMA-aware Circular Buffer"); + } + + void appendBytesTo(int startPos, int endPos, char *output) override { + (void) startPos; + (void) endPos; + (void) output; + throw std::runtime_error("error: this method is not supported for the NUMA-aware Circular Buffer"); + } + + void createInsertionTask(char *values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if 
(!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion(values, bytes, slot, latencyMark, retainMark, m_wraps); + task->setNumaNodeId(getNumaNode(slot)); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + void createInsertionTask(std::shared_ptr &values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion(values, bytes, slot, latencyMark, retainMark, m_wraps); + task->setNumaNodeId(getNumaNode(slot)); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + void createInsertionTask(void *values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, m_query, nullptr, + &m_query->getWindowDefinition(), m_query->getSchema(), + -1); + batch->setTaskType(TaskType::INSERT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::INSERT); + auto slot = getNextSlotToWrite(); + auto slotId = slot % m_numberOfSlots; + if (slot >= m_numberOfSlots) { + m_wraps = 1; + } + if (SystemConf::getInstance().LINEAGE_ON) { + if (!graph) + graph = LineageGraphFactory::getInstance().newInstance(); + m_slots[slotId].setLineageGraph(graph); + graph.reset(); + if (!m_slots[slotId].m_graph) + throw std::runtime_error("error: the lineage graph is not initialized before task creation for slot " + std::to_string(slot)); + auto bufferId = (m_id % 2 == 0) ? 
0 : 1; + auto fptr = m_fileStore->getFilePtr(m_query->getId(), bufferId, slot * m_batchSize); + m_slots[slotId].m_fptr = fptr; + } + + auto oldVal = -1; + while (!m_slots[slotId].m_slot.compare_exchange_weak(oldVal, 0)) { + std::cout << "warning: adding data to slot " << slotId + << " is blocked with oldVal " << oldVal << std::endl; + _mm_pause(); + } + + if (m_debug) { + std::cout << "slot " << slotId << " is set to " << m_slots[slotId].m_slot.load() << std::endl; + } + task->setInsertion(values, bytes, slot, latencyMark, retainMark, m_wraps); + task->setNumaNodeId(getNumaNode(slot)); + while (!m_query->getTaskQueue()->try_enqueue(task)) + ; + } + + int getNumaNode(int slot) { + int node = 0; + auto numSlots = m_numberOfSlots; + if (numSlots % m_numaNodes != 0) { + auto d = numSlots / m_numaNodes; + numSlots = (d + 1) * m_numaNodes; + } + auto d = numSlots / m_numaNodes; + node = (slot / d) % m_numaNodes; + return node; + } + + bool isPersistent() override { return true; } + + void updateFileEndPtr(long id) override { + m_root->m_endId.get_rw().store(id); + } + + void updateFileStartPtr(long id, long offset) override { + if (m_debug) { + std::cout << "[DBG] garbage collection: updating the start id to " << std::to_string(id) + << " and the offset to " << std::to_string(offset) << std::endl; + } + if (offset < m_prevFreeOffset) { + //throw std::runtime_error("error: trying to free an invalid offset " + + // std::to_string(offset) + " < " + + // std::to_string(m_prevFreeOffset)); + //std::cout << "warning: trying to free an invalid offset in ncqbuffer " + + // std::to_string(offset) + " < " + std::to_string(m_prevFreeOffset) << std::endl; + return; + } + m_prevFreeOffset = offset; + + m_root->m_startId.get_rw().store(id); + m_root->m_startP.get_rw().store(offset); + } + + void updateStepAndOffset(long step, long offset) { + m_root->m_step.get_rw().store(step); + m_root->m_offset.get_rw().store(offset); + } + + void getStepAndOffset(long &step, long &offset) { + step = m_root->m_step.get_ro().load(); + offset = m_root->m_startP.get_ro().load(); //m_root->m_offset.get_ro().load(); + } + + size_t getBytesProcessed() override { return m_root->m_bytesProcessed.get_ro(); } + + size_t getUnsafeStartPointer() override { + return m_root->m_startP.get_ro().load(std::memory_order_relaxed); + } + + void incrementUnsafeStartPointer(size_t offset) override { + auto start = m_root->m_startP.get_ro().load(); + auto bufferId = (m_id % 2 == 0) ? 0 : 1; + m_fileStore->freePersistent(m_query->getId(), bufferId, start + offset); + } + + size_t getUnsafeEndPointer() override { + return m_root->m_endP.get_ro().load(std::memory_order_relaxed); + } + + size_t getUnsafeRemainingBytes() override { + auto start = m_root->m_startP.get_ro().load(); + auto end = m_root->m_endP.get_ro().load(); + return end-start; + } + + bool isSlotReady(size_t slotId) { + checkSlotNumber(slotId); + return m_slots[slotId].m_slot.load() == 2 && m_slots[slotId].m_memcpyFinished; + } + + void checkSlotNumber(size_t slotId) { + if (slotId >= m_numberOfSlots) + throw std::invalid_argument("error: slotId >= m_numberOfSlots"); + } + + size_t getSlotId(size_t index) { return std::floor(index / m_batchSize); } + + void clearPersistentMemory() { + m_pop.close(); + m_filesystem->eraseFiles(); + } + + size_t getBufferCapacity(int id) override { + return (id != m_numaNodes - 1) ? 
m_maxBufferCapacity : m_minBufferCapacity; + } + + void fixTimestamps(size_t index, long timestamp, long step, long batchSize) { + throw std::runtime_error("error: this method is not supported for the NUMA-aware Circular Buffer"); + } + + ~PersistentNumaCircularQueryBuffer() override { + m_pop.close(); + }; + + private: + void tryToAcknowledge() { + try { + if (!m_ackLock.try_lock()) return; + + while (true) { + m_filesystem->getHandler().tryCompleteMultiple(); + auto slotId = m_nextPersistentSlot.load(); + if (m_slots[slotId].m_slot.load() != 1) { + break; + } + + if (m_debug) { + std::cout << "[DBG] Acknowledging slot " << std::to_string(slotId) << std::endl; + } + + // m_endP.fetch_add(_4MB); + if (m_numberOfSlotsToFree == 0) { + m_root->m_endP.get_rw().fetch_add(m_batchSize, std::memory_order_release); + } else { + if (m_compress) { + auto index = slotId * m_batchSize; + int writePos = 0; + bool copy = true; + auto latency = (SystemConf::getInstance().LATENCY_ON) ? 0 : -1; + auto bufferIdx = getBufferIndex(index); + auto normIndex = normaliseIndex(index, bufferIdx); + m_decompressionFP[m_compPos](0, &m_buffers[bufferIdx].get()->data()[normIndex], 0, m_batchSize, m_copyBuffers[0].data(), writePos, SystemConf::getInstance().BLOCK_SIZE, copy, latency); + if (writePos != SystemConf::getInstance().BATCH_SIZE) + throw std::runtime_error("error: the write position is not equal to the batch size after decompression"); + //std::cout << "[DBG] decompressing for slot " << slotId << " and index " << index << std::endl; + } + m_numberOfSlotsToFree--; + } + m_endP.m_value.fetch_add(m_batchSize, std::memory_order_relaxed); + + m_nextPersistentSlot.fetch_add(1); + m_slots[slotId].m_slot.store(2); + + if (m_nextPersistentSlot.load() == m_numberOfSlots) + m_nextPersistentSlot.store(0); + } + + m_ackLock.unlock(); + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + } + } + + struct PMem { + pmem::obj::p> m_startP; + pmem::obj::p> m_endP; + pmem::obj::p> m_bytesProcessed; + pmem::obj::p> m_step; + pmem::obj::p> m_offset; + pmem::obj::p> m_startId; + pmem::obj::p> m_endId; + pmem::obj::p> m_blockSize; + pmem::obj::persistent_ptr next; + PMem() { + m_startP.get_rw() = 0L; + m_endP.get_rw() = 0L; + m_bytesProcessed.get_rw() = 0L; + m_blockSize.get_rw() = 0L; + }; + + void updateBlockSize(long const& value) { + auto prev_value = m_blockSize.get_ro().load(); + while(prev_value < value && + !m_blockSize.get_rw().compare_exchange_weak(prev_value, value)) + {} + } + + /** Copy constructor is deleted */ + PMem(const PMem &) = delete; + /** Assignment operator is deleted */ + PMem &operator=(const PMem &) = delete; + }; +}; \ No newline at end of file diff --git a/src/buffers/QueryBuffer.h b/src/buffers/QueryBuffer.h index 5f8cea4..b187e05 100644 --- a/src/buffers/QueryBuffer.h +++ b/src/buffers/QueryBuffer.h @@ -1,13 +1,25 @@ #pragma once #include -#include +#include +#include #include #include -#include +#include +#include "RDMABufferPool.h" +#include "checkpoint/BlockManager.h" +#include "checkpoint/LineageGraphFactory.h" +#include "compression/CompressionStatistics.h" +#include "filesystem/File.h" +#include "filesystem/FileSystemDisk.h" +#include "tbb/concurrent_queue.h" #include "utils/PaddedLong.h" #include "utils/SystemConf.h" +#include "utils/Utils.h" + +class Query; +class UnboundedQueryBuffer; /* * \brief QueryBuffer is used as a base class for implementing a lock-free circular buffer. 
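Before the class itself, it is worth spelling out the indexing scheme this buffer relies on: the constructor rounds the requested capacity up with `upper_power_of_two`, so `normalise(index)` can wrap an index with a single bitwise AND against `m_mask = m_capacity - 1` rather than a modulo. A minimal stand-alone sketch of that trick (illustration only, not part of the patch):

```cpp
// Sketch, separate from the patch: why the capacity is rounded up to a power
// of two. With m_capacity = 2^k, index % m_capacity reduces to a bitwise AND
// with m_mask = m_capacity - 1, which is what normalise() does below.
#include <cstdio>

static long upper_power_of_two(long v) {
  long power = 1;
  while (power < v) power <<= 1;  // same loop as QueryBuffer::upper_power_of_two
  return power;
}

int main() {
  const long capacity = upper_power_of_two(1000);  // rounds up to 1024
  const long mask = capacity - 1;
  for (long index : {10L, 1023L, 1024L, 2500L})
    std::printf("%ld -> %ld\n", index, index & mask);  // same as index % 1024
  return 0;
}
```

This is also why the protected `setMask`/`setCapacity` helpers further down must be kept consistent with each other: the AND is only equivalent to a modulo while the mask is exactly `capacity - 1` for a power-of-two capacity.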
@@ -19,7 +31,7 @@ class QueryBuffer { - private: + protected: AtomicPaddedLong m_startP; AtomicPaddedLong m_endP; size_t m_capacity; @@ -34,15 +46,85 @@ class QueryBuffer { const int m_tupleSize; bool m_copyDataOnInsert; - friend class CircularQueryBuffer; - friend class NUMACircularQueryBuffer; + typedef QueueIoHandler adapter_t; + typedef FileSystemDisk disk_t; + typedef typename FileSystemDisk::file_t file_t; + + std::shared_ptr m_filesystem = nullptr; + + // used for checkpoints + size_t m_batchSize; + Query *m_query; + bool m_compress = false; + std::unique_ptr m_compStats; + std::vector> m_compressionFP; + std::vector> m_decompressionFP; + std::atomic m_compPos = 0; + std::function m_instrFP; + std::atomic m_startInstr = false; + + long m_prevFreeOffset = -1; + + std::atomic m_storedBytes = 0; + std::atomic m_storedCounter = 0; + + // Slots should have a 4MB size for more efficient writes + struct Slot; + size_t m_numberOfSlots; + std::atomic m_emptySlots; + std::vector m_slots; + std::atomic m_nextSlotToWrite; + // Reader Variables + size_t m_readerSlot = 0; + + long m_previousBlockSize = 0; + int m_numberOfSlotsToRecover = 0; + int m_numberOfSlotsToFree = 0; + BlockManager *m_fileStore; + + // used for RDMA ingestion + bool m_filter = false; + std::function m_filterFP; public: - QueryBuffer(int id, size_t capacity, bool isNuma, int tupleSize = 1, bool copyDataOnInsert = true) : - m_startP(0L), m_endP(0L), m_capacity(upper_power_of_two(capacity)), - m_mask(m_capacity - 1), m_wraps(0), - m_bytesProcessed(0L), m_tuplesProcessed(0L), m_tasksProcessed(0L), - m_temp(0), m_id(id), m_isNuma(isNuma), m_tupleSize(tupleSize), m_copyDataOnInsert(copyDataOnInsert) {}; + QueryBuffer(int id, size_t capacity, bool isNuma, int tupleSize = 1, + bool copyDataOnInsert = true, + size_t batchSize = SystemConf::getInstance().BATCH_SIZE, + std::shared_ptr filesystem = nullptr, bool clearFiles = true) + : m_startP(0L), + m_endP(0L), + m_capacity(upper_power_of_two(capacity)), + m_mask(m_capacity - 1), + m_wraps(0), + m_bytesProcessed(0L), + m_tuplesProcessed(0L), + m_tasksProcessed(0L), + m_temp(0), + m_id(id), + m_isNuma(isNuma), + m_tupleSize(tupleSize), + m_copyDataOnInsert(copyDataOnInsert), + m_filesystem(filesystem), + m_batchSize(batchSize), + m_numberOfSlots(capacity / m_batchSize), + m_emptySlots(m_numberOfSlots), + m_slots(2 * m_numberOfSlots), + m_nextSlotToWrite(0) { + if (clearFiles) { + std::vector files; + auto path = SystemConf::FILE_ROOT_PATH + "/scabbard"; + Utils::tryCreateDirectory(path); + Utils::readDirectory(path, files); + for (auto &f : files) { + if (f == ("queue_pm_" + std::to_string(id)) || + f == ("queue_data_" + std::to_string(id))) { + auto res = std::remove((path+"/"+f).c_str()); + if (res != 0) + std::cout << "Failed to remove file " << (path+"/"+f) << std::endl; + } + } + } + }; size_t upper_power_of_two(size_t v) { size_t power = 1; @@ -51,11 +133,39 @@ class QueryBuffer { return power; } - virtual long put(char *values, long bytes, long latencyMark = -1) = 0; + virtual long put(char *values, long bytes, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) = 0; + + virtual long put(std::shared_ptr &values, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + throw std::runtime_error("error: the put function with UnboundedQueryBuffer is not implemented"); + } + + virtual long put(void *values, long latencyMark = -1, long retainMark = -1, std::shared_ptr graph = nullptr) { + throw std::runtime_error("error: the put 
function with UnboundedQueryBuffer is not implemented"); + } + + virtual void putRows(int pid, char *values, long bytes, size_t slot, long latencyMark = -1, long retainMark = -1, int wraps = 0) { + throw std::runtime_error("error: the putRows function is not implemented"); + } + + virtual void putRows(int pid, std::shared_ptr &values, long bytes, size_t slot, long latencyMark = -1, long retainMark = -1, int wraps = 0) { + throw std::runtime_error("error: the putRows function with UnboundedQueryBuffer is not implemented"); + } + + virtual void putRows(int pid, void *values, long bytes, size_t slot, long latencyMark = -1, long retainMark = -1, int wraps = 0) { + throw std::runtime_error("error: the putRows function with UnboundedQueryBuffer is not implemented"); + } + + virtual long recover(int &bytes) { + throw std::runtime_error("error: the recover function is not implemented"); + } + + virtual void prepareRecovery() { + throw std::runtime_error("error: the prepareRecovery function is not implemented"); + } virtual void free() = 0; - virtual void free(long offset) = 0; + virtual void free(long offset, bool isPersistent = false) = 0; long normalise(long index) { return (index & m_mask); @@ -71,29 +181,31 @@ class QueryBuffer { virtual ByteBuffer &getBuffer() = 0; + virtual char *getBufferRaw() = 0; + unsigned long getWraps() { return m_wraps; } - size_t getMask() { + virtual size_t getMask() { return m_mask; } - size_t getCapacity() { + virtual size_t getCapacity() { return m_capacity; } virtual size_t getBufferCapacity(int id) = 0; - size_t getBytesProcessed() { + virtual size_t getBytesProcessed() { return m_bytesProcessed.load(std::memory_order_relaxed); } - size_t getTuplesProcessed() { + virtual size_t getTuplesProcessed() { return m_tuplesProcessed.load(std::memory_order_relaxed); } - size_t getTasksProcessed() { + virtual size_t getTasksProcessed() { return m_tasksProcessed.load(std::memory_order_relaxed); } @@ -102,7 +214,7 @@ class QueryBuffer { long tail = m_endP.m_value.load(std::memory_order_relaxed); long remaining = (tail < head) ? 
(head - tail) : (m_capacity - (tail - head)); - std::cout << "[DBG]: start " + std::to_string(head) + " end " + std::to_string(tail) + + std::cout << "[DBG] start " + std::to_string(head) + " end " + std::to_string(tail) + " wraps " + std::to_string(m_wraps) + " " + std::to_string(remaining) << std::endl; } @@ -114,15 +226,144 @@ class QueryBuffer { virtual void appendBytesTo(int startPos, int endPos, char *output) = 0; - bool getIsNumaWrapper() { + virtual bool getIsNumaWrapper() { return m_isNuma; } - virtual char *getBufferRaw() = 0; + virtual bool isPersistent() { + return false; + } + + virtual size_t getUnsafeStartPointer() { + return m_startP.m_value.load(std::memory_order_relaxed); + } + + virtual void incrementUnsafeStartPointer(size_t offset) { + m_startP.m_value.fetch_add(offset); + } + + virtual size_t getUnsafeEndPointer() { + return m_endP.m_value.load(std::memory_order_relaxed); + } + + virtual size_t getUnsafeRemainingBytes() { + return m_endP.m_value.load(std::memory_order_relaxed)-m_startP.m_value.load(std::memory_order_relaxed); + } + + size_t getAverageStoredBytes() { + if (m_storedCounter > 0) { + return (m_storedBytes/m_storedCounter); + } + return 0; + } + + size_t getBatchSize() { + return m_batchSize; + } + + void fixTimestamps(size_t index, long timestamp, long step, long batchSize) { + throw std::runtime_error("error: this function is not implemented"); + } + + void setCompressionFP(std::function fp, size_t compPos = 0) { + m_compressionFP.push_back(fp); + m_compPos = compPos; + m_compress = true; + } + + void setDecompressionFP(std::function fp) { + m_decompressionFP.push_back(fp); + } + + void setFilterFP(std::function fp) { + m_filterFP = std::move(fp); + m_filter = true; + } + + bool hasCompressionPolicyChanged() { + if (m_compress && SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON) { + return m_compStats->updateCompressionDecision(); + } + return false; + } + + CompressionStatistics *getCompressionStatistics() { + if (!m_compress || !SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON || !m_compStats) { + throw std::runtime_error("error: adaptive compression is not enabled"); + } + return m_compStats.get(); + } + bool hasCompression(std::function &fp) { + if (m_compress) { + fp = m_compressionFP[m_compPos]; + return true; + } + return false; + } + + void enableInstrumentation(std::function fp) { + m_instrFP = fp; + m_startInstr = true; + } + + void setQuery(Query *query, std::vector *cols = nullptr) { + m_query = query; + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON) + m_compStats = std::make_unique(m_id, cols); + } + + virtual void setupForCheckpoints(std::shared_ptr filesystem) { + throw std::runtime_error("error: the setupForCheckpoints function is not implemented"); + } + + virtual int prepareCheckpoint(long freePtr, tbb::concurrent_queue &readySlots, int &firstSlot, int &lastSlot) { + throw std::runtime_error("error: the prepareCheckpoint function is not implemented"); + } + + size_t getNumberOfSlots() { return m_numberOfSlots; } + + size_t getEmptySlots() { return m_emptySlots; } + + std::vector &getSlots() { return m_slots; } + + void setNumberOfSlotsToRecover(int slots) { + m_numberOfSlotsToRecover = slots; + m_numberOfSlotsToFree = slots; + std::cout << "[DBG] buffer " + std::to_string(m_id) << " has to recover " + + std::to_string(slots) + " slots" << std::endl; + } + + //void setFilesystem(std::shared_ptr &store) { m_fileStore = store; } + + std::shared_ptr getFilesystem() { return m_filesystem; } + + void setFileStore(BlockManager 
*store) { m_fileStore = store; } + + BlockManager *getFileStore() { return m_fileStore; } + + virtual void updateFileEndPtr(long id) { + throw std::runtime_error("error: the updateFileEndPtr function is not implemented"); + } + + virtual void updateFileStartPtr(long id, long offset) { + throw std::runtime_error("error: the updateFileStartPtr function is not implemented"); + } + + virtual void updateStepAndOffset(long step, long offset) { + throw std::runtime_error("error: the updateStepAndOffset function is not implemented"); + } + + virtual void getStepAndOffset(long &step, long &offset) { + throw std::runtime_error("error: the getStepAndOffset function is not implemented"); + } + + int getRemainingSlotsToFree() { + return m_numberOfSlotsToFree; + } virtual ~QueryBuffer() = default; - private: + protected: void setMask(const size_t mask) { m_mask = mask; } @@ -130,4 +371,99 @@ class QueryBuffer { void setCapacity(const size_t capacity) { m_capacity = capacity; } + + int roundOffset(int offset) { + if (!m_filesystem) + throw std::runtime_error("error: the filesystem is not initialized"); + auto alignment = m_filesystem->getSectorSize(); + if (offset < 8 * 1024 && offset != 0) { + offset = 8 * 1024; + } else if (offset % alignment != 0) { + auto d = offset / alignment; + offset = (d + 1) * alignment; + } + return offset; + } + + size_t getNextSlotToWrite() { + return m_nextSlotToWrite++; + } + + struct alignas(64) Slot { + int m_id = -1; + size_t m_size; //SystemConf::_4MB; + std::atomic m_slot; + std::atomic m_memcpyFinished; + std::atomic m_numberOfResults; + std::atomic m_taskId; + std::atomic m_previousSlot; + std::mutex m_updateLock; + char *m_bufferPtr = nullptr; + std::shared_ptr m_graph = nullptr; + bool m_ready; + file_t *m_fptr; + + Slot() : m_slot(-1), m_memcpyFinished(false), m_numberOfResults(0), m_taskId(-1), m_previousSlot(-1), m_ready(false) {} + + void setId(int id, size_t batchSize, char *bufferPtr) { + m_id = id; + m_size = batchSize; + m_bufferPtr = bufferPtr; + } + + void setLineageGraph(std::shared_ptr &graph) { + //std::lock_guard l (m_updateLock); + if (m_graph) { + if (m_graph.use_count() == 1) + LineageGraphFactory::getInstance().free(m_graph); + m_graph.reset(); + } + m_graph = std::move(graph); + graph.reset(); + } + + std::shared_ptr getLineageGraph() { + //std::lock_guard l (m_updateLock); + auto graph = m_graph; + m_graph.reset(); + return graph; + } + + void setNumberOfResults () { + m_numberOfResults.store(1); + } + + int getNumberOfResults () { + m_numberOfResults.store(1); + return m_numberOfResults.load(); + } + + void setPreviousSlot (int prev) { + if (prev != 1 && prev != 3) { + throw std::runtime_error("error: setting the previous slot value to " + std::to_string(prev)); + } + m_previousSlot = prev; + } + + int getPreviousSlot () { + if (m_previousSlot != 1 && m_previousSlot != 3) { + throw std::runtime_error("error: getting the previous slot value " + std::to_string(m_previousSlot)); + } + return m_previousSlot.load(); + } + + void prefetch() { + if (m_bufferPtr) + __builtin_prefetch(m_bufferPtr, 1, 3); + } + + void setReady() { + m_ready = true; + } + + void reset() { + m_slot.store(-1); + m_ready = false; + } + }; }; \ No newline at end of file diff --git a/src/buffers/RDMABufferPool.h b/src/buffers/RDMABufferPool.h new file mode 100644 index 0000000..98f1793 --- /dev/null +++ b/src/buffers/RDMABufferPool.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include + +#include "RDMA/infinity/infinity.h" + +/* + * \brief This class creates a single pool of 
buffers used by all workers to perform RDMA operations. + * + * */ + +class RDMABufferPool { + private: + std::atomic count{}; + infinity::core::Context *m_context = nullptr; + RDMABufferPool() = default; + + public: + static RDMABufferPool &getInstance() { + static RDMABufferPool instance; + return instance; + } + + RDMABufferPool(RDMABufferPool const &) = delete; + void operator=(RDMABufferPool const &) = delete; + + void setContext(infinity::core::Context *context) { + if (!context) { + throw std::runtime_error("error: setup the context"); + } + m_context = context; + } + + void free(infinity::core::receive_element_t *elem) { + if (!m_context) { + throw std::runtime_error("error: setup the context"); + } + m_context->postReceiveBuffer(elem->buffer); + delete(elem); + } + + void free(infinity::memory::Buffer *elem) { + if (!m_context) { + throw std::runtime_error("error: setup the context"); + } + m_context->postReceiveBuffer(elem); + delete(elem); + } + + long getCount() { + return count.load(); + } +}; diff --git a/src/buffers/UnboundedQueryBuffer.h b/src/buffers/UnboundedQueryBuffer.h index 4cb4e1a..0cf490e 100644 --- a/src/buffers/UnboundedQueryBuffer.h +++ b/src/buffers/UnboundedQueryBuffer.h @@ -53,4 +53,12 @@ class UnboundedQueryBuffer { auto p = (long *) m_buffer.data(); p[index] = value; } + + void putBytes(char *value, size_t length) { + if (m_position + length > m_capacity) { + throw std::runtime_error("error: increase the size of the UnboundedQueryBuffer"); + } + std::memcpy(m_buffer.data() + m_position, value, length); + m_position += length; + } }; \ No newline at end of file diff --git a/src/buffers/UnboundedQueryBufferFactory.h b/src/buffers/UnboundedQueryBufferFactory.h index 96f3e8e..de9ef56 100644 --- a/src/buffers/UnboundedQueryBufferFactory.h +++ b/src/buffers/UnboundedQueryBufferFactory.h @@ -15,9 +15,12 @@ class UnboundedQueryBufferFactory { private: + const int m_numberOfThreads; std::atomic count; - tbb::concurrent_queue> pool; - UnboundedQueryBufferFactory() {}; + tbb::concurrent_queue> m_pool; + std::vector>> m_poolCB, m_poolNB; + UnboundedQueryBufferFactory() : m_numberOfThreads(SystemConf::getInstance().WORKER_THREADS), m_poolCB(m_numberOfThreads), + m_poolNB(m_numberOfThreads + 1){}; public: static UnboundedQueryBufferFactory &getInstance() { @@ -30,7 +33,7 @@ class UnboundedQueryBufferFactory { std::shared_ptr newInstance() { std::shared_ptr buffer; - bool hasRemaining = pool.try_pop(buffer); + bool hasRemaining = m_pool.try_pop(buffer); if (!hasRemaining) { int id = (int) count.fetch_add(1); buffer = std::make_shared(UnboundedQueryBuffer(id, @@ -40,8 +43,46 @@ class UnboundedQueryBufferFactory { } void free(std::shared_ptr &buffer) { - buffer->clear(); - pool.push(buffer); + // buffer->clear(); + buffer->setPosition(0); + m_pool.push(buffer); + } + + std::shared_ptr newInstance(int pid) { + if (pid >= m_numberOfThreads) + throw std::runtime_error("error: invalid pid for creating an unbounded buffer"); + std::shared_ptr buffer; + bool hasRemaining = m_poolCB[pid].try_pop(buffer); + if (!hasRemaining) { + count.fetch_add(1); + buffer = std::make_shared(UnboundedQueryBuffer(pid, + SystemConf::getInstance().BLOCK_SIZE)); + } + return buffer; + } + + void free(int pid, std::shared_ptr &buffer) { + // buffer->clear(); + buffer->setPosition(0); + m_poolCB[pid].push(buffer); + } + + std::shared_ptr newNBInstance(int pid) { + if (pid >= m_numberOfThreads + 1) + throw std::runtime_error("error: invalid pid for creating an unbounded buffer"); + std::shared_ptr 
buffer; + bool hasRemaining = m_poolNB[pid].try_pop(buffer); + if (!hasRemaining) { + buffer = std::make_shared(UnboundedQueryBuffer(pid, + SystemConf::getInstance().BATCH_SIZE)); + } + return buffer; + } + + void freeNB(int pid, std::shared_ptr &buffer) { + //buffer->clear(); + buffer->setPosition(0); + m_poolNB[pid].push(buffer); } long getCount() { diff --git a/src/checkpoint/BlockManager.cpp b/src/checkpoint/BlockManager.cpp new file mode 100644 index 0000000..7559cf3 --- /dev/null +++ b/src/checkpoint/BlockManager.cpp @@ -0,0 +1,229 @@ +#include "checkpoint/BlockManager.h" + +#include +#include +#include +#include + +#include "buffers/QueryBuffer.h" +#include "filesystem/File.h" +#include "filesystem/FileSystemDisk.h" +#include "utils/Query.h" + +BlockManager::BlockManager(std::vector> &queries, bool clearFiles) + : m_numberOfQueries(queries.size()), + m_queries(queries), + m_filesystems(m_numberOfQueries, std::vector, tbb::cache_aligned_allocator>>(2)), + m_locks(m_numberOfQueries), + m_lFiles(m_numberOfQueries, CircularFileList(m_listSize)), + m_rFiles(m_numberOfQueries, CircularFileList(m_listSize)) { + + std::vector files; + auto path = SystemConf::FILE_ROOT_PATH + "/scabbard"; + Utils::tryCreateDirectory(path); + Utils::readDirectory(path, files); + std::sort(files.begin(), files.end()); + if (clearFiles) { + for (auto &f : files) { + if (f.find("fs_queue_data_") != std::string::npos) { + auto res = std::remove((path+"/"+f).c_str()); + if (res != 0) + std::cout << "Failed to remove file " << (path+"/"+f) << std::endl; + } + } + } + + for (auto &q : m_queries) { + auto qid = q->getId(); + q->getBuffer()->setFileStore(this); + if (q->getBuffer()->getFilesystem()) { + m_filesystems[qid][0] = q->getBuffer()->getFilesystem(); + if (!clearFiles) { + loadFiles(qid, 0, files); + } + } else { + //throw std::runtime_error("error: set the filesystem!"); + } + + q->getSecondBuffer()->setFileStore(this); + if (q->getSecondBuffer()->getFilesystem()) { + m_filesystems[qid][1] = q->getBuffer()->getFilesystem(); + if (!clearFiles) { + loadFiles(qid, 1, files); + } + } else { + // throw std::runtime_error("error: set the filesystem!"); + } + + + } +} + +BlockManager::file_t *BlockManager::getFilePtr(int query, int bufferId, long index) { + if (query >= m_numberOfQueries) + throw std::runtime_error("error: invalid query id"); + if (bufferId >= 2) throw std::runtime_error("error: invalid bufferId id"); + + bool found = false; + file_t *filePtr = nullptr; + { + //std::lock_guard l(m_locks[query]); + auto &files = (bufferId == 0) ? m_lFiles[query] : m_rFiles[query]; + + auto f = files.front(); + if (f) { + if (index < f->m_end) { + filePtr = f->m_filePtr; + found = true; + } else { + if (files.m_elements > 1) { + f = files.secondFront(); + filePtr = f->m_filePtr; + found = true; + } + } + } + + if (!found) { + auto buffer = (bufferId == 0) ? m_queries[query]->getBuffer() + : m_queries[query]->getSecondBuffer(); + auto bufferSize = buffer->getCapacity(); + auto filesystem = + (bufferId == 0) ? 
m_filesystems[query][0] : m_filesystems[query][1]; + auto id = index / bufferSize; + + if (!files.hasWrapped()) { + auto newFileName = std::make_shared( + "scabbard/fs_queue_data_" + std::to_string(bufferId) +"_" + std::to_string(id)); + + if (m_debug) { + std::cout << "[FS] allocating file " << newFileName << std::endl; + } + + if (!filesystem) + throw std::runtime_error("error: filesystem is not initialized"); + auto newFilePtr = filesystem->newFile(*newFileName); + if (!newFilePtr) + throw std::runtime_error("error: filesystem failed to initialize the filePtr"); + + files.push_back(newFileName, newFilePtr, (long)id * bufferSize, + (id + 1) * bufferSize); + filePtr = newFilePtr; + } else { + std::shared_ptr newFileName; + filePtr = files.push_back_dummy(newFileName); + if (m_debug) { + std::cout << "[FS] allocating file " << newFileName << std::endl; + } + } + + buffer->updateFileEndPtr(id); + } + } + + return filePtr; +} + +BlockManager::file_t *BlockManager::getUnsafeFilePtr(int query, int bufferId, long index, int fileId) { + if (query >= m_numberOfQueries) + throw std::runtime_error("error: invalid query id"); + if (bufferId >= 2) throw std::runtime_error("error: invalid bufferId id"); + + bool found = false; + file_t *filePtr = nullptr; + { + // std::lock_guard l(m_locks[query]); + auto &files = (bufferId == 0) ? m_lFiles[query] : m_rFiles[query]; + auto &fileVector = files.getUnsafeFiles(); + auto numOfFiles = fileVector.size(); + auto id = fileId; + int counter = 0; + while (true) { + // todo: fix this + if (index < fileVector[id].m_end || true) { + filePtr = fileVector[id].m_filePtr; + break; + } + id++; + counter++; + if (id == numOfFiles) { + id = 0; + } + if (counter == numOfFiles) { + throw std::runtime_error("error: file not found!"); + } + } + } + return filePtr; +} + +void BlockManager::freePersistent(int query, int bufferId, long index) { + if (query >= m_numberOfQueries) + throw std::runtime_error("error: invalid query id"); + if (bufferId >= 2) throw std::runtime_error("error: invalid bufferId id"); + + auto &files = (bufferId == 0) ? m_lFiles[query] : m_rFiles[query]; + auto filesystem = + (bufferId == 0) ? m_filesystems[query][0] : m_filesystems[query][1]; + auto buffer = (bufferId == 0) ? m_queries[query]->getBuffer() + : m_queries[query]->getSecondBuffer(); + auto capacity = buffer->getCapacity(); + auto id = index / capacity; + { + std::lock_guard l(m_locks[query]); + while (files.m_elements > 0) { + auto f = files.front(); + if (index < f->m_end) { + break; + } else { + if (!filesystem) + throw std::runtime_error("error: filesystem is not initialized"); + if(m_debug) { + std::cout << "[FS] freeing file " << f->m_fileName << std::endl; + } + //filesystem->eraseFile(f->m_filePtr); + // reset the slot; + f->m_end = f->m_end + capacity; + files.pop_front(); + } + } + buffer->updateFileStartPtr(id, index); + } +} + +void BlockManager::freeSlot(int query, int bufferId, long index) { + if (query >= m_numberOfQueries) + throw std::runtime_error("error: invalid query id"); + if (bufferId >= 2) throw std::runtime_error("error: invalid bufferId id"); + + throw std::runtime_error("error: this operation is not supported yet"); +} + +void BlockManager::loadFiles(int query, int bufferId, std::vector &fileNames) { + auto path = SystemConf::FILE_ROOT_PATH + "/scabbard"; + Utils::tryCreateDirectory(path); + auto fileSuffix = "fs_queue_data_" + std::to_string(bufferId); + auto &files = (bufferId == 0) ? m_lFiles[query] : m_rFiles[query]; + auto filesystem = + (bufferId == 0) ? 
m_filesystems[query][0] : m_filesystems[query][1];
+  if (!filesystem)
+    throw std::runtime_error("error: filesystem is not initialized");
+  auto buffer = (bufferId == 0) ? m_queries[query]->getBuffer()
+                                : m_queries[query]->getSecondBuffer();
+  auto bufferSize = buffer->getCapacity();
+  int id = 0;
+  for (auto &f : fileNames) {
+    if (f.find(fileSuffix) != std::string::npos) {
+      if (m_debug) {
+        std::cout << "[FS] loading file " << f << std::endl;
+      }
+      std::string name;
+      name.append("scabbard").append("/").append(f);
+      std::shared_ptr<std::string> newFileName(new std::string(name));
+      auto newFilePtr = filesystem->newFile(*newFileName);
+      files.push_back(newFileName, newFilePtr, (long)id * bufferSize,
+                      (id + 1) * bufferSize);
+      id++;
+    }
+  }
+}
diff --git a/src/checkpoint/BlockManager.h b/src/checkpoint/BlockManager.h
new file mode 100644
index 0000000..e4f9251
--- /dev/null
+++ b/src/checkpoint/BlockManager.h
@@ -0,0 +1,155 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "filesystem/File.h"
+#include "filesystem/FileSystemDisk.h"
+
+class Query;
+class QueryBuffer;
+
+/*
+ * \brief The BlockManager is responsible for managing the persistent data
+ * of a query: it returns valid file pointers for persistence operations and
+ * tracks files for garbage collection.
+ *
+ * */
+
+class BlockManager {
+ private:
+  const int m_listSize = 10;
+  int m_numberOfQueries;
+  std::vector<std::shared_ptr<Query>> m_queries;
+
+  struct FileHelper;
+  struct CircularFileList;
+  typedef QueueIoHandler adapter_t;
+  typedef FileSystemDisk<adapter_t> disk_t;
+  typedef typename FileSystemDisk<adapter_t>::file_t file_t;
+
+  std::vector<std::vector<std::shared_ptr<disk_t>, tbb::cache_aligned_allocator<std::shared_ptr<disk_t>>>> m_filesystems;
+  std::vector<std::mutex> m_locks;
+  std::vector<CircularFileList> m_lFiles;
+  std::vector<CircularFileList> m_rFiles;
+
+  const bool m_debug = false;
+
+ public:
+  BlockManager(std::vector<std::shared_ptr<Query>> &queries, bool clearFiles = true);
+
+  file_t *getFilePtr(int query, int bufferId, long index);
+
+  file_t *getUnsafeFilePtr(int query, int bufferId, long index, int fileId);
+
+  void freePersistent(int query, int bufferId, long index);
+
+  void freeSlot(int query, int bufferId, long index);
+
+ private:
+  void loadFiles(int query, int bufferId, std::vector<std::string> &files);
+
+  struct alignas(64) FileHelper {
+    std::shared_ptr<std::string> m_fileName;
+    file_t *m_filePtr;
+    long m_start, m_end;
+    FileHelper(std::shared_ptr<std::string> fileName = nullptr, file_t *filePtr = nullptr, long start = INT_MIN, long end = INT_MIN)
+        : m_fileName(fileName), m_filePtr(filePtr), m_start(start), m_end(end) {}
+    ~FileHelper() {
+      m_fileName.reset();
+    }
+    void reset() {
+      m_fileName.reset();
+      m_filePtr = nullptr;
+      m_start = INT_MIN;
+      m_end = INT_MIN;
+    }
+  };
+
+  struct CircularFileList {
+    std::vector<FileHelper> m_buffer;
+    int m_size;
+    int m_readIdx;
+    int m_writeIdx;
+    int m_elements = 0;
+    int m_counter = 0;
+    CircularFileList(int size = 0) : m_buffer(size, FileHelper()), m_size(size) {
+      m_readIdx = 0;
+      m_writeIdx = size - 1;
+    }
+    void set_capacity(int size) {
+      m_buffer.resize(size, FileHelper());
+      m_size = size;
+      m_readIdx = 0;
+      m_writeIdx = size - 1;
+    }
+    void push_back(std::shared_ptr<std::string> &fileName, file_t *filePtr, long start, long end) {
+      if (m_elements == m_size) {
+        //m_buffer.resize(m_size * 2, FileHelper());
+        //m_size = 2 * m_size;
+        throw std::runtime_error("error: increase the size of the list holding the files");
+      }
+
+      m_counter++;
+      m_writeIdx++;
+      if (m_writeIdx == (int) m_buffer.size())
+        m_writeIdx = 0;
+
+      m_buffer[m_writeIdx].m_fileName = fileName;
+      m_buffer[m_writeIdx].m_filePtr = filePtr;
+      m_buffer[m_writeIdx].m_start = start;
+      m_buffer[m_writeIdx].m_end = end;
+
+      m_elements++;
+    }
+    file_t *push_back_dummy(std::shared_ptr<std::string> &name) {
+      if (m_elements == m_size) {
+        throw std::runtime_error("error: increase the size of the list holding the files");
+      }
+
+      auto filePtr = m_buffer[m_writeIdx].m_filePtr;
+      name = m_buffer[m_writeIdx].m_fileName;
+      m_writeIdx++;
+      if (m_writeIdx == (int) m_buffer.size())
+        m_writeIdx = 0;
+
+      m_elements++;
+      return filePtr;
+    }
+    FileHelper *front() {
+      if (m_elements > 0)
+        return &m_buffer[m_readIdx];
+      else
+        return nullptr;
+      //throw std::runtime_error("error: empty CircularList");
+    }
+    FileHelper *secondFront() {
+      if (m_elements > 1)
+        return &m_buffer[(m_readIdx+1)%m_buffer.size()];
+      else
+        throw std::runtime_error("error: empty CircularList in BlockManager");
+    }
+    void pop_front() {
+      m_elements--;
+      //m_buffer[m_readIdx].reset();
+      m_readIdx++;
+      if (m_readIdx == (int) m_buffer.size())
+        m_readIdx = 0;
+    }
+    int size() { return m_elements; }
+    int capacity() { return m_size; }
+    std::vector<FileHelper> &getUnsafeFiles() {
+      return m_buffer;
+    }
+    bool hasWrapped() {
+      if (m_counter >= m_size) {
+        return true;
+      }
+      return false;
+    }
+  };
+};
\ No newline at end of file
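The `CircularFileList` above is a fixed-capacity ring: `push_back` advances a write index, `pop_front` a read index, and both wrap at the vector's size, so file handles are recycled in FIFO order once the list has wrapped. A stripped-down sketch of the same mechanics (a hypothetical `Ring` over ints, not the patch's code):

```cpp
// Sketch, separate from the patch: the ring mechanics behind CircularFileList,
// reduced to ints instead of FileHelper entries.
#include <stdexcept>
#include <vector>

struct Ring {
  std::vector<int> buf;
  int readIdx = 0, writeIdx, elements = 0;
  explicit Ring(int size) : buf(size), writeIdx(size - 1) {}
  void push_back(int v) {  // mirrors CircularFileList::push_back
    if (elements == (int)buf.size())
      throw std::runtime_error("ring is full");
    if (++writeIdx == (int)buf.size()) writeIdx = 0;
    buf[writeIdx] = v;
    ++elements;
  }
  int *front() { return elements > 0 ? &buf[readIdx] : nullptr; }
  void pop_front() {  // frees the oldest entry
    --elements;
    if (++readIdx == (int)buf.size()) readIdx = 0;
  }
};

int main() {
  Ring r(3);
  r.push_back(1);
  r.push_back(2);
  r.pop_front();  // 1 leaves, 2 becomes the front
  return *r.front() == 2 ? 0 : 1;
}
```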
diff --git a/src/checkpoint/Checkpoint.h b/src/checkpoint/Checkpoint.h
new file mode 100644
index 0000000..b4462e3
--- /dev/null
+++ b/src/checkpoint/Checkpoint.h
@@ -0,0 +1,157 @@
+#pragma once
+
+#include
+
+/*
+ * \brief This class describes a checkpoint, which is considered complete
+ * once all required tasks have acknowledged it. It holds all the metadata of
+ * a checkpoint, while handles to the actual state are stored in the
+ * FileBackedCheckpointCoordinator.
+ *
+ * */
+
+enum CheckpointState : uint8_t { COMPLETED, PENDING };
+
+// todo: should we resume pending checkpoints??
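The acknowledgement protocol described above reduces to one shared atomic counter: every persisted slot acknowledges exactly once, and whichever worker delivers the final acknowledgement flips the state to COMPLETED. A stripped-down sketch of that mechanism, using a hypothetical `MiniCheckpoint` type rather than the real class below:

```cpp
// Illustration only: the acknowledgement counting that decides when a
// checkpoint completes, stripped down from the Checkpoint class below.
#include <atomic>
#include <cstdio>

struct MiniCheckpoint {
  std::atomic<int> m_acked{0};
  const int m_expected;
  explicit MiniCheckpoint(int slots) : m_expected(slots) {}
  // Called once per persisted slot; returns true only for the final ack.
  bool ack() { return m_acked.fetch_add(1) + 1 == m_expected; }
};

int main() {
  MiniCheckpoint cp(3);
  for (int slot = 0; slot < 3; ++slot)
    if (cp.ack()) std::puts("checkpoint complete");  // printed on the last ack
  return 0;
}
```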
+class Checkpoint { + private: + long m_checkpointId; + int m_pipelineId; + long m_triggerTimestamp; + std::atomic m_numberOfSlots; + std::atomic m_inputQueueSlots; + std::mutex m_completionLock; + std::atomic m_counter; + std::atomic m_workers; + long m_lastTaskId; + std::atomic *m_checkpointCounter; + CheckpointState m_state; + std::atomic m_checkpointSize; + std::atomic m_checkpointDuration; + std::string m_filePath; + //std::atomic m_readyFlag; + + friend class FileBackedCheckpointCoordinator; + + public: + explicit Checkpoint(long id = -1, long timestamp = -1, int slots = -1, + long taskId = -1, + std::atomic *checkpointCounter = nullptr) + : m_checkpointId(id), + m_pipelineId(0), + m_triggerTimestamp(timestamp), + m_numberOfSlots(slots), + m_inputQueueSlots(0), + m_counter(0), + m_workers(SystemConf::getInstance().WORKER_THREADS), + m_lastTaskId(taskId), + m_checkpointCounter(checkpointCounter), + m_state(CheckpointState::PENDING), + m_checkpointSize(0), + m_checkpointDuration(0){}; + + void updateCounter(size_t size = 0) { + m_counter.fetch_add(1); + /*int oldValue = m_counter.load() ; + while(!m_counter.compare_exchange_weak(oldValue, oldValue + 1, + std::memory_order_release, + std::memory_order_relaxed)) { + _mm_pause(); + oldValue = m_counter.load() ; + }*/ + updateSize(size); + if (m_counter.load() == m_numberOfSlots.load()) { + const std::lock_guard lock(m_completionLock); + setComplete(); + if (m_counter.load() > m_numberOfSlots.load()) { + std::cout << "m_counter " << m_counter.load() << " m_numberOfSlots " + << m_numberOfSlots.load() << std::endl; + throw std::runtime_error( + "error: the counter of the checkpoint exceeds the expected number"); + } + } + } + + void updateSize(size_t size) { m_checkpointSize.fetch_add(size); } + + void updateDuration(size_t duration) { + // take the current timestamp after the last checkpoint for the duration + // m_checkpointDuration.fetch_add(duration); + throw std::runtime_error( + "error: the duration is measured only for a full snapshot and not " + "individual checkpoints yet"); + } + + void setFilePath(const std::string path) { m_filePath = path; } + + void setCheckpointCounter(std::atomic *checkpointCounter) { + m_checkpointCounter = checkpointCounter; + } + + void resetSlots() { m_numberOfSlots.store(0); } + + void increaseSlots(int slots, int inputQueueSlots = 0) { + m_numberOfSlots.fetch_add(slots); + m_inputQueueSlots.fetch_add(inputQueueSlots); + } + + int getSlots() { return m_numberOfSlots.load(); } + + int getInputQueueSlots() { return m_inputQueueSlots.load(); } + + size_t getCheckpointSize() { return m_checkpointSize.load(); } + + void setCheckpointId(long id, int pipeline = 0) { + m_checkpointId = id; + m_pipelineId = pipeline; + } + + void resetCheckpoint() { + m_checkpointId = -1; + m_triggerTimestamp = -1; + m_numberOfSlots.store(0); + m_inputQueueSlots.store(0); + m_counter.store(0); + m_workers.store(SystemConf::getInstance().WORKER_THREADS); + m_lastTaskId = -1; + m_state = CheckpointState::PENDING; + m_checkpointSize = 0; + m_checkpointDuration = 0; + //m_readyFlag.store(false); + } + + std::string toString() { + std::string s; + s.append(std::to_string(m_checkpointId)); + s.append(" ["); + if (m_state == CheckpointState::PENDING) + s.append("Pending").append(" "); + else + s.append("Completed").append(" "); + s.append("] "); + s.append("duration (" + std::to_string(m_checkpointDuration) + ") "); + s.append("triggered (" + std::to_string(m_triggerTimestamp) + ") "); + return s; + } + + private: + void setComplete() { 
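+    // Callers reach this through updateCounter()/trySetComplete() while
+    // holding m_completionLock; the state check below makes completion
+    // idempotent, and the release fence publishes the checkpoint's writes
+    // before the coordinator observes the incremented counter.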
+ if (m_state != CheckpointState::COMPLETED /*&& m_readyFlag.load()*/) { + m_state = CheckpointState::COMPLETED; + if (m_checkpointCounter) { + std::atomic_thread_fence(std::memory_order_release); + m_checkpointCounter->fetch_add(1); + std::cout << "[CP] checkpoint " + std::to_string(m_checkpointId) + << " has finished for pipeline " + std::to_string(m_pipelineId) + << std::endl; + } + } + } + + void trySetComplete(){ + if (m_counter.load() == m_numberOfSlots.load()) { + const std::lock_guard lock(m_completionLock); + setComplete(); + } + } +}; \ No newline at end of file diff --git a/src/checkpoint/CheckpointStatistics.h b/src/checkpoint/CheckpointStatistics.h new file mode 100644 index 0000000..39898ea --- /dev/null +++ b/src/checkpoint/CheckpointStatistics.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +/* + * \brief Utilities to keep track of basic checkpoint statistics, such + * as average duration, or average size before and after compression. + * + * */ + +class CheckpointStatistics { + private: + double m_duration = 0; + size_t m_durationCounter = 0; + double m_preparation = 0; + size_t m_preparationCounter = 0; + size_t m_initialSize = 0; + size_t m_initialSizeCounter = 0; + size_t m_checkpointSize = 0; + size_t m_checkpointCounter = 0; + + public: + CheckpointStatistics() { + + } + + void registerDuration(double duration) { + m_duration += duration; + m_durationCounter++; + } + + void registerPreparation(double preparation) { + m_preparation += preparation; + m_preparationCounter++; + } + + void registerSize(size_t size) { + m_initialSize += size; + m_initialSizeCounter++; + } + + void registerCheckpointSize(size_t size) { + m_checkpointSize += size; + m_checkpointCounter++; + } + + std::string toString() { + std::string s; + s.append("[prep ") + .append(std::to_string(double(m_preparation/m_preparationCounter))) + .append(" dur ") + .append(std::to_string(double(m_duration/m_durationCounter))) + .append(" initial bytes ") + .append(std::to_string(m_initialSize/m_initialSizeCounter)) + .append(" stored bytes ") + .append(std::to_string(m_checkpointSize/m_checkpointCounter)) + .append("]"); + return s; + } +}; \ No newline at end of file diff --git a/src/checkpoint/FileBackedCheckpointCoordinator.cpp b/src/checkpoint/FileBackedCheckpointCoordinator.cpp new file mode 100644 index 0000000..bb78fb0 --- /dev/null +++ b/src/checkpoint/FileBackedCheckpointCoordinator.cpp @@ -0,0 +1,1991 @@ +#include "checkpoint/FileBackedCheckpointCoordinator.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buffers/UnboundedQueryBufferFactory.h" +#include "checkpoint/Checkpoint.h" +#include "checkpoint/CheckpointStatistics.h" +#include "checkpoint/Recovery.h" +#include "dispatcher/ITaskDispatcher.h" +#include "filesystem/File.h" +#include "result/PartialResultSlot.h" +#include "result/ResultHandler.h" +#include "tasks/TaskFactory.h" +#include "tasks/WindowBatchFactory.h" +#include "utils/Channel.h" +#include "utils/Guid.h" +#include "utils/Query.h" +#include "utils/QueryConfig.h" +#include "utils/QueryOperator.h" +#include "utils/Utils.h" + +class AckCheckpointContext : public IAsyncContext { + public: + AckCheckpointContext(Checkpoint *checkpoint, int slotId, std::atomic *slot, + int previousState, std::atomic *frCounter, + std::mutex *lock, std::shared_ptr buffer) + : m_checkpoint(checkpoint), + m_slotId(slotId), + m_slot(slot), + m_previousState(previousState), + m_frCounter(frCounter), + m_lock(lock), + m_buffer(buffer) { + if 
(previousState != 1 && previousState != 3) + throw std::runtime_error("error: invalid previousState value " + std::to_string(previousState)); + } + + protected: + Status deepCopyInternal(IAsyncContext *&context_copy) final { + return IAsyncContext::deepCopyInternal(*this, context_copy); + } + + public: + Checkpoint *m_checkpoint; + int m_slotId; + std::atomic *m_slot; + int m_previousState; + std::atomic *m_frCounter; + std::mutex *m_lock; + std::shared_ptr m_buffer; +}; + +FileBackedCheckpointCoordinator::FileBackedCheckpointCoordinator( + long jobId, const std::vector> &queries, std::atomic *clearFiles, + std::shared_ptr filesystem, bool triggersCheckpoints) + : m_jobId(jobId), + m_triggersCheckpoints(triggersCheckpoints), + m_waitCondition(false), + m_guid(Guid::Create()), + m_checkpointId(0), + m_checkpointCounter(0), + m_recoveryCounter(0), + m_numOfQueries(queries.size()), + m_queries(queries), + m_checkpoints(m_numOfQueries, std::vector>( + m_numberOfCheckpoints)), + m_recoveries(m_numOfQueries), + m_checkpointInput(m_numOfQueries, std::vector(2, false)), + m_lastTaskId(m_numOfQueries), + m_checkpointPtrs(m_numOfQueries, std::vector(12, -1)), + m_taskDispatchers(m_numOfQueries), + m_resultHandlers(m_numOfQueries), + m_hasConcurrentCheckpoint(false), + m_intermSizes(m_numOfQueries), + m_outputSizes(m_numOfQueries), + m_expectedBytes(0), + m_measuredBytes(0), + m_statistics(std::make_unique()), + m_useCompression(m_numOfQueries), + m_compressionFP(m_numOfQueries), + m_decompressionFP(m_numOfQueries), + m_readySlots(m_numOfQueries, std::vector>(3)), + m_filesystem(filesystem), + m_poolSize(PMEMOBJ_MIN_POOL), + m_pmFileName("scabbard/checkpoint_metadata_" + std::to_string(jobId)), + m_slotFileSize(SystemConf::getInstance().BLOCK_SIZE), + m_asyncFileNames(m_numOfQueries, + std::vector(3 * m_numberOfCheckpoints)), + m_asyncFiles(m_numOfQueries, + std::vector(3 * m_numberOfCheckpoints)), + m_asyncFileOptions(std::make_unique(true, false)), + m_clearFiles(clearFiles), m_ready(false) { + // initialize persistent memory for metadata + try { + /* Bind main thread to a CPU core */ + Utils::bindProcess(SystemConf::getInstance().WORKER_THREADS + 1); + + if (m_clearFiles && m_clearFiles->load()) { + std::vector files; + auto path = SystemConf::FILE_ROOT_PATH + "/scabbard"; + Utils::readDirectory(path, files); + for (auto &f : files) { + auto absolutePath = path + "/" + f; + if (f != "." && f != ".." && std::experimental::filesystem::is_directory(absolutePath)) { + auto res = std::experimental::filesystem::remove_all(absolutePath); + if (res == 0) { + std::cout << "Failed to remove folder " << (absolutePath) << std::endl; + } else { + std::cout << "Removing folder " << (absolutePath) << std::endl; + } + } + if (f != "." && f != ".." && f.find("checkpoint_metadata_") != std::string::npos) { + auto res = std::experimental::filesystem::remove_all(absolutePath); + if (res == 0) { + std::cout << "Failed to remove file " << (absolutePath) << std::endl; + } else { + std::cout << "Removing file " << (absolutePath) << std::endl; + } + } + } + } + + if (!m_filesystem) { + std::cout << "warning: no filesystem passed to the constructor. " + "Initializing a new filesystem for CP..." 
+ << std::endl; + m_filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH); + } + + Utils::tryCreateDirectory(m_filesystem->getRootPath() + "scabbard"); + auto pmPath = m_filesystem->getRootPath() + m_pmFileName; + if (Utils::fileExists(pmPath.c_str()) != 0) { + m_pop = pmem::obj::pool::create(pmPath.c_str(), "", m_poolSize, + CREATE_MODE_RW); + m_root = m_pop.root(); + pmem::obj::make_persistent_atomic(m_pop, m_root->next); + pmem::obj::transaction::run(m_pop, [&] { m_root = m_root->next; }); + persistGuid(); + } else { + m_pop = pmem::obj::pool::open(pmPath, ""); + m_root = m_pop.root(); + m_root = m_root->next; + std::string guidS; + for (size_t idx = 0; idx < m_root->m_guidSize; ++idx) { + guidS += m_root->m_guid[idx]; + } + m_guid = Guid::Parse(guidS); + } + } catch (const pmem::pool_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } catch (const pmem::transaction_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } + // Open File handlers + m_pmFile = m_filesystem->newFile(m_pmFileName); + m_filesystem->createOrOpenCheckpointDirectory(m_guid); + + // get handles to task dispatcher and result buffers + for (int q = 0; q < m_numOfQueries; ++q) { + m_taskDispatchers[q] = m_queries[q]->getTaskDispatcher(); + m_resultHandlers[q] = m_queries[q]->getResultHandler(); + + if (m_clearFiles && !m_clearFiles->load()) { + m_recoveries[q] = std::make_shared(); + m_recoveries[q]->setRecoveryCounter(&m_recoveryCounter); + } + + for (int checkpoint = 0; checkpoint < m_numberOfCheckpoints; ++checkpoint) { + m_checkpoints[q][checkpoint] = std::make_shared(); + if (m_queries[q]->isMarkedForCheckpoint()) { + m_asyncFileNames[q][checkpoint] = + "checkpoint_" + std::to_string(jobId) + "_" + std::to_string(q) + + "_v" + std::to_string(checkpoint); + auto relativePath = m_filesystem->getRelativeCheckpointPath(m_guid); + m_asyncFiles[q][checkpoint] = m_filesystem->newFile( + relativePath + m_asyncFileNames[q][checkpoint], + m_resultHandlers[q]->m_numberOfSlots * m_slotFileSize); + if (!m_queries[q]->getBuffer()->isPersistent() && m_checkpointInputQueues) { + m_asyncFileNames[q][checkpoint + 2] = + "checkpoint_buffer1_" + std::to_string(jobId) + "_" + + std::to_string(q) + "_v" + std::to_string(checkpoint); + m_asyncFiles[q][checkpoint + 2] = m_filesystem->newFile( + relativePath + m_asyncFileNames[q][checkpoint + 2], m_queries[q]->getBuffer()->getCapacity()); + m_queries[q]->getBuffer()->setupForCheckpoints(m_filesystem); + m_checkpointInput[q][0] = true; + } + if (m_queries[q]->getSecondSchema() && + !m_queries[q]->getSecondBuffer()->isPersistent() && + m_checkpointInputQueues) { + m_asyncFileNames[q][checkpoint + 4] = + "checkpoint_buffer2_" + std::to_string(jobId) + "_" + + std::to_string(q) + "_v" + std::to_string(checkpoint); + m_asyncFiles[q][checkpoint + 4] = m_filesystem->newFile( + relativePath + m_asyncFileNames[q][checkpoint + 4], m_queries[q]->getSecondBuffer()->getCapacity()); + m_queries[q]->getSecondBuffer()->setupForCheckpoints(m_filesystem); + m_checkpointInput[q][1] = true; + } + + m_checkpoints[q][checkpoint]->setFilePath(relativePath); + } + m_checkpoints[q][checkpoint]->setCheckpointCounter(&m_checkpointCounter); + } + auto &code = m_queries[q]->getOperator()->getCode(); + m_intermSizes[q] = 0; + if (auto *aggrCode = dynamic_cast(&code)) { + m_intermSizes[q] = aggrCode->getBucketSize(); + } + m_outputSizes[q] = code.getOutputSchema().getTupleSize(); + } + + // topologically sort queries + //size_t maxBatchSize = 
SystemConf::getInstance().BATCH_SIZE; + std::vector visited (m_numOfQueries, false); + std::stack stack; + for (int i = 0; i < m_numOfQueries; i++) { + //if (m_queries[i]->getConfig()) + // maxBatchSize = std::max(m_queries[i]->getConfig()->getBatchSize(), maxBatchSize); + if (visited[i] == false) { + topologicalSort(i, visited, stack); + } + } + while (!stack.empty()) { + m_sortedQueries.push_back(m_queries[stack.top()]); + stack.pop(); + } + std::reverse(m_sortedQueries.begin(), m_sortedQueries.end()); + + // set the checkpoint coordinator only in the upstream task dispatcher + // todo: generalize this for multiple pipelines/join operators + // if (!m_triggersCheckpoints) { + for (int q = 0; q < m_numOfQueries; ++q) { + m_taskDispatchers[q]->m_triggerCheckpoints = false; + m_taskDispatchers[q]->m_coordinator = this; + } + // m_taskDispatchers[0]->m_checkpointFinished.store(true); + // m_taskDispatchers[0]->setCheckpointCoordinator(this); + //} + + if (m_clearFiles && m_clearFiles->load()) { + m_ready.store(true); + } +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" +void FileBackedCheckpointCoordinator::operator()() { + if (m_clearFiles && !m_clearFiles->load()) { + prepareRecovery(); + m_clearFiles->store(true); + if (m_printMessages) { + std::cout << "[CP] waiting for the input buffers to load their data" << std::endl; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1 * SystemConf::getInstance().CHECKPOINT_INTERVAL)); + } + + while (!m_ready.load()) + ; + + //std::this_thread::sleep_for(std::chrono::milliseconds(10000)); + if (m_printMessages) { + std::cout << "[CP] starting the checkpoint coordinator" << std::endl; + } + auto t1 = std::chrono::high_resolution_clock::now(); + auto t2 = t1; + auto time_span = + std::chrono::duration_cast>(t2 - t1); + while (true) { + try { + if (m_triggersCheckpoints) { + auto duration = + std::max((int)(SystemConf::getInstance().CHECKPOINT_INTERVAL - + (size_t) (time_span.count() * 1000)), 0); + std::this_thread::sleep_for(std::chrono::milliseconds(duration)); + // for (int q = 0; q < m_numOfQueries; ++q) { + // auto lastTaskId = m_taskDispatchers[q]->m_nextTask.load(); + // m_taskDispatchers[q]->setLastTaskId(lastTaskId); + //} + } else { + while (!m_waitCondition.load()) { + // std::cout << "[CP] warning: the checkpoint is waiting for the " + // "condition to become true" << std::endl; + } + // reset the atomic for the next iteration + m_waitCondition.store(false); + } + while (m_hasConcurrentCheckpoint.load()) { + std::cout << "[CP] warning: the checkpoint is waiting for the " + "previous one to finish" << std::endl; + _mm_pause(); + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + } + + //m_hasConcurrentCheckpoint.store(true); + auto flag = false; + while (!m_hasConcurrentCheckpoint.compare_exchange_weak(flag, true)) { + flag = false; + } + t1 = std::chrono::high_resolution_clock::now(); + // first stop creating merging tasks + if (m_printMessages) { + std::cout << "[CP] disabling merge tasks for checkpoint preparation " << std::endl; + } + for (int q = 0; q < m_numOfQueries; ++q) { + m_taskDispatchers[q]->createMergeTasks(false); + } + + // trigger checkpoint + // take the latest task-id from all pipelines + // and block merge in checkpointed slots (result collector) + while (!prepareCheckpoint()) { + /*for (int q = 0; q < m_numOfQueries; ++q) { + if (m_taskDispatchers[q]->m_workerQueue) { + createMergeTask(q); + } + }*/ + } + + // first resume creating merging tasks + 
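+      // with merging back on, workers keep draining result slots while the
+      // checkpoint data itself is written out asynchronously below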
if (m_printMessages) { + std::cout << "[CP] enabling merge tasks for checkpoint preparation " << std::endl; + } + for (int q = 0; q < m_numOfQueries; ++q) { + m_taskDispatchers[q]->createMergeTasks(true); + } + + // start checkpointing + // checkpointAll(); // single-threaded implementation used for testing + + auto checkpointVersion = m_checkpointId % 2; + // wait until all checkpoints complete + //for (int q = 0; q < m_numOfQueries;) { + // if (m_checkpointCounter.load() >= (q+1) && m_checkpoints[q][checkpointVersion]->m_readyLock.try_lock()){ + // m_checkpoints[q][checkpointVersion]->m_readyLock.unlock(); + // q++; + // } else { + while (m_checkpointCounter.load(std::memory_order_acquire) != m_numOfQueries) { + std::atomic_thread_fence(std::memory_order_acquire); + // std::this_thread::yield(); + _mm_pause(); + m_filesystem->getHandler().tryCompleteMultiple(); + for (int q = 0; q < m_numOfQueries; ++q) { + if (m_checkpoints[q][checkpointVersion]->m_state != CheckpointState::COMPLETED && + m_checkpoints[q][checkpointVersion]->m_workers.load() > 0) { + //createCheckpointTask(q); + //m_checkpoints[q][checkpointVersion]->m_workers.fetch_add(-1); + } + } + // std::cout << "checkpointcounter " << m_checkpointCounter.load() << + // std::endl; + // std::cout << "[DBG] number of checkpoints " << + // m_checkpoints[0][checkpointVersion]->m_counter.load() << std::endl; + } + + // measure checkpoint size and acknowledge checkpoint + size_t checkpointSize = 0; + std::string metadata; // = std::to_string(m_checkpointId); + for (int q = 0; q < m_numOfQueries; ++q) { + // stop creating checkpointing tasks + m_taskDispatchers[q]->m_triggerCheckpoints = false; + //if (m_checkpointInput[q][0]) {} + //if (m_checkpointInput[q][1]) {} + auto sIdx = m_checkpointPtrs[q][0]; + auto eIdx = m_checkpointPtrs[q][1]; + auto b1SIdx = m_checkpointPtrs[q][4]; + auto b1EIdx = m_checkpointPtrs[q][5]; + auto b2SIdx = m_checkpointPtrs[q][6]; + auto b2EIdx = m_checkpointPtrs[q][7]; + auto task1 = m_checkpointPtrs[q][8]; + auto task2 = m_checkpointPtrs[q][9]; + metadata += " q " + std::to_string(q) + " " + std::to_string(b1SIdx) + " " + std::to_string(b1EIdx) + + " " + std::to_string(b2SIdx) + " " + std::to_string(b2EIdx) + + " " + std::to_string(sIdx) + " " + std::to_string(eIdx) + + " " + std::to_string(task1) + " " + std::to_string(task2); + auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots; + while (sIdx != eIdx) { + if (m_resultHandlers[q]->m_hasWindowFragments) { + while (m_resultHandlers[q]->m_results[sIdx].m_slot.load() == 5) { + if (m_resultHandlers[q]->m_results[sIdx].m_previousSlot.load() == 5) { + unsafePrint(); + throw std::runtime_error( + "error: invalid previous slot value after the end of the checkpoint for query " + std::to_string(q)); + } + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + if (m_printMessages) { + std::cout << "[CP] warning: the checkpoint for query " + << std::to_string(q) + << " is waiting in the validation phase for slot " << sIdx + << " with value " + << m_resultHandlers[q]->m_results[sIdx].m_slot.load() + << ", previous value " + << m_resultHandlers[q]->m_results[sIdx].getPreviousSlot() + << std::endl; + } + // todo: do I need this? 
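+          // the enclosing loop spins because slot sIdx is still at state 5,
+          // i.e. its checkpoint write is in flight; the coordinator only
+          // advances to the next slot once the asynchronous persist completes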
+ /*m_resultHandlers[q]->m_results[sIdx].m_slot.store( + m_resultHandlers[q]->m_results[sIdx].getPreviousSlot(), + std::memory_order_release);*/ + } + } else { + while (m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].m_slot.load() == 5) { + if (m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].m_previousSlot.load() == 5) { + unsafePrint(); + throw std::runtime_error( + "error: invalid previous slot value after the end of the checkpoint for query " + std::to_string(q)); + } + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + if (m_printMessages) { + std::cout << "[CP] warning: the checkpoint for query " + << std::to_string(q) << " is waiting in the validation phase for slot " + << sIdx << " with value " << m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].m_slot.load() + << ", previous value " << m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].getPreviousSlot() + << std::endl; + } + // todo: do I need this? + /*m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].m_slot.store( + m_resultHandlers[q]->m_resultsWithoutFrags[sIdx].getPreviousSlot(), + std::memory_order_release);*/ + } + } + sIdx++; + if (sIdx == numOfSlots) sIdx = 0; + } + // m_resultHandlers[q]->m_mergeLock.unlock(); + // create a merge task + if (m_taskDispatchers[q]->m_workerQueue) { + createMergeTask(q); + } + // if (m_readySlots[q][0].unsafe_size() != 0) + // throw std::runtime_error("error: the checkpoint queue is not empty for + // query " + std::to_string(q)); + // finalize checkpoint + m_taskDispatchers[q]->m_checkpointFinished.store(true); + checkpointSize += + m_checkpoints[q][checkpointVersion]->getCheckpointSize(); + } + persistMetadata(metadata); + m_root->m_version.get_rw().store(m_checkpointId); + m_root->m_valid.get_rw().store(true); + + if (SystemConf::getInstance().LINEAGE_ON) { + // todo: make this more general + for (int q = 0; q < m_numOfQueries; ++q) { + auto query = m_sortedQueries[q]; + long fo1 = m_checkpointPtrs[q][10]; + long fo2 = m_checkpointPtrs[q][11]; + if (query->getBuffer()->isPersistent() && fo1 != INT_MIN) { + query->getBuffer()->getFileStore()->freePersistent(query->getId(), 0, fo1); + } + if (query->getSecondBuffer()->isPersistent() && fo2 != INT_MIN) { + query->getSecondBuffer()->getFileStore()->freePersistent(query->getId(), 1, fo2); + } + } + } + + t2 = std::chrono::high_resolution_clock::now(); + time_span = + std::chrono::duration_cast>(t2 - t1); + + m_statistics->registerDuration(time_span.count()); + m_statistics->registerCheckpointSize(checkpointSize); + if (m_printMessages) { + std::cout << "[CP] checkpoint duration " << time_span.count() + << " with size " << checkpointSize << " " + << m_statistics->toString() << std::endl; + } + + if (checkpointSize < (m_expectedBytes) && + checkpointSize != m_measuredBytes.load()) { + std::cout << "warning: the checkpoint size (" + << std::to_string(checkpointSize) + << ") was smaller than the expected " + << std::to_string(m_expectedBytes) << " and measured " + << std::to_string(m_measuredBytes.load()) << std::endl; + /*throw std::runtime_error( + "error: the checkpoint size (" + std::to_string(checkpointSize) + + ") was smaller than the expected " + std::to_string(m_expectedBytes) + + " and measured " + std::to_string(m_measuredBytes.load()));*/ + } + + // reset checkpoints + m_hasConcurrentCheckpoint.store(false); + m_checkpointCounter.store(0); + m_expectedBytes = 0; + m_measuredBytes.store(0); + + if (SystemConf::getInstance().DURATION > 0) { + // this works assuming there is no stalling for triggering checkpoints + if 
((long)((m_checkpointId * SystemConf::getInstance().CHECKPOINT_INTERVAL) / 1000) > + SystemConf::getInstance().DURATION) { + // delete checkpoint files in we reach the end of the experiment's + // duration + clearPersistentMemory(); + if (m_printMessages) { + std::cout << "[CP] Done." << std::endl; + } + break; + } + } + m_checkpointId++; + //if (m_checkpointId == 2) + // throw std::runtime_error("failing after checkpoint"); + } +} + +void FileBackedCheckpointCoordinator::checkpoint(int pid, int q) { + if (m_checkpointInput[q][0]) { + checkpointBuffer(pid, 0, q); + } + if (m_checkpointInput[q][1]) { + checkpointBuffer(pid, 1, q); + } + if (m_resultHandlers[q]->m_hasWindowFragments) { + checkpointWithFragments(pid, q); + } else { + checkpointWithoutFragments(pid, q); + } +} + +void FileBackedCheckpointCoordinator::recover(int pid, int q) { + if (m_checkpointInput[q][0]) { + recoverBuffer(pid, 0, q); + } + if (m_checkpointInput[q][1]) { + recoverBuffer(pid, 1, q); + } + if (m_resultHandlers[q]->m_hasWindowFragments) { + recoverWithFragments(pid, q); + } else { + recoverWithoutFragments(pid, q); + } +} + +int FileBackedCheckpointCoordinator::roundOffset(int offset) { + /*auto alignment = m_filesystem->getSectorSize(); + if (offset % alignment != 0) { + int d = offset / alignment; + offset = (d + 1) * alignment; + } + return offset;*/ + auto alignment = m_filesystem->getSectorSize(); + if (offset < 8 * 1024 && offset != 0) { + offset = 8 * 1024; + } else if (offset % alignment != 0) { + auto d = offset / alignment; + offset = (d + 1) * alignment; + } + return offset; +} + +bool FileBackedCheckpointCoordinator::prepareCheckpoint() { + if (m_debug) { + std::cout << "[CP] preparing checkpoint " << m_checkpointId << std::endl; + } + auto t1 = std::chrono::high_resolution_clock::now(); + + bool flag = false; + // prepare checkpoints using the topological sorted queries + for (auto &query: m_sortedQueries) { + auto q = query->getId(); + auto checkpointVersion = m_checkpointId % 2; + auto &slotsWithoutFrags = m_resultHandlers[q]->m_resultsWithoutFrags; + auto &slots = m_resultHandlers[q]->m_results; + m_checkpoints[q][checkpointVersion]->resetCheckpoint(); + m_checkpoints[q][checkpointVersion]->setCheckpointId(m_checkpointId, q); + m_checkpointPtrs[q][0] = -1; + m_checkpointPtrs[q][1] = -1; + m_checkpointPtrs[q][10] = INT_MIN; + m_checkpointPtrs[q][11] = INT_MIN; + + // Lock the forwarding section of the result handler for the preparation + // Do I need to keep the lock for the whole checkpoint duration? + if (query->isMarkedForCheckpoint()) { + if (m_printMessages) { + std::cout << "[CP] entering preparation for query " + << std::to_string(q) + "..." << std::endl; + } + m_resultHandlers[q]->m_stopMerging = true; + std::scoped_lock lock( + m_resultHandlers[q]->m_mergeLock, m_resultHandlers[q]->m_forwardLock); + auto idx = m_resultHandlers[q]->m_nextToForward.load(); + //std::cout << "[CP] entered preparation..." << std::endl; + if (m_lastTaskId[q] == m_resultHandlers[q]->m_maxTaskId.load()) { + std::cout << "[CP] warning: checkpointing has to wait for processing " + << "... 
increase the size of the checkpoint duration or " + << "the size of the input queue (last task: " + << m_lastTaskId[q] << ")" << std::endl; + m_resultHandlers[q]->m_stopMerging = false; + createMergeTask(q); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + // unsafePrint(); + // flag = true; + // break; + } else { + m_checkpointPtrs[q][0] = idx; + m_checkpointPtrs[q][1] = idx; + if (m_resultHandlers[q]->m_hasWindowFragments) { + while (slots[idx].m_slot.load() != -1 && + slots[idx].m_slot.load() != 1 && + slots[idx].m_slot.load() != 3) { + std::cout << "[CP] warning: the checkpoint for query " + << std::to_string(q) << " is waiting for the " + << "first slot to checkpoint with id " + << slots[idx].m_taskId << " and slot " + << slots[idx].m_slot.load() << std::endl; + if (slots[idx].m_slot.load() == 5) { + unsafePrint(); + throw std::runtime_error( + "error: wrong state type at the beginning of the checkpoint " + "preparation for query " + + std::to_string(q)); + } + } + } else { + while (slotsWithoutFrags[idx].m_slot.load() != -1 && + slotsWithoutFrags[idx].m_slot.load() != 1) { + std::cout << "[CP] warning: the checkpoint for query " + << std::to_string(q) + << " is waiting for the first slot to checkpoint with id " + << slotsWithoutFrags[idx].m_taskId << " and slot " + << slots[idx].m_slot.load() << std::endl; + if (slotsWithoutFrags[idx].m_slot.load() == 5) { + unsafePrint(); + throw std::runtime_error( + "error: wrong state type at the beginning of the checkpoint " + "preparation for query " + + std::to_string(q)); + } + } + } + auto firstTaskId = m_resultHandlers[q]->m_hasWindowFragments + ? slots[idx].m_taskId.load() + : slotsWithoutFrags[idx].m_taskId.load(); + auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots; + // get the last task id after obtaining the locks + int tryCnt = 0; + long cnt = 0; + //while (cnt < numOfSlots) { + m_lastTaskId[q] = m_resultHandlers[q]->m_maxTaskId.load(); + + auto prevIdx = (idx - 1) < 0 ? numOfSlots-1 : (idx - 1); + auto prevTaskId = m_resultHandlers[q]->m_hasWindowFragments + ? slots[prevIdx].m_taskId.load() + : slotsWithoutFrags[prevIdx].m_taskId.load(); + if (prevTaskId >= m_lastTaskId[q]) { + cnt = 0; + } else { + if (prevTaskId < 0) + prevTaskId = 0; + cnt = m_lastTaskId[q] - prevTaskId; //firstTaskId + 1; + while (cnt > numOfSlots) { + //firstTaskId = m_resultHandlers[q]->m_hasWindowFragments + // ? slots[idx].m_taskId.load() + // : slotsWithoutFrags[idx].m_taskId.load(); + cnt = m_lastTaskId[q] - prevTaskId; //firstTaskId + 1; + } + } + + // mark input queues for checkpoint + if (m_checkpointInput[q][0]) { + auto freeIdx = (idx + cnt - 1) % numOfSlots; + if (freeIdx < 0) + freeIdx = 0; + auto freePtr = m_resultHandlers[q]->m_hasWindowFragments + ? slots[freeIdx].m_freePointer + : slotsWithoutFrags[freeIdx].m_freePointer1; + if (freePtr < 0) { + freePtr = m_resultHandlers[q]->m_hasWindowFragments + ? 
-1 : slotsWithoutFrags[freeIdx].m_prevFreePointer1; + if (freePtr < 0) + throw std::runtime_error("error: negative freePtr1"); + } + auto slotsToCheck = query->getBuffer()->prepareCheckpoint(freePtr, m_readySlots[q][1], m_checkpointPtrs[q][4], m_checkpointPtrs[q][5]); + m_checkpoints[q][checkpointVersion]->increaseSlots(slotsToCheck, slotsToCheck); + m_expectedBytes += slotsToCheck * query->getBuffer()->getSlots()[0].m_size; + } + if (m_checkpointInput[q][1]) { + auto freeIdx = (idx + cnt - 1) % numOfSlots; + if (freeIdx < 0) + freeIdx = 0; + auto freePtr = slotsWithoutFrags[freeIdx].m_freePointer2; + if (freePtr < 0) { + freePtr = slotsWithoutFrags[freeIdx].m_prevFreePointer2; + if (freePtr < 0) + throw std::runtime_error("error: negative freePtr2"); + } + auto slotsToCheck = query->getSecondBuffer()->prepareCheckpoint(freePtr, m_readySlots[q][2], m_checkpointPtrs[q][6], m_checkpointPtrs[q][7]); + m_checkpoints[q][checkpointVersion]->increaseSlots(slotsToCheck, slotsToCheck); + m_expectedBytes += slotsToCheck * query->getBuffer()->getSlots()[0].m_size; + } + // std::this_thread::sleep_for(std::chrono::milliseconds(1)); + // if (tryCnt++ == 0) break; + //} + + // todo: control the size of the checkpoint here + // cnt -= ... + + if (cnt < 0 || cnt > numOfSlots) { + unsafePrint(); + throw std::runtime_error( + "error: invalid number of measured slots to checkpoint (" + + std::to_string(cnt) + " - " + std::to_string(numOfSlots) + + ") last task id " + + std::to_string(m_lastTaskId[q]) + " first task id " + + std::to_string(firstTaskId)); + } + int workThreshold = ((int) cnt / SystemConf::getInstance().WORKER_THREADS) + 1; + + m_checkpointPtrs[q][8] = firstTaskId; + m_checkpointPtrs[q][9] = m_lastTaskId[q]; + if (cnt > 0) { + // std::cout << "[CP] the number of slots is " << cnt << std::endl; + std::unordered_set idxs; // used for testing correctness + if (m_resultHandlers[q]->m_hasWindowFragments) { + while (true) { + if (cnt == 0) break; + auto &slot = slots[idx]; + + auto waitCnt = 0; + // spin while waiting for the slots to become available + while (slot.m_slot.load() != 1 && slot.m_slot.load() != 3) { + if (waitCnt++ == 1000000) { + std::cout << "[CP] warning: the checkpoint for query " + << std::to_string(q) + << " is waiting in the preparation phase for slot " + << idx << " with value " << slots[idx].m_slot.load() + << " and taskId " << slots[idx].m_taskId + << std::endl; + waitCnt = 0; + if (slots[idx].m_previousSlot.load() == 5) { + unsafePrint(); + throw std::runtime_error( + "error: wrong state type at the beginning of the " + "checkpoint " + "preparation for query " + + std::to_string(q)); + } + } + _mm_pause(); + } + + int winFrags = slot.getNumberOfWindowFragments(true); + if (winFrags > 0) { + slot.setPreviousSlot(slot.m_slot.load()); // keep previous state + m_checkpoints[q][checkpointVersion]->increaseSlots(slot.getNumberOfWindowFragments(true)); + auto *code = dynamic_cast(&query->getOperator()->getCode()); + if (!code) + throw std::runtime_error( + "error: invalid aggregation casting in checkpoint " + "coordinator"); + auto hashtableSize = + code->hasGroupBy() + ? (query->getConfig() + ? 
query->getConfig()->getHashtableSize()
+                    : SystemConf::getInstance().HASH_TABLE_SIZE)
+              : 1;
+          m_expectedBytes +=
+              ((slot.m_numberOfWindows - slot.m_numberOfCompleteWindows) *
+               hashtableSize * m_intermSizes[q]) +
+              (slot.m_numberOfCompleteWindows * hashtableSize *
+               m_outputSizes[q]);
+
+          slot.m_slot.store(5, std::memory_order_release);
+          // std::cout << "[DBG] setting slot " +
+          // std::to_string(slot.m_index) + " to 3" << std::endl;
+
+          // fill the checkpoint queue
+          m_readySlots[q][0].push(idx);
+          if (idxs.find(idx) != idxs.end())
+            throw std::runtime_error(
+                "error: the idx already exists during the checkpoint "
+                "preparation");
+          else
+            idxs.insert(idx);
+        }
+
+        cnt--;
+        // std::cout << "[CP] now the number of slots is " << cnt << std::endl;
+        idx++;
+        if (idx == numOfSlots) idx = 0;
+      }
+    } else {
+      while (true) {
+        if (cnt == 0) break;
+        auto &slot = slotsWithoutFrags[idx];
+
+        // spin while waiting for the slots to become available
+        auto waitCnt = 0;  // declared outside the loop so the warning below can actually trigger
+        while (slot.m_slot.load() != 1) {
+          if (waitCnt++ == 1000000) {
+            std::cout << "[CP] warning: the checkpoint for query "
+                      << std::to_string(q)
+                      << " is waiting in the preparation phase for slot "
+                      << idx << " with value "
+                      << slotsWithoutFrags[idx].m_slot.load()
+                      << " and taskId " << slotsWithoutFrags[idx].m_taskId
+                      << std::endl;
+            waitCnt = 0;
+            if (slotsWithoutFrags[idx].m_previousSlot.load() == 5) {
+              unsafePrint();
+              throw std::runtime_error(
+                  "error: wrong state type at the beginning of the "
+                  "checkpoint preparation for query " +
+                  std::to_string(q));
+            }
+          }
+          _mm_pause();
+        }
+        int byteLength = slot.m_result->getPosition();
+        if (byteLength > 0) {
+          slot.setPreviousSlot(slot.m_slot.load());  // keep previous state
+          m_checkpoints[q][checkpointVersion]->increaseSlots(slot.getNumberOfResults());
+          m_expectedBytes += byteLength;
+
+          slot.m_slot.store(5, std::memory_order_release);
+          // std::cout << "[DBG] setting slot " + std::to_string(slot.m_index) + " to 3" << std::endl;
+
+          // fill the checkpoint queue
+          m_readySlots[q][0].push(idx);
+          if (idxs.find(idx) != idxs.end())
+            throw std::runtime_error(
+                "error: the idx already exists during the checkpoint "
+                "preparation");
+          else
+            idxs.insert(idx);
+        }
+
+        cnt--;
+        // std::cout << "[CP] now the number of slots is " << cnt << std::endl;
+        idx++;
+        if (idx == numOfSlots) idx = 0;
+      }
+    }
+
+    idx--;
+    if (idx == -1) idx = numOfSlots - 1;
+    m_checkpointPtrs[q][1] = idx;
+    // get the input buffer offset here
+    m_checkpointPtrs[q][2] =
+        m_resultHandlers[q]->m_hasWindowFragments
+            ? slots[m_checkpointPtrs[q][1]].m_freePointer
+            : slotsWithoutFrags[m_checkpointPtrs[q][1]].m_freePointer1;
+    m_checkpointPtrs[q][3] =
+        m_resultHandlers[q]->m_hasWindowFragments
+            ? 0
+            : slotsWithoutFrags[m_checkpointPtrs[q][1]].m_freePointer2;
+
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      m_checkpointPtrs[q][10] =
+          m_resultHandlers[q]->m_hasWindowFragments
+              ? slots[m_checkpointPtrs[q][1]].m_freeOffset
+              : slotsWithoutFrags[m_checkpointPtrs[q][1]].m_freeOffset1;
+      m_checkpointPtrs[q][11] =
+          m_resultHandlers[q]->m_hasWindowFragments
+              ? 0
+              : slotsWithoutFrags[m_checkpointPtrs[q][1]].m_freeOffset2;
+      if (query->isMostDownstream()) {
+        if (m_resultHandlers[q]->m_hasWindowFragments) {
+          //slots[m_checkpointPtrs[q][1]].
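The marking protocol used in the two loops above can be distilled as follows; this is a simplified sketch with hypothetical types (the real code operates on the PartialResultSlot structs and TBB-style concurrent queues):

```cpp
#include <atomic>
#include <queue>
#include <stdexcept>

// Hypothetical, reduced slot: the real slot also carries window fragments,
// task ids and free pointers in addition to this small state machine.
struct Slot {
  std::atomic<int> m_slot{-1};       // -1 empty, 1/3 ready states, 5 = being checkpointed
  std::atomic<int> m_previousSlot{-1};
};

// Mark a ready slot for checkpointing and hand its index to the workers.
void markForCheckpoint(Slot &slot, int idx, std::queue<int> &readySlots) {
  int state = slot.m_slot.load();
  if (state != 1 && state != 3)
    throw std::runtime_error("slot not in a checkpointable state");
  slot.m_previousSlot.store(state);                 // remember the state to restore
  slot.m_slot.store(5, std::memory_order_release);  // 5 = checkpoint in progress
  readySlots.push(idx);                             // workers pop and persist this slot
}
```

Once a worker has persisted a slot, the I/O completion callback restores the saved previous state, which is why the callbacks later in this file throw if the remembered state is not 1 or 3.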
+ } else { + + } + } + } + if (m_checkpointPtrs[q][0] < 0 || + m_checkpointPtrs[q][0] >= numOfSlots || + m_checkpointPtrs[q][1] < 0 || + m_checkpointPtrs[q][1] >= numOfSlots) { + throw std::runtime_error("error: invalid checkpoint pointers: " + + std::to_string(m_checkpointPtrs[q][0]) + + " - " + + std::to_string(m_checkpointPtrs[q][1])); + } + } + + m_resultHandlers[q]->m_stopMerging = false; + // unlock the merge phase + // m_resultHandlers[q]->m_mergeLock.unlock(); + // m_resultHandlers[q]->m_forwardLock.unlock(); + } + // start creating checkpoint tasks + // m_taskDispatchers[q]->m_triggerCheckpoints = true; + } + + //if (m_checkpoints[q][checkpointVersion]->getSlots() == 0) { + // m_checkpoints[q][checkpointVersion]->setComplete(); + //} else { + // // create tasks for checkpointing + // for (int i = 0; i < SystemConf::getInstance().WORKER_THREADS; ++i) { + // createCheckpointTask(q); + // } + // // m_checkpoints[q][checkpointVersion]->m_readyFlag.store(true); + //} + + m_statistics->registerSize(m_expectedBytes); + if (m_printMessages) { + std::cout << "[CP] checkpoint " << m_checkpointId << " for pipeline " << q + << " with last task id " << m_lastTaskId[q] << " has to store " + << m_checkpoints[q][checkpointVersion]->getSlots() << " (q " + << m_checkpoints[q][checkpointVersion]->getInputQueueSlots() + << ")" << " fragments with expected size " << m_expectedBytes + << std::endl; + } + } + + for (auto &query: m_sortedQueries) { + auto q = query->getId(); + auto checkpointVersion = m_checkpointId % 2; + if (m_checkpoints[q][checkpointVersion]->getSlots() == 0) { + m_checkpoints[q][checkpointVersion]->setComplete(); + } else { + // create tasks for checkpointing + for (int i = 0; i < SystemConf::getInstance().WORKER_THREADS; ++i) { + createCheckpointTask(q); + } + // m_checkpoints[q][checkpointVersion]->m_readyFlag.store(true); + } + } + + if (flag) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return false; + } + + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast>(t2 - t1); + m_statistics->registerPreparation(time_span.count()); + + return true; +} + +bool FileBackedCheckpointCoordinator::prepareRecovery() { + m_checkpointId = m_root->m_version.get_ro().load(); + if (!m_root->m_valid.get_ro().load()) { + std::cout << "[CP] found invalid checkpoint during recovery" << std::endl; + return false; + } + + auto t1 = std::chrono::high_resolution_clock::now(); + auto currentMs = std::chrono::duration_cast(t1.time_since_epoch()).count(); + std::cout << "[CP] " << currentMs << " start recovering last checkpoint" << std::endl; + + // parse metadata + std::string metadata; + for (size_t idx = 0; idx < m_root->m_mSize; ++idx) { + metadata += m_root->m_metadata[idx]; + } + std::istringstream iss(metadata); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + + for (size_t idx = 0; idx < words.size(); idx+=10) { + auto q = std::stoi(words[idx + 1]); + m_checkpointPtrs[q][0] = std::stoi(words[idx + 6]); + m_checkpointPtrs[q][1] = std::stoi(words[idx + 7]); + m_checkpointPtrs[q][4] = std::stoi(words[idx + 2]); + m_checkpointPtrs[q][5] = std::stoi(words[idx + 3]); + m_checkpointPtrs[q][6] = std::stoi(words[idx + 4]); + m_checkpointPtrs[q][7] = std::stoi(words[idx + 5]); + m_checkpointPtrs[q][8] = std::stoi(words[idx + 8]); + m_checkpointPtrs[q][9] = std::stoi(words[idx + 9]); + + auto buffer = m_queries[q]->getBuffer(); + auto lastTask = (buffer->isPersistent()) ? 
(int)(buffer->getUnsafeStartPointer()/buffer->getBatchSize()) + 1 : 0; + if (lastTask <= m_checkpointPtrs[q][9]) { + auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots; + if (m_checkpointPtrs[q][8] < lastTask) { + auto diff = lastTask - m_checkpointPtrs[q][8]; + m_checkpointPtrs[q][8] = m_checkpointPtrs[q][8] + diff; + m_checkpointPtrs[q][0] = (m_checkpointPtrs[q][0] + diff) % numOfSlots; + } + // update unsafe start pointer + auto diff = m_checkpointPtrs[q][9] - lastTask + 1; + buffer->incrementUnsafeStartPointer(diff * m_queries[q]->getBuffer()->getBatchSize()); + + std::cout << "[CP] start recovering from checkpoint of query " << q + << " with starting task " << m_checkpointPtrs[q][8] << " and" + << " last task " << m_checkpointPtrs[q][9] << std::endl; + + // enlist available slots + auto startSlot = m_checkpointPtrs[q][0]; + auto endSlot = m_checkpointPtrs[q][1]; + if (endSlot != -1) { + std::cout << "[DBG] For query " + std::to_string(q) + + " dropping tasks before task " + + std::to_string(m_checkpointPtrs[q][9]) + << std::endl; + m_queries[q]->startDroppingTasks(m_checkpointPtrs[q][9]); + m_resultHandlers[q]->restorePtrs(m_checkpointPtrs[q][8]); + endSlot = (endSlot + 1) % numOfSlots; + } + while (startSlot != -1 && endSlot != -1 && startSlot != endSlot) { + m_readySlots[q][0].push(startSlot); + m_recoveries[q]->increaseSlots(1, 0); + startSlot++; + if (startSlot == numOfSlots) { + startSlot = 0; + } + } + numOfSlots = m_queries[q]->getBuffer()->getNumberOfSlots(); + startSlot = m_checkpointPtrs[q][4]; + endSlot = m_checkpointPtrs[q][5]; + if (endSlot != -1) { + endSlot = (endSlot + 1) % numOfSlots; + } + while (startSlot != -1 && endSlot != -1 && startSlot != endSlot) { + m_readySlots[q][1].push(startSlot); + m_recoveries[q]->increaseSlots(1, 1); + startSlot++; + if (startSlot == numOfSlots) { + startSlot = 0; + } + } + numOfSlots = m_queries[q]->getSecondBuffer()->getNumberOfSlots(); + startSlot = m_checkpointPtrs[q][6]; + endSlot = m_checkpointPtrs[q][7]; + if (endSlot != -1) { + endSlot = (endSlot + 1) % numOfSlots; + } + while (startSlot != -1 && endSlot != -1 && startSlot != endSlot) { + m_readySlots[q][2].push(startSlot); + m_recoveries[q]->increaseSlots(1, 1); + startSlot++; + if (startSlot == numOfSlots) { + startSlot = 0; + } + } + + if (m_recoveries[q]->getSlots() > 0) { + // create recovery tasks + for (int i = 0; i < SystemConf::getInstance().WORKER_THREADS; ++i) { + createRecoveryTask(q); + } + } else { + m_recoveries[q]->setComplete(); + } + } else { + std::cout << "[CP] the last task id from the checkpoint of query " << q + << " was smaller than the last id kept in the input buffer (" + << lastTask << " > " << m_checkpointPtrs[q][9] << ")" << std::endl; + m_recoveries[q]->setComplete(); + } + } + + // wait for recovery to finish + while (m_recoveryCounter.load() != m_numOfQueries) { + // std::cout << "recoveryCounter " << m_recoveryCounter.load() << std::endl; + } + + //m_checkpointId = 0; + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast>(t2 - t1); + std::cout << "[CP] finishing the recovery of checkpoints in " << time_span.count() << std::endl; + return true; +} + +void FileBackedCheckpointCoordinator::setReady() { + std::cout << "[CP] setting checkpoint coordinator ready to start" << std::endl; + m_ready.store(true); +} + +void FileBackedCheckpointCoordinator::setCompressionFP(int query, std::function fp) { + if (query >= m_numOfQueries) + throw std::runtime_error("error: invalid number of query when setting 
the compression function"); + m_useCompression[query] = true; + m_compressionFP[query] = fp; +} + +void FileBackedCheckpointCoordinator::setDecompressionFP(int query, std::function fp) { + if (query >= m_numOfQueries) + throw std::runtime_error("error: invalid number of query when setting the compression function"); + m_decompressionFP[query] = fp; +} + +void FileBackedCheckpointCoordinator::signalWaitCondition() { + m_waitCondition.store(true); +} + +bool FileBackedCheckpointCoordinator::hasWorkUnsafe(int query) { + return (m_readySlots[query][0].unsafe_size() + m_readySlots[query][1].unsafe_size() + m_readySlots[query][2].unsafe_size())> 0; +} + +void FileBackedCheckpointCoordinator::tryToPurgeCheckpoint(std::shared_ptr &graph) { + if (!SystemConf::getInstance().LINEAGE_ON || !m_root || !m_root->m_valid.get_ro() || !graph) + return; + auto flag = false; + while (!m_hasConcurrentCheckpoint.compare_exchange_weak(flag, true)) { + flag = false; + } + bool purge = true; + for (int q = 0; q < m_numOfQueries; ++q) { + if ((graph->m_graph[q]->m_freeOffset1 < m_checkpointPtrs[q][10]) || + (m_queries[q]->getSecondSchema() && + graph->m_graph[q]->m_freeOffset2 < m_checkpointPtrs[q][11])) { + purge = false; + break; + } + } + if (purge) { + // std::cout << "[DBG] invalidating the latest checkpoint" << std::endl; + m_root->m_valid.get_rw() = false; + } + m_hasConcurrentCheckpoint.store(false); +} + +void FileBackedCheckpointCoordinator::clearPersistentMemory() { + m_pop.close(); + m_filesystem->eraseFiles(); + m_filesystem->tryDeleteCheckpointDirectory(m_guid); +} + +FileBackedCheckpointCoordinator::~FileBackedCheckpointCoordinator() { + m_pop.close(); +} + +void FileBackedCheckpointCoordinator::checkpointBuffer(int pid, int bId, int q) { + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + auto debug = false; + if (debug) { + std::cout << "[DBG] callback updating slot " + + std::to_string(context->m_slotId) + " with " + + std::to_string(context->m_slot->load()) + " status, " + + std::to_string(bytes_transferred) + " bytes_transferred and " + + std::to_string(context->m_frCounter->load()) + " frCounter " << std::endl; + } + // Set the slot status to ready + context->m_frCounter->fetch_add(-1); + if (context->m_frCounter->load() == 0) { + context->m_slot->store(context->m_previousState); + } + if (debug) { + std::cout << "[DBG] callback setting the slot " + + std::to_string(context->m_slotId) + " status to " + + std::to_string(context->m_slot->load()) + + " with previous slot " + + std::to_string(context->m_previousState) + + " and frCounter " + + std::to_string(context->m_frCounter->load()) + << std::endl; + } + if (context->m_buffer) { + UnboundedQueryBufferFactory::getInstance().free(context->m_buffer->getBufferId(), context->m_buffer); + context->m_buffer.reset(); + } + context->m_checkpoint->updateCounter(bytes_transferred); + }; + + // try to acknowledge previous request + m_filesystem->getHandler().tryCompleteMultiple(); + auto checkpointVersion = m_checkpointId % 2; + + int slotId = -1; + auto buffer = (bId == 0) ? 
m_queries[q]->getBuffer() : + m_queries[q]->getSecondBuffer(); + while (m_readySlots[q][bId + 1].try_pop(slotId)) { + auto &slot = buffer->getSlots()[slotId]; +#if defined(PREFETCH) + // prefetch data here + slot.prefetch(); +#endif + auto copyBuffer = UnboundedQueryBufferFactory::getInstance().newInstance(pid); + int diskBytes = 0; + char *diskValues = copyBuffer->getBuffer().data(); + bool clear = false; + std::function fp; + auto bytes = slot.m_size; + if (buffer->hasCompression(fp)) { + fp(pid, slot.m_bufferPtr, 0, (int) bytes, copyBuffer->getBuffer().data(), diskBytes, (int) copyBuffer->getBuffer().size(), clear, -1); + auto latency = (SystemConf::getInstance().LATENCY_ON) ? 0 : -1; + fp(pid, slot.m_bufferPtr, 0, -1, copyBuffer->getBuffer().data(), diskBytes, (int) copyBuffer->getBuffer().size(), clear, latency); + //diskBytes = 64 * 1024;//bytes; + } else { + std::memcpy(copyBuffer->getBuffer().data(), slot.m_bufferPtr, bytes); + diskBytes = bytes; + } + diskBytes = roundOffset(Utils::getPowerOfTwo(diskBytes)); +#if defined(NO_DISK) + diskBytes = 0; +#endif + + if (m_debug) { + std::cout << "[DBG] Worker compressing data in query " << std::to_string(q) + << " for slot " << std::to_string(slotId) << " in buffer " << std::to_string(bId) + << " of " << std::to_string(bytes) << " bytes to " + << std::to_string(diskBytes) << " with " + << std::to_string((double)bytes/(double)diskBytes) << " ratio " << std::endl; + } + + if (diskBytes > (int) m_slotFileSize) + throw std::runtime_error("error: the write exceeds the size of slots in the input log"); + m_measuredBytes.fetch_add(diskBytes); + + AckCheckpointContext context{ + m_checkpoints[q][checkpointVersion].get(), slot.m_id, + &slot.m_slot, slot.getPreviousSlot(), + &slot.m_numberOfResults, &slot.m_updateLock, copyBuffer}; + assert(m_asyncFiles[q][checkpointVersion + 2 + bId]->writeAsync( + reinterpret_cast(diskValues), + slotId * m_slotFileSize, diskBytes, callback, + context) == Status::Ok); + + slot.setReady(); + + // check if some async calls have finished + m_filesystem->getHandler().tryCompleteMultiple(); + } +} + +void FileBackedCheckpointCoordinator::checkpointWithFragments(int pid, int q) { + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + auto debug = false; + if (debug) { + std::cout << "[DBG] callback updating slot " + + std::to_string(context->m_slotId) + " with " + + std::to_string(bytes_transferred) + " bytes_transferred" + << std::endl; + } + // Set the slot status to ready + context->m_frCounter->fetch_add(-1); + if (context->m_frCounter->load() == 0) { + if (context->m_slot->load() == 5) { + //const std::lock_guard lock(*context->m_lock); + auto oldVal = context->m_slot->load(); + if (context->m_previousState != 1 && context->m_previousState != 3) { + throw std::runtime_error( + "error: in the callback the previous slot value is " + + std::to_string(context->m_previousState)); + } + context->m_slot->store(context->m_previousState); + // if (!context->m_slot->compare_exchange_weak(oldVal, + // context->m_previousState)) + // { + if (context->m_slot->load() != 1 && context->m_slot->load() != 3) { + throw std::runtime_error( + "error: failed updating the result slot after checkpointing: " + + std::to_string(oldVal)); + } + } else { + throw std::runtime_error( + "error: failed updating the result slot because of invalid slot " + "value: " + + 
+            std::to_string(context->m_slot->load()));
+      }
+    }
+    if (debug) {
+      std::cout << "[DBG] callback setting the slot " +
+                       std::to_string(context->m_slotId) + " status to " +
+                       std::to_string(context->m_slot->load()) +
+                       " with previous slot " +
+                       std::to_string(context->m_previousState) +
+                       " and frCounter " +
+                       std::to_string(context->m_frCounter->load())
+                << std::endl;
+    }
+    if (context->m_buffer) {
+      UnboundedQueryBufferFactory::getInstance().free(context->m_buffer->getBufferId(), context->m_buffer);
+      context->m_buffer.reset();
+    }
+    context->m_checkpoint->updateCounter(bytes_transferred);
+  };
+
+  // try to acknowledge previous request
+  m_filesystem->getHandler().tryCompleteMultiple();
+  auto checkpointVersion = m_checkpointId % 2;
+  // todo: create 4 different checkpoint functions (simple, agg, aggPtr,
+  // aggPar). The one below is only for the aggPtr case
+  auto &slots = m_resultHandlers[q]->m_results;
+  auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots;
+  auto cnt = 0;
+  int idx = -1;
+  bool clear = true;
+  while (m_readySlots[q][0].try_pop(idx)) {
+    auto &slot = slots[idx];
+    if (slot.m_slot.load(std::memory_order_acquire) == 5) {
+      if (m_debug) {
+        std::cout << "[DBG] creating callback for slot " +
+                         std::to_string(slot.m_index) +
+                         " with previous status " +
+                         std::to_string(slot.getPreviousSlot())
+                  << std::endl;
+      }
+#if defined(PREFETCH)
+      // prefetch data here
+      slot.prefetch();
+#endif
+      auto buffer = UnboundedQueryBufferFactory::getInstance().newInstance(pid);
+      auto capacity = buffer->getBuffer().size();
+      std::string metadata;  // metadata stored at the beginning of the slot
+      auto writeIdx = 6 * 1024;
+      auto query = m_queries[q];
+      auto *code = dynamic_cast<AggregateOperatorCode *>(&query->getOperator()->getCode());
+      if (!code)
+        throw std::runtime_error("error: invalid aggregation casting in checkpoint coordinator");
+      auto hashtableSize = code->hasGroupBy() ? (query->getConfig() ?
query->getConfig()->getHashtableSize() + : SystemConf::getInstance().HASH_TABLE_SIZE) : 1; + auto offset = hashtableSize * m_intermSizes[q]; + metadata.append(std::to_string(slot.m_freePointer) + " ") + .append(std::to_string(slot.m_latencyMark) + " ") + .append(std::to_string(slot.m_taskId) + " ") + .append(std::to_string(slot.m_previousSlot.load()) + " ") + .append(std::to_string(hashtableSize) + " ") + .append(std::to_string(m_intermSizes[q]) + " ") + .append(std::to_string(offset) + " "); + // check all the fragments of the slot + auto copyIdx = 0; + if (slot.m_closingWindows) { + metadata.append("cl "+ + std::to_string(slot.m_closingWindows->numberOfWindows()) + " "); + if (code->hasGroupBy()) { + for (int wIdx = 0; wIdx < slot.m_closingWindows->numberOfWindows(); ++wIdx) { + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_closingWindows->getBufferPtrs()[wIdx], offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_closingWindows->getBufferPtrs()[wIdx], 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } else if (slot.m_closingWindows->numberOfWindows() > 0) { + offset = slot.m_closingWindows->getPosition(); + checkBlockSize(offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_closingWindows->getBufferRaw(), offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_closingWindows->getBufferRaw(), 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } + if (slot.m_pendingWindows) { + metadata.append("p " + + std::to_string(slot.m_pendingWindows->numberOfWindows()) + " "); + if (code->hasGroupBy()) { + for (int wIdx = 0; wIdx < slot.m_pendingWindows->numberOfWindows(); ++wIdx) { + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_pendingWindows->getBufferPtrs()[wIdx], offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_pendingWindows->getBufferPtrs()[wIdx], 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } else if (slot.m_pendingWindows->numberOfWindows() > 0) { + offset = slot.m_pendingWindows->getPosition(); + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_pendingWindows->getBufferRaw(), offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_pendingWindows->getBufferRaw(), 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } + if (slot.m_openingWindows) { + metadata.append("o " + + std::to_string(slot.m_openingWindows->numberOfWindows()) + " "); + if (code->hasGroupBy()) { + for (int wIdx = 0; wIdx < slot.m_openingWindows->numberOfWindows(); ++wIdx) { + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_openingWindows->getBufferPtrs()[wIdx], offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_openingWindows->getBufferPtrs()[wIdx], 0, 
offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } else if (slot.m_openingWindows->numberOfWindows() > 0) { + offset = slot.m_openingWindows->getPosition(); + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_openingWindows->getBufferRaw(), offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_openingWindows->getBufferRaw(), 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + } + if (slot.m_completeWindows && + slot.m_completeWindows->numberOfWindows() > 0) { + offset = slot.m_completeWindows->getPosition(); + metadata.append("co " + std::to_string(offset) + " "); + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_completeWindows->getBuffer().data(), offset); + copyIdx += offset; + } else { + metadata.append(std::to_string(copyIdx) + " "); + m_compressionFP[q]( + pid, slot.m_completeWindows->getBuffer().data(), 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, true, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + } + if (m_useCompression[q] && copyIdx > 0) { + m_compressionFP[q]( + pid, buffer->getBuffer().data() + writeIdx, 0, -1, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + } + if ((int) metadata.size() > writeIdx) + throw std::runtime_error("error: increase the metadata section (" + + std::to_string(metadata.size()) + " - " + + std::to_string(writeIdx) + ")"); + std::memcpy(buffer->getBuffer().data(), metadata.data(), metadata.size()); + if (copyIdx >= 0) { + copyIdx = roundOffset(Utils::getPowerOfTwo(copyIdx + writeIdx)); //roundOffset(copyIdx + writeIdx); +#if defined(NO_DISK) + copyIdx = 0; +#endif + checkBlockSize(copyIdx, capacity); + m_measuredBytes.fetch_add(copyIdx); + + AckCheckpointContext context{ + m_checkpoints[q][checkpointVersion].get(), slot.m_index, + &slot.m_slot, slot.getPreviousSlot(), + &slot.m_numberOfFragments, &slot.m_updateLock, buffer}; + assert(m_asyncFiles[q][checkpointVersion]->writeAsync( + reinterpret_cast(buffer->getBuffer().data()), + slot.m_index * m_slotFileSize, copyIdx, callback, + context) == Status::Ok); + if (m_debug) { + std::cout << "[DBG] submiting callback for slot " + + std::to_string(slot.m_index) + << std::endl; + } + m_filesystem->getHandler().tryCompleteMultiple(); + } + } else { + throw std::runtime_error( + "error: attempting to checkpoint slot " + std::to_string(idx) + + " with state " + std::to_string(slot.m_slot.load(std::memory_order_acquire)) + + " and checkpoint pointers: " + + std::to_string(m_checkpointPtrs[q][0]) + " - " + + std::to_string(m_checkpointPtrs[q][1])); + } + } + // std::cout << "[CP] worker leaving checkpoint function " << std::endl; +} + +void FileBackedCheckpointCoordinator::checkpointWithoutFragments(int pid, int q) { + auto callback = [](IAsyncContext *ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ctxt}; + if (result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", + static_cast(result)); + } + auto debug = false; + if (debug) { + std::cout << "[DBG] callback updating slot " + + std::to_string(context->m_slotId) + " with " + + std::to_string(bytes_transferred) + " bytes_transferred" + << std::endl; + } + // Set the 
slot status to ready + context->m_frCounter->fetch_add(-1); + if (context->m_frCounter->load() == 0) { + if (context->m_slot->load() == 5) { + //const std::lock_guard lock(*context->m_lock); + auto oldVal = context->m_slot->load(); + if (context->m_previousState != 1) { + throw std::runtime_error( + "error: in the callback the previous slot value is " + + std::to_string(context->m_previousState)); + } + context->m_slot->store(context->m_previousState); + // if (!context->m_slot->compare_exchange_weak(oldVal, + // context->m_previousState)) + // { + if (context->m_slot->load() != 1) { + throw std::runtime_error( + "error: failed updating the result slot after checkpointing: " + + std::to_string(oldVal)); + } + } else { + throw std::runtime_error( + "error: failed updating the result slot because of invalid slot " + "value: " + + std::to_string(context->m_slot->load())); + } + } + if (debug) { + std::cout << "[DBG] callback setting the slot " + + std::to_string(context->m_slotId) + " status to " + + std::to_string(context->m_slot->load()) + + " with previous slot " + + std::to_string(context->m_previousState) + + " and frCounter " + + std::to_string(context->m_frCounter->load()) + << std::endl; + } + if (context->m_buffer) { + UnboundedQueryBufferFactory::getInstance().free(context->m_buffer->getBufferId(), context->m_buffer); + context->m_buffer.reset(); + } + context->m_checkpoint->updateCounter(bytes_transferred); + }; + + // try to acknowledge previous request + m_filesystem->getHandler().tryCompleteMultiple(); + auto checkpointVersion = m_checkpointId % 2; + auto &slots = m_resultHandlers[q]->m_resultsWithoutFrags; + auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots; + auto cnt = 0; + int idx = -1; + bool clear = true; + while (m_readySlots[q][0].try_pop(idx)) { + auto &slot = slots[idx]; + if (slot.m_slot.load(std::memory_order_acquire) == 5) { + if (m_debug) { + std::cout << "[DBG] creating callback for slot " + + std::to_string(slot.m_index) + + " with previous status " + + std::to_string(slot.getPreviousSlot()) + << std::endl; + } +#if defined(PREFETCH) + // prefetch data here + slot.prefetch(); +#endif + auto buffer = UnboundedQueryBufferFactory::getInstance().newInstance(pid); + auto capacity = buffer->getBuffer().size(); + std::string metadata; // metadata stored at the beginning of the slot + auto writeIdx = 512; + auto query = m_queries[q]; + + auto offset = slot.m_result->getPosition(); + metadata.append(std::to_string(slot.m_freePointer1) + " ") + .append(std::to_string(slot.m_freePointer2) + " ") + .append(std::to_string(slot.m_prevFreePointer1) + " ") + .append(std::to_string(slot.m_prevFreePointer2) + " ") + .append(std::to_string(slot.m_latencyMark) + " ") + .append(std::to_string(slot.m_taskId) + " ") + .append(std::to_string(offset) + " "); + + // copy result + auto copyIdx = 0; + checkBlockSize(copyIdx + offset + writeIdx, capacity); + if (!m_useCompression[q]) { + std::memcpy(buffer->getBuffer().data() + copyIdx + writeIdx, + slot.m_result->getBufferRaw(), offset); + copyIdx += offset; + } else { + m_compressionFP[q]( + pid, slot.m_result->getBufferRaw(), 0, offset, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + metadata.append(std::to_string(copyIdx) + " "); + } + + if (m_useCompression[q] && copyIdx > 0) { + m_compressionFP[q]( + pid, buffer->getBuffer().data() + writeIdx, 0, -1, + buffer->getBuffer().data() + writeIdx, copyIdx, false, clear); + } + if ((int) metadata.size() > writeIdx) + throw std::runtime_error("error: increase the 
metadata section (" +
+                                 std::to_string(metadata.size()) + " - " +
+                                 std::to_string(writeIdx) + ")");
+      std::memcpy(buffer->getBuffer().data(), metadata.data(), metadata.size());
+      if (copyIdx >= 0) {
+        copyIdx = roundOffset(Utils::getPowerOfTwo(copyIdx + writeIdx));  //roundOffset(copyIdx + writeIdx);
+#if defined(NO_DISK)
+        copyIdx = 0;
+#endif
+        checkBlockSize(copyIdx, capacity);
+        m_measuredBytes.fetch_add(copyIdx);
+
+        AckCheckpointContext context{
+            m_checkpoints[q][checkpointVersion].get(), slot.m_index,
+            &slot.m_slot, slot.getPreviousSlot(),
+            &slot.m_numberOfResults, &slot.m_updateLock, buffer};
+        assert(m_asyncFiles[q][checkpointVersion]->writeAsync(
+                   reinterpret_cast<uint8_t *>(buffer->getBuffer().data()),
+                   slot.m_index * m_slotFileSize, copyIdx, callback,
+                   context) == Status::Ok);
+        if (m_debug) {
+          std::cout << "[DBG] submitting callback for slot " +
+                           std::to_string(slot.m_index)
+                    << std::endl;
+        }
+        m_filesystem->getHandler().tryCompleteMultiple();
+      }
+    } else {
+      throw std::runtime_error(
+          "error: attempting to checkpoint slot " + std::to_string(idx) +
+          " with state " + std::to_string(slot.m_slot.load(std::memory_order_acquire)) +
+          " and checkpoint pointers: " +
+          std::to_string(m_checkpointPtrs[q][0]) + " - " +
+          std::to_string(m_checkpointPtrs[q][1]));
+    }
+  }
+  // std::cout << "[CP] worker leaving checkpoint function " << std::endl;
+}
+
+void FileBackedCheckpointCoordinator::recoverBuffer(int pid, int bId, int q) {
+  throw std::runtime_error("error: not implemented yet.");
+}
+
+void FileBackedCheckpointCoordinator::recoverWithFragments(int pid, int q) {
+  auto checkpointVersion = m_checkpointId % 2;
+  auto &slots = m_resultHandlers[q]->m_results;
+  auto numOfSlots = m_resultHandlers[q]->m_numberOfSlots;
+  auto cnt = 0;
+  int idx = -1;
+  bool clear = true;
+
+  auto buffer = UnboundedQueryBufferFactory::getInstance().newInstance(pid);
+  while (m_readySlots[q][0].try_pop(idx)) {
+    auto &slot = slots[idx];
+
+    if (m_debug) {
+      std::cout << "[DBG] restoring slot " + std::to_string(idx) +
+                       " for query " + std::to_string(q) << std::endl;
+    }
+
+    auto capacity = buffer->getBuffer().size();
+    auto writeIdx = 6 * 1024;
+    std::string metadata(writeIdx, '\0');  // metadata stored at the beginning of the slot
+    auto query = m_queries[q];
+    auto *code = dynamic_cast<AggregateOperatorCode *>(&query->getOperator()->getCode());
+    if (!code)
+      throw std::runtime_error("error: invalid aggregation casting in checkpoint coordinator");
+    auto hashtableSize = code->hasGroupBy() ? (query->getConfig() ? query->getConfig()->getHashtableSize()
+                                                                  : SystemConf::getInstance().HASH_TABLE_SIZE) : 1;
+    auto offset = hashtableSize * m_intermSizes[q];
+
+    auto fptr = m_asyncFiles[q][checkpointVersion];
+    assert(fptr->readSync(slot.m_index * m_slotFileSize, buffer->getBuffer().data(), SystemConf::getInstance().BLOCK_SIZE) == Status::Ok);
+
+    std::memcpy(metadata.data(), buffer->getBuffer().data(), writeIdx);
+    std::istringstream iss(metadata);
+    std::vector<std::string> words{std::istream_iterator<std::string>{iss},
+                                   std::istream_iterator<std::string>{}};
+    slot.m_slot.store(0);
+    slot.m_freePointer = INT_MIN;  //std::stol(words[0]);
+    slot.m_latencyMark = std::stol(words[1]);
+    slot.m_taskId = std::stoi(words[2]);
+    auto hashTableSize = std::stoi(words[4]);
+    auto intermSize = std::stoi(words[5]);
+    auto storedOffset = std::stoi(words[6]);
+    auto hashTableStart = (m_useCompression[q]) ? std::stoi(words[words.size()-2]) : 0;
+    size_t wIdx = 7;
+    auto fileOffset = writeIdx;
+
+    // initialize window fragments
+    slot.m_closingWindows = (!code->hasGroupBy()) ?
+        PartialWindowResultsFactory::getInstance().newInstance(pid) :
+        PartialWindowResultsFactory::getInstance().newInstance(pid, storedOffset);
+    slot.m_pendingWindows = (!code->hasGroupBy()) ?
+        PartialWindowResultsFactory::getInstance().newInstance(pid) :
+        PartialWindowResultsFactory::getInstance().newInstance(pid, storedOffset);
+    slot.m_openingWindows = (!code->hasGroupBy()) ?
+        PartialWindowResultsFactory::getInstance().newInstance(pid) :
+        PartialWindowResultsFactory::getInstance().newInstance(pid, storedOffset);
+    slot.m_completeWindows = PartialWindowResultsFactory::getInstance().newInstance(pid);
+
+    slot.m_graph = LineageGraphFactory::getInstance().newInstance();
+
+    int partialWindows = 0;
+    bool flag = false;
+    while (wIdx < words.size()) {
+      if (words[wIdx] == "cl") {
+        auto numOfWindows = std::stoi(words[wIdx+1]);
+        partialWindows += numOfWindows;
+        slot.m_closingWindows->incrementCount(numOfWindows);
+        if (code->hasGroupBy()) {
+          auto startPtrs = slot.m_closingWindows->getStartPointers().data();
+          auto prevStartPos = 0;
+          for (auto w = 0; w < numOfWindows; ++w) {
+            int pos = 0;  // stays 0 on the compressed path
+            if (!m_useCompression[q]) {
+              pos = storedOffset;
+              startPtrs[w] = w * hashTableSize;
+              std::memcpy(slot.m_closingWindows->getBufferPtrs()[w], buffer->getBuffer().data() + fileOffset, pos);
+            } else {
+              auto endPos = std::stoi(words[wIdx+2+w]);
+              startPtrs[w] = w * hashTableSize;
+              if (prevStartPos < endPos)
+                m_decompressionFP[q](pid, buffer->getBuffer().data() + writeIdx, prevStartPos, endPos, slot.m_closingWindows->getBufferPtrs()[w],
+                                     hashTableStart, false, flag);
+              prevStartPos = endPos;
+            }
+            fileOffset += pos;
+          }
+          wIdx += (m_useCompression[q]) ? numOfWindows : 0;
+        } else {
+          // setPosition
+          throw std::runtime_error("error: not implemented yet");
+        }
+        wIdx += 1;
+      }
+      else if (words[wIdx] == "p") {
+        auto numOfWindows = std::stoi(words[wIdx+1]);
+        partialWindows += numOfWindows;
+        slot.m_pendingWindows->incrementCount(numOfWindows);
+        if (code->hasGroupBy()) {
+          auto startPtrs = slot.m_pendingWindows->getStartPointers().data();
+          auto prevStartPos = 0;
+          for (auto w = 0; w < numOfWindows; ++w) {
+            int pos = 0;  // stays 0 on the compressed path
+            if (!m_useCompression[q]) {
+              pos = storedOffset;
+              startPtrs[w] = w * hashTableSize;
+              std::memcpy(slot.m_pendingWindows->getBufferPtrs()[w], buffer->getBuffer().data() + fileOffset, pos);
+            } else {
+              auto endPos = std::stoi(words[wIdx+2+w]);
+              startPtrs[w] = w * hashTableSize;
+              if (prevStartPos < endPos)
+                m_decompressionFP[q](pid, buffer->getBuffer().data() + writeIdx, prevStartPos, endPos, slot.m_pendingWindows->getBufferPtrs()[w],
+                                     hashTableStart, false, flag);
+              prevStartPos = endPos;
+            }
+            fileOffset += pos;
+          }
+          wIdx += (m_useCompression[q]) ? numOfWindows : 0;
+        } else {
+          throw std::runtime_error("error: not implemented yet");
+        }
+        wIdx += 1;
+      }
+      else if (words[wIdx] == "o") {
+        auto numOfWindows = std::stoi(words[wIdx+1]);
+        partialWindows += numOfWindows;
+        slot.m_openingWindows->incrementCount(numOfWindows);
+        if (code->hasGroupBy()) {
+          auto startPtrs = slot.m_openingWindows->getStartPointers().data();
+          auto prevStartPos = 0;
+          for (auto w = 0; w < numOfWindows; ++w) {
+            int pos = 0;  // stays 0 on the compressed path
+            if (!m_useCompression[q]) {
+              pos = storedOffset;
+              startPtrs[w] = w * hashTableSize;
+              std::memcpy(slot.m_openingWindows->getBufferPtrs()[w], buffer->getBuffer().data() + fileOffset, pos);
+            } else {
+              auto endPos = std::stoi(words[wIdx+2+w]);
+              startPtrs[w] = w * hashTableSize;
+              if (prevStartPos < endPos)
+                m_decompressionFP[q](pid, buffer->getBuffer().data() + writeIdx, prevStartPos, endPos, slot.m_openingWindows->getBufferPtrs()[w],
+                                     hashTableStart, false, flag);
+              prevStartPos = endPos;
+            }
+            fileOffset += pos;
+          }
+          wIdx += (m_useCompression[q]) ? numOfWindows : 0;
+        } else {
+          throw std::runtime_error("error: not implemented yet");
+        }
+        wIdx += 1;
+      }
+      else if (words[wIdx] == "co") {
+        slot.m_completeWindows->incrementCount(1);
+        auto pos = std::stoi(words[wIdx+1]);
+        slot.m_completeWindows->setPosition(pos);
+        auto startPtrs = slot.m_completeWindows->getStartPointers().data();
+        startPtrs[0] = 0;
+        startPtrs[1] = pos;
+        if (!m_useCompression[q]) {
+          std::memcpy(slot.m_completeWindows->getBuffer().data(), buffer->getBuffer().data() + fileOffset, pos);
+        } else {
+          auto startPos = std::stoi(words[wIdx+2]);
+          auto endPos = std::stoi(words[wIdx+3]);
+          wIdx += 1;
+          if (startPos < endPos)
+            m_decompressionFP[q](pid, buffer->getBuffer().data() + writeIdx, startPos, endPos, slot.m_completeWindows->getBuffer().data(),
+                                 hashTableStart, true, flag);
+        }
+        fileOffset += pos;
+        wIdx += (m_useCompression[q]) ? 2 : 0;
+        wIdx += 1;
+      }
+
+      wIdx++;
+    }
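Concretely, the tagged token stream parsed above has the shape written by checkpointWithFragments: seven fixed fields, then one section per fragment type ("cl", "p", "o", "co"), each announcing a window count or byte size, with compressed positions appended when compression is on. A tiny standalone parser over a made-up header (all values illustrative):

```cpp
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // Hypothetical slot header: 7 fixed fields, then tagged fragment sections.
  std::string header = "42 -1 1337 1 1024 16 16384 cl 2 4096 8192 co 512 0 9041";
  std::istringstream iss(header);
  std::vector<std::string> words{std::istream_iterator<std::string>{iss},
                                 std::istream_iterator<std::string>{}};
  size_t w = 7;  // skip the fixed fields, as the recovery code does
  while (w < words.size()) {
    if (words[w] == "cl" || words[w] == "p" || words[w] == "o") {
      std::string type = words[w];
      std::cout << words[w + 1] << " windows of type " << type << "\n";
      w += 2;
    } else if (words[w] == "co") {
      std::cout << words[w + 1] << " bytes of complete windows\n";
      w += 2;
    } else {
      ++w;  // compressed end-position tokens are skipped in this sketch
    }
  }
}
```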
+    if (partialWindows > 0) {
+      slot.m_slot.store(1);
+    } else {
+      slot.m_slot.store(3);
+    }
+    m_recoveries[q]->updateCounter(1);
+
+    if (m_debug) {
+      std::cout << "[DBG] worker finishing recovering slot " << idx
+                << " with " << slot.m_taskId << " for query "
+                << q << std::endl;
+    }
+  }
+  UnboundedQueryBufferFactory::getInstance().free(buffer);
+  // std::cout << "[CP] worker leaving checkpoint function " << std::endl;
+}
+
+void FileBackedCheckpointCoordinator::recoverWithoutFragments(int pid, int q) {
+  throw std::runtime_error("error: not implemented yet.");
+  // std::cout << "[CP] worker leaving checkpoint function " << std::endl;
+}
+
+void FileBackedCheckpointCoordinator::persistGuid() {
+  auto str = m_guid.ToString();
+  auto *arr = str.c_str();
+  auto size = str.size();
+  pmem::obj::transaction::run(m_pop, [&] {
+    if (m_root->m_guidSize > 0)
+      pmem::obj::delete_persistent<char[]>(m_root->m_guid, m_root->m_guidSize);
+
+    pmem::obj::persistent_ptr<char[]> new_array =
+        pmem::obj::make_persistent<char[]>(size);
+
+    for (size_t i = 0; i < size; i++) new_array[i] = arr[i];
+
+    m_root->m_guidSize = (size_t)size;
+    m_root->m_guid = new_array;
+  });
+}
+
+void FileBackedCheckpointCoordinator::persistMetadata(std::string &metadata) {
+  auto str = metadata;
+  auto *arr = str.c_str();
+  auto size = str.size();
+  pmem::obj::transaction::run(m_pop, [&] {
+    if (m_root->m_mSize > 0)
+      pmem::obj::delete_persistent<char[]>(m_root->m_metadata, m_root->m_mSize);
+
+    pmem::obj::persistent_ptr<char[]> new_array =
+        pmem::obj::make_persistent<char[]>(size);
+
+    for (size_t i = 0; i < size; i++) new_array[i] = arr[i];
+
+    m_root->m_mSize = (size_t)size;
+    m_root->m_metadata = new_array;
+  });
+}
+
+void FileBackedCheckpointCoordinator::unsafePrint() {
+  std::string str;
+  for (int q = 0; q < m_numOfQueries; ++q) {
+    auto checkpointVersion = m_checkpointId % 2;
+    str.append("query " + std::to_string(q) + " nextToForward " +
+               std::to_string(m_resultHandlers[q]->m_nextToForward));
+    str.append(" nextToAggregate " +
+               std::to_string(m_resultHandlers[q]->m_nextToAggregate) + "\n");
+    int idx = 0;
+    if (m_resultHandlers[q]->m_hasWindowFragments) {
+      auto &slots = m_resultHandlers[q]->m_results;
+      for (auto &slot : slots) {
+        str.append(std::to_string(idx) + ": slot " +
+                   std::to_string(slot.m_slot.load()) + " taskId " +
+                   std::to_string(slot.m_taskId) + " windowFrags " +
+                   std::to_string(slot.getNumberOfWindowFragments(true)) +
+                   "\n");
+        idx++;
+      }
+    } else {
+      auto &slots = m_resultHandlers[q]->m_resultsWithoutFrags;
+      for (auto &slot : slots) {
+        str.append(std::to_string(idx) + ": slot " +
+                   std::to_string(slot.m_slot.load()) + " taskId " +
+                   std::to_string(slot.m_taskId) + "\n");
+        idx++;
+      }
+    }
+  }
+  std::cout << "[CP] warning: \n" << str << std::endl;
+}
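The two persist functions above follow the standard libpmemobj-cpp recipe: inside a single transaction, delete the previous persistent array, allocate a fresh one, and copy the bytes in, so a crash can never leave a torn metadata blob. A self-contained sketch of that recipe, using a placeholder root structure and pool handle (the coordinator's real root, PMem, additionally holds a version and a validity flag):

```cpp
#include <libpmemobj++/make_persistent_array.hpp>
#include <libpmemobj++/p.hpp>
#include <libpmemobj++/persistent_ptr.hpp>
#include <libpmemobj++/pool.hpp>
#include <libpmemobj++/transaction.hpp>
#include <string>

// Placeholder root object for this sketch.
struct Meta {
  pmem::obj::persistent_ptr<char[]> m_data;
  pmem::obj::p<size_t> m_size;
};

// Atomically replace the persisted byte array: after a crash, recovery sees
// either the complete old blob or the complete new one, never a mix.
void persistBlob(pmem::obj::pool<Meta> &pop, const std::string &blob) {
  auto root = pop.root();
  pmem::obj::transaction::run(pop, [&] {
    if (root->m_size > 0)
      pmem::obj::delete_persistent<char[]>(root->m_data, root->m_size);
    root->m_data = pmem::obj::make_persistent<char[]>(blob.size());
    for (size_t i = 0; i < blob.size(); i++) root->m_data[i] = blob[i];
    root->m_size = blob.size();
  });
}
```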
+void FileBackedCheckpointCoordinator::createMergeTask(int query) {
+  if (m_taskDispatchers[query]->m_workerQueue->size_approx() < m_queries[query]->getTaskQueueCapacity()) {
+    auto batch = WindowBatchFactory::getInstance().newInstance(
+        0, 0, -1, -1, m_queries[query].get(), nullptr,
+        &m_queries[query]->getWindowDefinition(), m_queries[query]->getSchema(),
+        -1);
+    batch->setTaskType(TaskType::MERGE_FORWARD);
+    auto task = TaskFactory::getInstance().newInstance(0, batch, nullptr,
+                                                       TaskType::MERGE_FORWARD);
+    if (!m_taskDispatchers[query]->m_workerQueue->try_enqueue(task)) {
+      std::cout << "warning: waiting to enqueue MERGE_FORWARD task in the "
+                << "checkpoint coordinator with size "
+                << std::to_string(
+                       m_taskDispatchers[query]->m_workerQueue->size_approx())
+                << std::endl;
+      WindowBatchFactory::getInstance().free(batch);
+      TaskFactory::getInstance().free(task);
+    }
+  }
+}
+
+void FileBackedCheckpointCoordinator::createCheckpointTask(int query) {
+  if (m_taskDispatchers[query]->m_workerQueue->size_approx() < m_queries[query]->getTaskQueueCapacity()) {
+    auto batch = WindowBatchFactory::getInstance().newInstance(
+        0, 0, -1, -1, m_queries[query].get(), nullptr,
+        &m_queries[query]->getWindowDefinition(), m_queries[query]->getSchema(),
+        -1);
+    batch->setTaskType(TaskType::CHECKPOINT);
+    auto task = TaskFactory::getInstance().newInstance(0, batch, nullptr,
+                                                       TaskType::CHECKPOINT);
+    if (!m_taskDispatchers[query]->m_workerQueue->try_enqueue(task)) {
+      std::cout << "warning: waiting to enqueue CHECKPOINT task in the "
+                << "checkpoint coordinator with size "
+                << std::to_string(
+                       m_taskDispatchers[query]->m_workerQueue->size_approx())
+                << std::endl;
+      WindowBatchFactory::getInstance().free(batch);
+      TaskFactory::getInstance().free(task);
+    }
+  }
+}
+
+void FileBackedCheckpointCoordinator::createRecoveryTask(int query) {
+  if (m_taskDispatchers[query]->m_workerQueue->size_approx() < m_queries[query]->getTaskQueueCapacity()) {
+    auto batch = WindowBatchFactory::getInstance().newInstance(
+        0, 0, -1, -1, m_queries[query].get(), nullptr,
+        &m_queries[query]->getWindowDefinition(), m_queries[query]->getSchema(),
+        -1);
+    batch->setTaskType(TaskType::RECOVER);
+    auto task = TaskFactory::getInstance().newInstance(0, batch, nullptr,
+                                                       TaskType::RECOVER);
+    if (!m_taskDispatchers[query]->m_workerQueue->try_enqueue(task)) {
+      std::cout << "warning: waiting to enqueue RECOVER task in the "
+                << "checkpoint coordinator with size "
+                << std::to_string(
+                       m_taskDispatchers[query]->m_workerQueue->size_approx())
+                << std::endl;
+      WindowBatchFactory::getInstance().free(batch);
+      TaskFactory::getInstance().free(task);
+    }
+  }
+}
+
+void FileBackedCheckpointCoordinator::checkBlockSize(size_t size, size_t capacity) {
+  if (size > capacity)
+    throw std::runtime_error(
+        "error: the write exceeds the size of slots in the "
+        "checkpoint stage: " +
+        std::to_string(size) + " > " + std::to_string(capacity));
+}
+
+void FileBackedCheckpointCoordinator::topologicalSort(int q, std::vector<bool> &visited, std::stack<int> &stack) {
+  visited[q] = true;
+  for (int i = 0; i < m_queries[q]->getNumberOfUpstreamQueries(); i++) {
+    auto qId = m_queries[q]->getUpstreamQuery(i)->getId();
+    if (!visited[qId]) {
+      topologicalSort(qId, visited, stack);
+    }
+  }
+  stack.push(q);
+}
\ No newline at end of file
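topologicalSort above is a plain depth-first search over the upstream-query edges: a query is pushed only after all of its producers, so popping the stack yields each query before any query it consumes from. A standalone illustration with three hypothetical queries, where q2 reads from q0 and q1:

```cpp
#include <iostream>
#include <stack>
#include <vector>

// Standalone version of the DFS above over a plain adjacency list:
// upstream[q] lists the queries that q consumes from.
void topoSort(int q, const std::vector<std::vector<int>> &upstream,
              std::vector<bool> &visited, std::stack<int> &stack) {
  visited[q] = true;
  for (int u : upstream[q])
    if (!visited[u]) topoSort(u, upstream, visited, stack);
  stack.push(q);  // q lands above all of its producers on the stack
}

int main() {
  std::vector<std::vector<int>> upstream = {{}, {}, {0, 1}};  // q2 reads q0, q1
  std::vector<bool> visited(3, false);
  std::stack<int> stack;
  for (int q = 0; q < 3; ++q)
    if (!visited[q]) topoSort(q, upstream, visited, stack);
  while (!stack.empty()) {  // pops q2, then q1, then q0: consumers first
    std::cout << "q" << stack.top() << "\n";
    stack.pop();
  }
}
```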
diff --git a/src/checkpoint/FileBackedCheckpointCoordinator.h b/src/checkpoint/FileBackedCheckpointCoordinator.h
new file mode 100644
index 0000000..ba2052d
--- /dev/null
+++ b/src/checkpoint/FileBackedCheckpointCoordinator.h
@@ -0,0 +1,193 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "utils/Guid.h"
+#include "utils/SystemConf.h"
+
+class Checkpoint;
+class Recovery;
+class Query;
+struct PartialResultSlot;
+class ResultHandler;
+class ITaskDispatcher;
+class ResultHandler;
+struct FileOptions;
+class QueueIoHandler;
+template <typename T>
+class FileSystemDisk;
+template <typename T>
+class FileSystemFile;
+class CheckpointStatistics;
+struct LineageGraph;
+
+/*
+ * \brief The checkpoint coordinator coordinates the snapshots of operators and
+ * state. It triggers the checkpoint by sending the messages to the relevant
+ * operators and collects the checkpoint acknowledgements. It also collects and
+ * maintains the overview of the state handles reported by the tasks that
+ * acknowledge the checkpoint.
+ *
+ * */
+
+class FileBackedCheckpointCoordinator {
+ private:
+  long m_jobId;
+  bool m_triggersCheckpoints;
+  std::atomic<bool> m_waitCondition;
+  Guid m_guid;
+  size_t m_checkpointId;
+  std::atomic<int> m_checkpointCounter;
+  std::atomic<int> m_recoveryCounter;
+  const int m_numOfQueries;
+  const std::vector<std::shared_ptr<Query>> &m_queries;
+  std::vector<std::shared_ptr<Query>> m_sortedQueries;
+  const int m_numberOfCheckpoints = 2;  // use double buffering for checkpoints
+  std::vector<std::vector<std::shared_ptr<Checkpoint>>> m_checkpoints;
+  std::vector<std::shared_ptr<Recovery>> m_recoveries;
+  std::vector<std::vector<bool>> m_checkpointInput;
+  std::vector<long> m_lastTaskId;
+  std::vector<std::vector<long>> m_checkpointPtrs;  // the last slot holds the input buffer offset
+  std::vector<std::shared_ptr<ITaskDispatcher>> m_taskDispatchers;
+  std::vector<std::shared_ptr<ResultHandler>> m_resultHandlers;
+  std::atomic<bool> m_hasConcurrentCheckpoint;  // allow only a single checkpoint to happen
+
+  std::vector<int> m_intermSizes;
+  std::vector<int> m_outputSizes;
+  size_t m_expectedBytes;
+  std::atomic<size_t> m_measuredBytes;
+
+  std::unique_ptr<CheckpointStatistics> m_statistics;
+
+  // variables used for compression
+  std::vector<bool> m_useCompression;
+  std::vector<std::function<void(int, char *, int, int, char *, int &, bool, bool)>> m_compressionFP;
+  std::vector<std::function<void(int, char *, int, int, char *, int &, bool, bool)>> m_decompressionFP;
+
+  // slots for checkpointing
+  std::vector<std::vector<tbb::concurrent_queue<int>>> m_readySlots;
+
+  typedef QueueIoHandler adapter_t;
+  typedef FileSystemDisk<adapter_t> disk_t;
+  typedef FileSystemFile<adapter_t> file_t;
+
+  std::shared_ptr<disk_t> m_filesystem;
+
+  // Variables for persisting metadata
+  struct PMem;
+  const size_t m_poolSize;
+  const std::string m_layout = "";
+  pmem::obj::pool<PMem> m_pop;
+  pmem::obj::persistent_ptr<PMem> m_root;
+  std::string m_pmFileName;
+  file_t *m_pmFile;
+
+  // Variables for persisting asynchronously the actual data
+  size_t m_slotFileSize;
+  std::vector<std::vector<std::string>> m_asyncFileNames;
+  std::vector<std::vector<file_t *>> m_asyncFiles;
+  std::unique_ptr<FileOptions> m_asyncFileOptions;
+
+  std::atomic<bool> *m_clearFiles;
+  std::atomic<bool> m_ready;
+  const bool m_checkpointInputQueues = true;
+  const bool m_debug = false;
+  const bool m_printMessages = true;
+
+ public:
+  FileBackedCheckpointCoordinator(
+      long jobId, const std::vector<std::shared_ptr<Query>> &queries, std::atomic<bool> *clearFiles = nullptr,
+      std::shared_ptr<disk_t> filesystem = nullptr, bool triggersCheckpoints = false);
+
+  void operator()();
+
+  void checkpoint(int pid, int query);
+
+  void recover(int pid, int query);
+
+  int roundOffset(int offset);
+
+  bool prepareCheckpoint();
+
+  bool prepareRecovery();
+
+  void setReady();
+
+  void setCompressionFP(int query, std::function<void(int, char *, int, int, char *, int &, bool, bool)> fp);
+
+  void setDecompressionFP(int query, std::function<void(int, char *, int, int, char *, int &, bool, bool)> fp);
+
+  void signalWaitCondition();
+
+  bool hasWorkUnsafe(int query);
+
+  void clearPersistentMemory();
+
+  void tryToPurgeCheckpoint(std::shared_ptr<LineageGraph> &graph);
+
+  ~FileBackedCheckpointCoordinator();
+
+ private:
+  /*
+   * \brief This is used to store metadata for the checkpoints.
+   * A checkpoint is valid if every pipeline has checkpointed
+   * successfully.
+   *
+   * */
+ * + * */ + struct PMem { + pmem::obj::p> m_version; + pmem::obj::persistent_ptr m_guid; + pmem::obj::p m_guidSize; + pmem::obj::persistent_ptr m_metadata; + pmem::obj::p m_mSize; + pmem::obj::p> m_valid; + pmem::obj::persistent_ptr next; + PMem() { + m_version.get_rw() = 0L; + m_guidSize.get_rw() = 0; + m_mSize.get_rw() = 0; + m_valid.get_rw() = false; + }; + /** Copy constructor is deleted */ + PMem(const PMem &) = delete; + /** Assignment operator is deleted */ + PMem &operator=(const PMem &) = delete; + }; + + void checkpointBuffer(int pid, int bufferId, int query); + + void checkpointWithFragments(int pid, int query); + + void checkpointWithoutFragments(int pid, int query); + + void recoverBuffer(int pid, int bufferId, int query); + + void recoverWithFragments(int pid, int query); + + void recoverWithoutFragments(int pid, int query); + + void persistGuid(); + + void persistMetadata(std::string &metadata); + + void unsafePrint(); + + void createMergeTask(int query); + + void createCheckpointTask(int query); + + void createRecoveryTask(int query); + + void checkBlockSize(size_t size, size_t capacity); + + void topologicalSort(int q, std::vector &visited, std::stack &stack); +}; \ No newline at end of file diff --git a/src/checkpoint/LineageGraph.cpp b/src/checkpoint/LineageGraph.cpp new file mode 100644 index 0000000..5aa6765 --- /dev/null +++ b/src/checkpoint/LineageGraph.cpp @@ -0,0 +1,214 @@ +#include "checkpoint/LineageGraph.h" + +#include +#include +#include +#include + +#include "buffers/QueryBuffer.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" + +LineageGraph::LineageGraph(std::vector> &queries) + : m_vertices(queries.size()), m_graph(m_vertices), m_clockVector(m_vertices * 3) { + bfs(queries); +} + +LineageGraph::LineageGraph(LineageGraph &lineageGraph) { + m_vertices = lineageGraph.m_vertices; + //std::unordered_set set; + for (auto &v : lineageGraph.m_graph) { + //set.insert(v.get()); + m_graph.push_back(std::make_shared(v->m_query)); + for (auto &c : v->m_children) { + m_graph.back()->m_children.push_back(m_graph[c->m_query->getId()]); + } + } + + // sanity check + //for (auto &v : m_graph) { + // if (set.find(v.get()) != set.end()) + // throw std::runtime_error("error: the deep copy of the lineage graph failed"); + //} + m_root = m_graph[m_vertices - 1]; +} + +void LineageGraph::mergeGraphs(std::shared_ptr &lineageGraph) { + if (lineageGraph) { + for (size_t v = 0; v < m_vertices; ++v) { + // todo: is this correct? + m_graph[v]->m_freePtr1 = (lineageGraph->m_graph[v]->m_freeOffset1 > m_graph[v]->m_freeOffset1) ? + lineageGraph->m_graph[v]->m_freePtr1 : m_graph[v]->m_freePtr1; + m_graph[v]->m_freeOffset1 = std::max(lineageGraph->m_graph[v]->m_freeOffset1, + m_graph[v]->m_freeOffset1); + m_graph[v]->m_freePtr2 = (lineageGraph->m_graph[v]->m_freeOffset2 > m_graph[v]->m_freeOffset2) ? 
+ lineageGraph->m_graph[v]->m_freePtr2 : m_graph[v]->m_freePtr2; + m_graph[v]->m_freeOffset2 = std::max(lineageGraph->m_graph[v]->m_freeOffset2, + m_graph[v]->m_freeOffset2); + m_graph[v]->m_outputPtr = std::max(lineageGraph->m_graph[v]->m_outputPtr, + m_graph[v]->m_outputPtr); + } + if (!m_isValid && lineageGraph->m_isValid) { + m_isValid = true; + } + if (lineageGraph.use_count() == 1) { + LineageGraphFactory::getInstance().free(lineageGraph); + } else { + lineageGraph.reset(); + } + } +} + +void LineageGraph::advanceOffsets(std::shared_ptr &lineageGraph) { + if (lineageGraph) { + for (size_t v = 0; v < m_vertices; ++v) { + m_graph[v]->m_freeOffset1 = std::max(lineageGraph->m_graph[v]->m_freeOffset1, + m_graph[v]->m_freeOffset1); + m_graph[v]->m_freeOffset2 = std::max(lineageGraph->m_graph[v]->m_freeOffset2, + m_graph[v]->m_freeOffset2); + } + } +} + +void LineageGraph::freePersistentState(int qId) { + if (qId >= (int)m_vertices) + throw std::runtime_error("error: invalid query id"); + + if (!m_isValid) + return; + + std::vector visited(qId + 1, false); + std::list queue; + visited[qId] = true; + queue.push_back(qId); + + while (!queue.empty()) { + auto q = queue.front(); + queue.pop_front(); + m_graph[q]->tryToFree(); + for (auto &v : m_graph[q]->m_children) { + auto id = v->m_query->getId(); + if (id >= (int)m_vertices) + throw std::runtime_error("error: invalid query id"); + if (!visited[id]) { + visited[id] = true; + queue.push_back(id); + } + //m_graph[q]->m_children.push_back(m_graph[id]); + } + /*for (int i = 0; i < queries[q]->getNumberOfDownstreamQueries(); i++) { + auto qId = queries[q]->getDownstreamQuery(i)->getId(); + if (qId >= (int) m_vertices) + throw std::runtime_error("error: invalid query id"); + if (!visited[qId]) { + visited[qId] = true; + m_graph[qId] = std::make_shared(queries[qId]); + queue.push_back(qId); + } + }*/ + } +} + +void LineageGraph::setOutputPtr (int qId, long outputPtr) { + if (outputPtr <= 0) + return; + m_graph[qId]->m_outputPtr = outputPtr; + m_isValid = true; +} + +void LineageGraph::clear() { + for (auto &v : m_graph) { + v->m_freePtr1 = INT_MIN; + v->m_freeOffset1 = INT_MIN; + v->m_freePtr2 = INT_MIN; + v->m_freeOffset2 = INT_MIN; + v->m_outputPtr = INT_MIN; + } + m_isValid = false; +} + +void LineageGraph::bfs(std::vector> &queries) { + if (queries.empty()) return; + + std::vector visited(m_vertices, false); + std::list queue; + visited[m_vertices - 1] = true; + m_graph[m_vertices - 1] = + std::make_shared(queries[m_vertices - 1]); + queue.push_back(m_vertices - 1); + + while (!queue.empty()) { + auto q = queue.front(); + queue.pop_front(); + for (int i = 0; i < queries[q]->getNumberOfUpstreamQueries(); i++) { + auto qId = queries[q]->getUpstreamQuery(i)->getId(); + if (qId >= (int)m_vertices) + throw std::runtime_error("error: invalid query id"); + if (!visited[qId]) { + visited[qId] = true; + m_graph[qId] = std::make_shared(queries[qId]); + queue.push_back(qId); + } + m_graph[q]->m_children.push_back(m_graph[qId]); + } + /*for (int i = 0; i < queries[q]->getNumberOfDownstreamQueries(); i++) { + auto qId = queries[q]->getDownstreamQuery(i)->getId(); + if (qId >= (int) m_vertices) + throw std::runtime_error("error: invalid query id"); + if (!visited[qId]) { + visited[qId] = true; + m_graph[qId] = std::make_shared(queries[qId]); + queue.push_back(qId); + } + }*/ + } + + // assume that the root is the last query + m_root = m_graph[m_vertices - 1]; +} + +void LineageGraph::serialize() { + size_t idx = 0; + for (auto &l: m_graph) { + 
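// three clock entries per query node, written in graph order:
+    // freeOffset1, freeOffset2, outputPtr (hence m_clockVector's size of
+    // m_vertices * 3)
+    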
m_clockVector[idx++] = l->m_freeOffset1; + m_clockVector[idx++] = l->m_freeOffset2; + m_clockVector[idx++] = l->m_outputPtr; + } +} + +LineageGraph::LineageNode::LineageNode(std::shared_ptr query, + long freePtr1, long freePtr2, long outputPtr) + : m_query(query), m_freePtr1(freePtr1), m_freePtr2(freePtr2), m_freeOffset1(INT_MIN), m_freeOffset2(INT_MIN), m_outputPtr(outputPtr) {} + +void LineageGraph::LineageNode::set(std::shared_ptr query, long freePtr1, long freePtr2, long outputPtr) { + m_query = query; + m_freePtr1 = freePtr1; + m_freePtr2 = freePtr2; + m_outputPtr = outputPtr; +} +void LineageGraph::LineageNode::addChild(std::shared_ptr &node) { + m_children.push_back(node); +} +// todo: fix that this gets called only when results are outputted externally +void LineageGraph::LineageNode::tryToFree() { + if (m_freePtr1 != INT_MIN) { + //std::cout << "freeing ptr " << m_freePtr1 << " from shared_ptr " << this << std::endl; + if (m_freeOffset1 != INT_MIN) { + //m_query->getOperator()->updateInputPtr(m_freeOffset1, true); + m_query->getBuffer()->getFileStore()->freePersistent(m_query->getId(), 0, m_freeOffset1); + } + m_query->getOperator()->updateInputPtr(m_freePtr1, true); + m_query->getBuffer()->free(m_freePtr1, true); + } + if (m_freePtr2 != INT_MIN) { + if (m_freeOffset2 != INT_MIN) { + //m_query->getOperator()->updateInputPtr(m_freeOffset2, false); + m_query->getBuffer()->getFileStore()->freePersistent(m_query->getId(), 1, m_freeOffset2); + } + m_query->getOperator()->updateInputPtr(m_freePtr2, false); + m_query->getBuffer()->free(m_freePtr2, true); + } + if (m_outputPtr != INT_MIN) { + m_query->getOperator()->updateOutputPtr(m_outputPtr); + } +} diff --git a/src/checkpoint/LineageGraph.h b/src/checkpoint/LineageGraph.h new file mode 100644 index 0000000..1904841 --- /dev/null +++ b/src/checkpoint/LineageGraph.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include + +class Query; + +/* + * \brief The lineage tracks the data dependencies between input-output tuples + * for all the operators in a pipeline. It can be serialized to a vector clock. 
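The clock produced by serialize()
+ * packs three longs per query node, in graph order (sketch of the layout):
+ *
+ *   [q0.freeOffset1, q0.freeOffset2, q0.outputPtr,
+ *    q1.freeOffset1, q1.freeOffset2, q1.outputPtr, ...]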
+ * + * */ + +struct LineageGraph { + struct LineageNode; + bool m_isValid = false; + size_t m_vertices; + std::shared_ptr m_root; + std::vector> m_graph; + std::vector m_clockVector; + + explicit LineageGraph(std::vector> &queries); + + LineageGraph(LineageGraph &lineageGraph); + + void mergeGraphs(std::shared_ptr &lineageGraph); + + void advanceOffsets(std::shared_ptr &lineageGraph); + + void freePersistentState(int qId); + + void setOutputPtr (int qId, long outputPtr); + + void clear(); + + void bfs(std::vector> &queries); + + void serialize(); + + struct LineageNode { + std::vector> m_children; + std::shared_ptr m_query; + long m_freePtr1, m_freePtr2, m_freeOffset1, m_freeOffset2, m_outputPtr; + explicit LineageNode(std::shared_ptr query = nullptr, long freePtr1 = INT_MIN, + long freePtr2 = INT_MIN, long m_outputPtr = INT_MIN); + void set (std::shared_ptr query, long freePtr1, long freePtr2, long outputPtr); + void addChild (std::shared_ptr &node); + void tryToFree(); + }; +}; \ No newline at end of file diff --git a/src/checkpoint/LineageGraphFactory.h b/src/checkpoint/LineageGraphFactory.h new file mode 100644 index 0000000..08bd88e --- /dev/null +++ b/src/checkpoint/LineageGraphFactory.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#include "tbb/concurrent_queue.h" + +#include "checkpoint/LineageGraph.h" +#include "utils/SystemConf.h" + +/* + * \brief This class creates a single pool of LineageGraphs used to perform + * dependency tracking. + * + * */ + +class LineageGraphFactory { + private: + bool isReady = false; + std::atomic count; + std::shared_ptr m_graph; + tbb::concurrent_queue> m_pool; + LineageGraphFactory() {}; + + public: + static LineageGraphFactory &getInstance() { + static LineageGraphFactory instance; + return instance; + } + + void setGraph(std::vector> &queries) { + if (!isReady) { + m_graph = std::make_shared(queries); + isReady = true; + } + } + + LineageGraphFactory(LineageGraphFactory const &) = delete; + void operator=(LineageGraphFactory const &) = delete; + + std::shared_ptr newInstance() { + if (!isReady) + throw std::runtime_error("error: the lineage graph is not set"); + + std::shared_ptr graph; + bool hasRemaining = m_pool.try_pop(graph); + if (!hasRemaining) { + count.fetch_add(1); + graph = std::make_shared(*m_graph); + } else { + while (graph.use_count() != 1) { + hasRemaining = m_pool.try_pop(graph); + if (!hasRemaining) { + count.fetch_add(1, std::memory_order_seq_cst); + graph = std::make_shared(*m_graph); + } + } + } + + //std::stringstream ss; ss << graph->m_graph[0].get(); + //std::cout << "allocating share_ptr " + ss.str() << std::endl; + return graph; + } + + void free(std::shared_ptr &graph) { + //std::stringstream ss; ss << graph->m_graph[0].get(); + //std::cout << "returning share_ptr " + ss.str() << std::endl; + graph->clear(); + m_pool.push(graph); + } + + long getCount() { + return count.load(); + } +}; diff --git a/src/checkpoint/Recovery.h b/src/checkpoint/Recovery.h new file mode 100644 index 0000000..611579b --- /dev/null +++ b/src/checkpoint/Recovery.h @@ -0,0 +1,149 @@ +#pragma once + +#include +#include + +#include "utils/SystemConf.h" + +/* + * \brief This class describes the process of recovery for a pipeline. 
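Workers acknowledge each recovered
+ * slot; once the counter reaches the expected slot count the state flips to
+ * FINISHED (illustrative use of the interface below):
+ *
+ *   recovery.increaseSlots(numSlots);       // set the expectation
+ *   recovery.updateCounter(bytesRestored);  // called once per recovered slot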
+ * + * */ + +enum RecoveryState : uint8_t { FINISHED, AWAITING }; + +class Recovery { + private: + int m_pipelineId; + long m_triggerTimestamp; + std::atomic m_numberOfSlots; + std::atomic m_inputQueueSlots; + std::mutex m_completionLock; + std::atomic m_counter; + std::atomic m_workers; + long m_lastTaskId; + std::atomic *m_recoveryCounter; + RecoveryState m_state; + std::atomic m_recoverySize; + std::atomic m_recoveryDuration; + std::string m_filePath; + //std::atomic m_readyFlag; + + friend class FileBackedCheckpointCoordinator; + + public: + explicit Recovery(long id = -1, long timestamp = -1, int slots = 0, + long taskId = -1, + std::atomic *recoveryCounter = nullptr) + : + m_pipelineId(0), + m_triggerTimestamp(timestamp), + m_numberOfSlots(slots), + m_inputQueueSlots(0), + m_counter(0), + m_workers(SystemConf::getInstance().WORKER_THREADS), + m_lastTaskId(taskId), + m_recoveryCounter(recoveryCounter), + m_state(RecoveryState::AWAITING), + m_recoverySize(0), + m_recoveryDuration(0) {}; + + void updateCounter(size_t size = 0) { + m_counter.fetch_add(1); + /*int oldValue = m_counter.load() ; + while(!m_counter.compare_exchange_weak(oldValue, oldValue + 1, + std::memory_order_release, + std::memory_order_relaxed)) { + _mm_pause(); + oldValue = m_counter.load() ; + }*/ + updateSize(size); + if (m_counter.load() == m_numberOfSlots.load()) { + const std::lock_guard lock(m_completionLock); + setComplete(); + if (m_counter.load() > m_numberOfSlots.load()) { + std::cout << "m_counter " << m_counter.load() << " m_numberOfSlots " + << m_numberOfSlots.load() << std::endl; + throw std::runtime_error( + "error: the counter of the recovery exceeds the expected number"); + } + } + } + + void updateSize(size_t size) { m_recoverySize.fetch_add(size); } + + void updateDuration(size_t duration) { + throw std::runtime_error( + "error: the updateDuration function is not implemented yet"); + } + + void setFilePath(const std::string path) { m_filePath = path; } + + void setRecoveryCounter(std::atomic *recoveryCounter) { + m_recoveryCounter = recoveryCounter; + } + + void resetSlots() { m_numberOfSlots.store(0); } + + void increaseSlots(int slots, int inputQueueSlots = 0) { + m_numberOfSlots.fetch_add(slots); + m_inputQueueSlots.fetch_add(inputQueueSlots); + } + + int getSlots() { return m_numberOfSlots.load(); } + + int getInputQueueSlots() { return m_inputQueueSlots.load(); } + + size_t getRecoverySize() { return m_recoverySize.load(); } + + void setRecoveryId(int pipeline = 0) { + m_pipelineId = pipeline; + } + + void resetRecovery() { + m_triggerTimestamp = -1; + m_numberOfSlots.store(0); + m_inputQueueSlots.store(0); + m_counter.store(0); + m_workers.store(SystemConf::getInstance().WORKER_THREADS); + m_lastTaskId = -1; + m_state = RecoveryState::AWAITING; + m_recoverySize = 0; + m_recoveryDuration = 0; + //m_readyFlag.store(false); + } + + std::string toString() { + std::string s; + s.append(" ["); + if (m_state == RecoveryState::AWAITING) + s.append("Pending").append(" "); + else + s.append("Completed").append(" "); + s.append("] "); + s.append("duration (" + std::to_string(m_recoveryDuration) + ") "); + s.append("triggered (" + std::to_string(m_triggerTimestamp) + ") "); + return s; + } + + private: + void setComplete() { + if (m_state != RecoveryState::FINISHED /*&& m_readyFlag.load()*/) { + m_state = RecoveryState::FINISHED; + if (m_recoveryCounter) { + std::atomic_thread_fence(std::memory_order_release); + m_recoveryCounter->fetch_add(1); + std::cout << "[DBG] recovery " + << " has finished for 
pipeline " + std::to_string(m_pipelineId) + << std::endl; + } + } + } + + void trySetComplete(){ + if (m_counter.load() == m_numberOfSlots.load()) { + const std::lock_guard lock(m_completionLock); + setComplete(); + } + } +}; \ No newline at end of file diff --git a/src/compression/CompressionCodeGenUtils.cpp b/src/compression/CompressionCodeGenUtils.cpp new file mode 100644 index 0000000..dd5a349 --- /dev/null +++ b/src/compression/CompressionCodeGenUtils.cpp @@ -0,0 +1,347 @@ +#include "compression/CompressionCodeGenUtils.h" + +#include "utils/Utils.h" + +uint32_t getRoundedTypeInt(uint32_t precision) { + auto roundedType = Utils::getPowerOfTwo(precision); + if (roundedType <= 8) + roundedType = 8; + return roundedType; +} + +std::string getRoundedType(uint32_t precision) { + return getType(Utils::getPowerOfTwo(precision)); +} + +std::string getType(uint32_t precision) { + bool powerOfTwo = precision != 0 && !(precision & (precision - 1)); + if (!powerOfTwo) { + throw std::runtime_error("error: precision must be a power of two"); + } + switch (precision) { + case 1: + case 2: + case 4: + case 8: + return "uint8_t"; + case 16: + return "uint16_t"; + case 32: + return "uint32_t"; + case 64: + return "uint64_t"; + case 128: + return "__uint128_t"; + default: + throw std::runtime_error( + "error: precision must be lower or equal to 128 bits"); + } +} + +std::string getIncludesString() { + std::string s; + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("\n"); + return s; +} + +std::string getInstrumentationMetrics(size_t workers, size_t cols) { + return "static const int numOfWorkers = " + std::to_string(workers) + ";\n" + "static const int numOfCols = " + std::to_string(cols) + ";\n" + "static uint32_t dVals[numOfWorkers][numOfCols];\n" + "static double cVals[numOfWorkers][numOfCols];\n" + "static double min[numOfWorkers][numOfCols];\n" + "static double max[numOfWorkers][numOfCols];\n" + "static double maxDiff[numOfWorkers][numOfCols];\n" + "static double temp[numOfWorkers][numOfCols];\n"; +} + +std::string getCompressionAlgorithms() { + return "#define DELTA_7_MASK 0x02 << 7;\n" + "#define DELTA_9_MASK 0x06 << 9;\n" + "#define DELTA_12_MASK 0x0E << 12;\n" + "#define DELTA_28_MASK 0x0F << 28;\n" + "\n" + "#include \n" + "#include \n" + "\n" + "using std::numeric_limits;\n" + "\n" + "template \n" + " bool CanTypeFitValue(const U value) {\n" + " const intmax_t botT = intmax_t(numeric_limits::min() );\n" + " const intmax_t botU = intmax_t(numeric_limits::min() );\n" + " const uintmax_t topT = uintmax_t(numeric_limits::max() );\n" + " const uintmax_t topU = uintmax_t(numeric_limits::max() );\n" + " return !( (botT > botU && value < static_cast (botT)) || (topT < topU && value > static_cast (topT)) ); \n" + " }\n" + "\n" + "template \n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const T &lhs, const T &rhs) const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "\n" + "template \n" + "struct alignas(16) Bucket {\n" + " char state;\n" + " KeyT key;\n" + "};\n" + "\n" + "template ,\n" + " typename EqT = HashMapEqualTo>\n" + "class alignas(64) HashSet {\n" + " private:\n" + " using BucketT = Bucket;\n" + "\n" + " HashT m_hasher;\n" + " EqT m_eq;\n" + " BucketT *m_buckets = 
nullptr;\n" + " size_t m_num_buckets = 0;\n" + " size_t m_num_filled = 0;\n" + " size_t m_mask = 0;\n" + "\n" + " public:\n" + " HashSet(size_t size = 512)\n" + " : m_num_buckets(size), m_mask(size - 1) {\n" + " if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) " + "{\n" + " throw std::runtime_error(\n" + " \"error: the size of the hash table has to be a power of " + "two\\n\");\n" + " }\n" + "\n" + " m_buckets = (BucketT *)malloc(m_num_buckets * sizeof(BucketT));\n" + " if (!m_buckets) {\n" + " free(m_buckets);\n" + " throw std::bad_alloc();\n" + " }\n" + "\n" + " for (auto i = 0; i < m_num_buckets; ++i) {\n" + " m_buckets[i].state = 0;\n" + " }\n" + " }\n" + "\n" + " HashSet(Bucket *nodes,\n" + " size_t size = 512)\n" + " : m_buckets(nodes), m_num_buckets(size), m_mask(size - 1) {\n" + " if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) " + "{\n" + " throw std::runtime_error(\n" + " \"error: the size of the hash table has to be a power of " + "two\\n\");\n" + " }\n" + " }\n" + "\n" + " void clear() {\n" + " for (auto i = 0; i < m_num_buckets; ++i) {\n" + " m_buckets[i].state = 0;\n" + " }\n" + " m_num_filled = 0;\n" + " }\n" + "\n" + " void insert(KeyT key, ValueT &pos) {\n" + " size_t ind = m_hasher(key) & m_mask, i = ind;\n" + " for (; i < m_num_buckets; i++) {\n" + " if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {\n" + " m_buckets[i].state = 1;\n" + " m_buckets[i].key = key;\n" + " pos = i;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {\n" + " m_buckets[i].state = 1;\n" + " m_buckets[i].key = key;\n" + " pos = i;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error(\"error: the hashtable is full \\n\");\n" + " }\n" + "\n" + " bool find(const KeyT &key, ValueT &pos) {\n" + " size_t ind = m_hasher(key) & m_mask, i = ind;\n" + " for (; i < m_num_buckets; i++) {\n" + " if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {\n" + " pos = i;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {\n" + " pos = i;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + " }\n" + "\n" + " bool erase(const KeyT &key) {\n" + " size_t ind = m_hasher(key) & m_mask, i = ind;\n" + " for (; i < m_num_buckets; i++) {\n" + " if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {\n" + " m_buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {\n" + " m_buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " printf(\"error: entry not found \\n\");\n" + " return false;\n" + " }\n" + "\n" + " BucketT *buckets() { return m_buckets; }\n" + "\n" + " size_t size() {\n" + " m_num_filled = 0;\n" + " for (size_t i = 0; i < m_num_buckets; i++) {\n" + " m_num_filled += m_buckets[i].state;\n" + " }\n" + " return m_num_filled;\n" + " }\n" + "\n" + " bool empty() const { return m_num_filled == 0; }\n" + "\n" + " size_t max_size() const { return m_num_buckets; }\n" + "\n" + " size_t bucket_size() const { return sizeof(BucketT); }\n" + "\n" + " float load_factor() {\n" + " return static_cast(size()) / " + "static_cast(m_num_buckets);\n" + " }\n" + "\n" + " ~HashSet() {\n" + " for (size_t bucket = 0; bucket < m_num_buckets; ++bucket) {\n" + " m_buckets[bucket].~BucketT();\n" + " }\n" + " free(m_buckets);\n" + " }\n" + "};\n" + "\n" + "\n" + "namespace zz {\n" + "inline uint64_t 
encode(int64_t i) { return (i >> 63) ^ (i << 1); }\n" + "\n" + "inline int64_t decode(uint64_t i) { return (i >> 1) ^ (-(i & 1)); }\n" + "} // namespace zz\n" + "\n" + "\n" + "template \n" + "class DictionaryCompressor {\n" + " private:\n" + " HashSet m_table;\n" + " Out m_id = 0;\n" + "\n" + " public:\n" + " DictionaryCompressor(size_t size) : m_table(size) {}\n" + " Out compress(In &input) {\n" + " Out res;\n" + " if (m_table.find(input, res)) {\n" + " return res;\n" + " }\n" + " res = m_id;\n" + " m_table.insert(input, m_id);\n" + " m_id++;\n" + " return res;\n" + " }\n" + " HashSet &getTable() {\n" + " return m_table;\n" + " }\n" + " void clear() {\n" + " m_table.clear();\n" + " }\n" + "};\n" + "\n" + "template \n" + "class BaseDeltaCompressor {\n" + " private:\n" + " In m_base;\n" + "\n" + " public:\n" + " BaseDeltaCompressor(In base) : m_base(base) {}\n" + " inline Out compress(In &input) { return (Out) std::abs(m_base - input); }\n" + " inline bool check(In &input) {\n" + " auto res = input - m_base;\n " + " return !CanTypeFitValue(res);\n" + " }\n" + " inline std::string getBase() {\n" + " return std::to_string(m_base);\n" + " }\n" + "};\n" + "\n" + "template \n" + "class BucketCompressor {\n" + " private:\n" + " In m_bucket;\n" + "\n" + " public:\n" + " BucketCompressor(In bucket) : m_bucket(bucket) {}\n" + " inline Out compress(In &input) { return Out(input / m_bucket); }\n" + "};\n" + "\n" + "template \n" + "class FloatMultCompressor {\n" + " private:\n" + " const int m_multiplier;\n" + "\n" + " public:\n" + " FloatMultCompressor(int multiplier) : m_multiplier(multiplier) {\n" + " // check for precision issues here!\n" + " }\n" + " inline Out compress(float &input) {\n" + " return (Out)(input * m_multiplier);\n" + " }\n" + " inline bool check(float &input) {\n" + " //double intpart;\n" + " // does it have a fractional part?\n" + " //if (modf(input, &intpart) != 0) {\n" + " // return true;\n" + " //}\n" + " auto res = (uint64_t)(input * m_multiplier);\n" + " return !CanTypeFitValue(res);\n" + " }\n" + " inline std::string getMultiplier() {\n" + " return std::to_string(m_multiplier);\n" + " }\n" + "};\n"; +} + +std::string getCompressionVars(bool hasDict, size_t workers, size_t cols) { + std::string s; + s.append("#include \n"); + s.append( + "static const int numOfWorkers = " + std::to_string(workers) + ";\n" + + "static const int numOfCols = " + std::to_string(cols) + ";\n"); + if (hasDict) + s.append("static std::unique_ptr> dcomp[numOfWorkers][numOfCols];\n"); + s.append("static std::string metadata[numOfWorkers][numOfCols];\n"); + s.append("static bool isFirst [numOfWorkers] = {"); + for (size_t i = 0; i < workers; ++i) { + s.append("true"); + if (i != workers - 1) + s.append(", "); + } + s.append("};\n"); + return s; +} \ No newline at end of file diff --git a/src/compression/CompressionCodeGenUtils.h b/src/compression/CompressionCodeGenUtils.h new file mode 100644 index 0000000..0ef9094 --- /dev/null +++ b/src/compression/CompressionCodeGenUtils.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include + +#include "utils/AttributeType.h" + +/* + * \brief Utilities for code generating compression algorithms. 
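For example, getType() maps a
+ * power-of-two bit width to the narrowest unsigned C++ type, while
+ * getRoundedType() first rounds the width (sketch; assumes
+ * Utils::getPowerOfTwo rounds up, and widths above 128 bits throw):
+ *
+ *   getType(16);        // -> "uint16_t"
+ *   getRoundedType(12); // -> "uint16_t"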
+ * + * */ + +enum class CompressionType { None, FloatMult, BaseDelta, Dictionary, RLE }; +uint32_t getRoundedTypeInt(uint32_t precision); +std::string getRoundedType(uint32_t precision); +std::string getType(uint32_t precision); +std::string getIncludesString(); +std::string getInstrumentationMetrics(size_t workers, size_t cols); +std::string getCompressionAlgorithms(); +std::string getCompressionVars(bool hasDict, size_t workers, size_t cols); + +struct alignas(16) CompressionColumn { + int m_column; + BasicType m_type; + std::string m_typeString; + std::unordered_set m_comps; + uint32_t m_precision; + uint32_t m_diffPrecision; + uint32_t m_RLEPrecision; + uint32_t m_multiplier; + double m_min, m_max; + std::string m_expression; + explicit CompressionColumn(int col = 0, BasicType type = BasicType::Long, + uint32_t precision = 64, uint32_t diffPrecision = 64, + uint32_t rlePrecision = 64, uint32_t multiplier = 1, + double min = DBL_MIN, double max = DBL_MAX) + : m_column(col), + m_type(type), + m_typeString(AttributeType::m_typeNames.find(type)->second), + m_precision(precision), + m_diffPrecision(diffPrecision), + m_RLEPrecision(rlePrecision), + m_multiplier(multiplier), + m_min(min), + m_max(max){} +}; \ No newline at end of file diff --git a/src/compression/CompressionStatistics.cpp b/src/compression/CompressionStatistics.cpp new file mode 100644 index 0000000..537085c --- /dev/null +++ b/src/compression/CompressionStatistics.cpp @@ -0,0 +1,229 @@ +#include "compression/CompressionStatistics.h" + +#include "compression/CompressionCodeGenUtils.h" +#include "cql/expressions/ColumnReference.h" +#include "utils/Utils.h" + +class ColumnReference; + +CompressionStatistics::CompressionStatistics( + int id, std::vector *cols) + : m_id(id), + m_cols(cols), + m_colsSize(cols->size()), + m_distinctVals(m_colsSize, 0), + m_consecutiveVals(m_colsSize, consAvg()), + m_min(m_colsSize, DBL_MAX), + m_max(m_colsSize, DBL_MIN), + m_maxDiff(m_colsSize, DBL_MIN), + m_precision(m_colsSize, 32), + m_diffPrecision(m_colsSize, 32), + m_floatMult(m_colsSize, 1), + m_RLEPrecision(m_colsSize, 32), + m_prevPrecision(m_colsSize, 32), + m_prevDiffPrecision(m_colsSize, 32), + m_prevFloatMult(m_colsSize, 1), + m_prevRLEPrecision(m_colsSize, 32), + m_useRLE(m_colsSize, false), + m_prevUseRLE(m_colsSize, false), + m_compRatio(m_colsSize, 0), + m_throughput(0.), + m_compCols(cols->size(), nullptr) { + //std::sort(m_cols->begin(), m_cols->end()); +} + +void CompressionStatistics::addStatistics(const uint32_t *distinctVals, + const double *consecutiveVals, + const double *min, const double *max, + const double *maxDiff) { + const std::lock_guard lock(m_mutex); + initializeGT(m_distinctVals, distinctVals); + initializeAVG(m_consecutiveVals, consecutiveVals); + initializeLT(m_min, min); + initializeGT(m_max, max); + initializeGT(m_maxDiff, maxDiff); + m_hasData = true; + if (m_debug) { + printStatisticsUnsafe(); + } +} + +bool CompressionStatistics::updateCompressionDecision() { + bool hasChanged = false; + const std::lock_guard lock(m_mutex); + if (!m_hasData) + return hasChanged; + // all vectors have the same size + for (size_t i = 0; i < m_colsSize; ++i) { + // update m_precision + uint32_t precision = 0; + precision = std::max(precision, getPrecision(m_min[i], m_floatMult[i])); + precision = std::max(precision, getPrecision(m_max[i], m_floatMult[i])); + m_precision[i] = precision; + // we reset the min/max values later + + // update max diff precision + precision = 0; + //precision = std::max(precision, 
getPrecision(m_maxDiff[i], m_floatMult[i])); + auto maxDiff = m_max[i] - m_min[i]; + precision = std::max(precision, getPrecision(maxDiff, m_floatMult[i])); + m_diffPrecision[i] = precision; + m_maxDiff[i] = DBL_MIN; + + // update rle decision + int avg = (int)std::ceil(m_consecutiveVals[i].getAverage()) - 1; // todo: replace with ceil + uint32_t dMul = 0; + m_RLEPrecision[i] = getPrecision(avg, dMul); + //if (m_RLEPrecision[i] >= 7) { + // m_RLEPrecision[i] = 6; // todo: remove this + //} + m_useRLE[i] = false; + if (avg >= 5) { + m_useRLE[i] = true; + } + m_consecutiveVals[i].reset(); + + // todo: update dict decision + + // update previous values for comparison + if (m_prevPrecision[i] != m_precision[i] || + m_prevDiffPrecision[i] != m_diffPrecision[i] || + m_prevUseRLE[i] != m_useRLE[i] || + m_floatMult[i] != m_prevFloatMult[i] || + m_RLEPrecision[i] != m_prevRLEPrecision[i]) { + hasChanged = true; + /*std::cout << "Compression: col " << i << " min " << m_min[i] << " max " + << m_max[i] << " prec " << m_precision[i] << " diffPrec " + << m_diffPrecision[i] << " RLE " << m_useRLE[i] << " RLEprec " + << m_RLEPrecision[i] << " fMul " << m_floatMult[i] << std::endl;*/ + + // create the column if it doesn't exist + if (!m_compCols[i]) { + m_compCols[i] = std::make_shared( + (*m_cols)[i]->getColumn(), (*m_cols)[i]->getBasicType(), + m_precision[i], m_diffPrecision[i], m_RLEPrecision[i], + m_floatMult[i], m_min[i], m_max[i]); + if ((*m_cols)[i]->getColumn() == -1) { + m_compCols[i]->m_expression = (*m_cols)[i]->getExpression(); + } + } else { + m_compCols[i]->m_precision = m_precision[i]; + m_compCols[i]->m_diffPrecision = m_diffPrecision[i]; + m_compCols[i]->m_RLEPrecision = m_RLEPrecision[i]; + m_compCols[i]->m_multiplier = m_floatMult[i]; + m_compCols[i]->m_min = m_min[i]; + m_compCols[i]->m_max = m_max[i]; + } + // clear previous decisions + m_compCols[i]->m_comps.clear(); + + if (m_compCols[i]->m_column == 0) { + // for timestamps use BaseDelta + m_compCols[i]->m_comps.insert(CompressionType::BaseDelta); + } else if (m_compCols[i]->m_type == BasicType::LongLong || + (m_compCols[i]->m_type != BasicType::Float && + m_compCols[i]->m_precision > 16)) { + // for uint128 use Dictionary compression + // todo: this can be group keys + m_compCols[i]->m_comps.insert(CompressionType::Dictionary); + } else if (m_compCols[i]->m_type == BasicType::Float) { + // for floats use Float Multiplier + m_compCols[i]->m_comps.insert(CompressionType::FloatMult); + if (m_useAgrresivelyRLE) { + m_compCols[i]->m_comps.insert(CompressionType::RLE); + } + } else { + // use casting to reduce precision if possible + m_compCols[i]->m_comps.insert(CompressionType::None); + } + // finally check if RLE is needed + if (m_useRLE[i]) { + m_compCols[i]->m_comps.insert(CompressionType::RLE); + } + } + m_min[i] = DBL_MAX; + m_max[i] = DBL_MIN; + m_prevPrecision[i] = m_precision[i]; + m_prevDiffPrecision[i] = m_diffPrecision[i]; + m_prevRLEPrecision[i] = m_RLEPrecision[i]; + m_prevUseRLE[i] = m_useRLE[i]; + m_prevFloatMult[i] = m_floatMult[i]; + } + + m_hasData = false; + if (m_debug) { + printStatisticsDecisionUnsafe(); + } + return hasChanged; +} + +// todo: handle higher precisions and remove the std::abs(floatpart) >= +// 0.0000001 assumption +inline uint32_t CompressionStatistics::getPrecision(double num, + uint32_t &floatMult) { + if (num < 0) { + throw std::runtime_error( + "error: cannot measure the precision of a negative number yet"); + } + uint32_t bits = 0; + double intpart; + // does it have a fractional part? 
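+  // e.g. modf(3.75, &intpart) stores 3.0 in intpart and returns 0.75, so a
+  // zero return here means num is integral and only its integer bits count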
+ if (modf(num, &intpart) == 0) { + auto dec = (uint32_t)intpart; // use std::abs for negative values? + auto clz = __builtin_clz(dec); + bits = (dec == 0) ? 1 : 32 - clz; + } else { + auto count = 0; + auto floatpart = std::abs(num); + floatpart = floatpart - int(floatpart); + while (std::abs(floatpart) >= 0.0000001) { + floatpart = floatpart * 10; + count = count + 1; + floatpart = floatpart - int(floatpart); + } + count = (count > 3) ? 3 : count; // todo: fix this + floatMult = std::pow(10, count); + auto dec = (uint32_t)(num * floatMult); + auto clz = __builtin_clz(dec); + bits = (dec == 0) ? 1 : 32 - clz; + } + + // std::cout << "The precision of " << num << " is " << + // Utils::getPowerOfTwo(bits) << " bits." << std::endl; + // do we need to round the result to a power of two? + if (bits == 1) + bits = 2; + return bits; +} + +void CompressionStatistics::printStatisticsUnsafe() const { + std::ostringstream streamObj; + streamObj << "[MON] [CompressionStatistics] ID-" + std::to_string(m_id) + " "; + streamObj << std::fixed << std::setprecision(2); + for (size_t i = 0; i < m_colsSize; ++i) { + streamObj << "[COL-" + std::to_string(i) + " "; + streamObj << "MIN " << m_min[i] << " "; + streamObj << "MAX " << m_max[i] << " "; + streamObj << "MD " << m_maxDiff[i] << " "; + streamObj << "CV " + << m_consecutiveVals[i].m_consPercentage / + m_consecutiveVals[i].m_counter; + streamObj << "]"; + } + std::cout << streamObj.str() << std::endl; +} + +void CompressionStatistics::printStatisticsDecisionUnsafe() const { + std::ostringstream streamObj; + streamObj << "[MON] [CompressionStatistics] ID-" + std::to_string(m_id) + + " RES "; + for (size_t i = 0; i < m_colsSize; ++i) { + streamObj << "[COL-" + std::to_string(i) + " "; + streamObj << "P " + std::to_string(m_precision[i]) + " "; + streamObj << "DP " + std::to_string(m_diffPrecision[i]) + " "; + streamObj << "FM " + std::to_string(m_floatMult[i]) + " "; + streamObj << "RLE " + std::to_string(m_useRLE[i]); + streamObj << "]"; + } + std::cout << streamObj.str() << std::endl; +} diff --git a/src/compression/CompressionStatistics.h b/src/compression/CompressionStatistics.h new file mode 100644 index 0000000..0d47c00 --- /dev/null +++ b/src/compression/CompressionStatistics.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/Utils.h" + +class ColumnReference; +struct CompressionColumn; + +/* + * \biref This class represents the compression statistics, such as the number of + * consecutive values, min/max, and max difference of each column. + * + * */ + +struct CompressionStatistics { + const int m_id; + std::vector *m_cols; + size_t m_colsSize; + std::mutex m_mutex; + struct consAvg { + explicit consAvg() = default; + double m_consPercentage = 0.; + uint32_t m_counter = 0; + [[nodiscard]] double getAverage() const { + return (m_counter == 0) ? 0 : m_consPercentage/(double)m_counter; + } + void reset() { + m_consPercentage = 0.; + m_counter = 0; + } + }; + std::vector m_distinctVals; + std::vector m_consecutiveVals; // run-length greater or equal to 3 or keep the average run-length!!! 
+ std::vector m_min, m_max, m_maxDiff; + std::vector m_precision, m_diffPrecision, m_floatMult, m_RLEPrecision, + m_prevPrecision, m_prevDiffPrecision, m_prevFloatMult, m_prevRLEPrecision; + std::vector m_useRLE, m_prevUseRLE; + std::vector m_compRatio; + double m_throughput; + std::vector> m_compCols; + bool m_hasData = false; + bool m_useAgrresivelyRLE = true; + + const bool m_debug = false; + + explicit CompressionStatistics(int id, std::vector *cols); + void addStatistics(const uint32_t *distinctVals, + const double *consecutiveVals, + const double *min, + const double *max, + const double *maxDiff); + bool updateCompressionDecision(); + static inline uint32_t getPrecision(double num, uint32_t &floatMult); + void printStatisticsUnsafe() const; + void printStatisticsDecisionUnsafe() const; + + template + static inline void initialize(std::vector &left, const T *right) { + if (!right) return; + auto size = right->size(); + for (size_t i = 0; i < size; ++i) { + left[i] = (*right)[i]; + } + } + + template + static inline void initializeGT(std::vector &left, const T *right) { + if (!right) return; + auto size = left.size(); + for (size_t i = 0; i < size; ++i) { + left[i] = (left[i] > right[i]) ? left[i] : right[i]; + } + } + + template + static inline void initializeLT(std::vector &left, const T *right) { + if (!right) return; + auto size = left.size(); + for (size_t i = 0; i < size; ++i) { + left[i] = (left[i] < right[i]) ? left[i] : right[i]; + } + } + + static inline void initializeAVG(std::vector &left, const double *right) { + if (!right) return; + auto size = left.size(); + for (size_t i = 0; i < size; ++i) { + left[i].m_consPercentage += right[i]; + left[i].m_counter++; + } + } + + template + static inline void checkOrInitialize(std::vector &left, + std::vector *right) { + if (!right) return; + auto size = right->size(); + if (left.empty()) { + left.resize(size); + } else { + if (size != left.size()) { + throw std::runtime_error( + "error: wrong number of columns during initialization"); + } + } + for (size_t i = 0; i < size; ++i) { + left[i] = (*right)[i]; + } + } +}; \ No newline at end of file diff --git a/src/compression/Compressor.h b/src/compression/Compressor.h new file mode 100644 index 0000000..08b6165 --- /dev/null +++ b/src/compression/Compressor.h @@ -0,0 +1,361 @@ +#pragma once + +#include + +#include "compression/Zigzag.h" +#include "cql/operators/HashTable.h" + +#define DELTA_7_MASK 0x02 << 7; +#define DELTA_9_MASK 0x06 << 9; +#define DELTA_12_MASK 0x0E << 12; +#define DELTA_28_MASK 0x0F << 28; + +/* + * \biref A set of compression algorithms used for implementing the hardcoded + * versions of compression. 
+ * + * */ + +template +class Compressor { + public: + virtual Out compress(In &input) = 0; +}; + +template +class DictionaryCompressor : public Compressor { + private: + HashSet m_table; + + public: + DictionaryCompressor(size_t size) : m_table(size) {} + Out compress(In &input) override { + Out res; + if (m_table.find(input, res)) { + return res; + } + m_table.insert(input, res); + return res; + } + HashSet &getTable() { return m_table; } + void clear() { m_table.clear(); } +}; + +template +class BaseDeltaCompressor : public Compressor { + private: + In m_base; + + public: + BaseDeltaCompressor(In base) : m_base(base) {} + Out compress(In &input) override { return std::abs(m_base - input); } +}; + +template +class BucketCompressor : public Compressor { + private: + In m_bucket; + + public: + BucketCompressor(In bucket) : m_bucket(bucket) {} + Out compress(In &input) override { return Out(input / m_bucket); } +}; + +class DummyFloatCompressor : public Compressor { + private: + const int m_multiplier; + + public: + DummyFloatCompressor(int multiplier) : m_multiplier(multiplier) { + // check for precision issues here! + } + uint16_t compress(float &input) override { + return (uint16_t)(input * m_multiplier); + } +}; + +template +class GorillaTimestampCompressor { + public: + GorillaTimestampCompressor() {} + + std::tuple compress(In timestamp, In prevTimestamp, + In doublePrevTimestamp) { + int deltaLength = 0; // The length of the stored value + uint64_t deltaD = 0; // The value stored in 64 bits due to shift up to 64 + int64_t deltaOfDelta = + (timestamp - prevTimestamp) - (prevTimestamp - doublePrevTimestamp); + if (deltaOfDelta == 0) { + deltaLength = 1; + deltaD = 0; + } else if (deltaOfDelta < 65 && deltaOfDelta > -64) { + deltaD = zz::encode(deltaOfDelta); + deltaD |= DELTA_7_MASK; + deltaLength = 9; + } else if (deltaOfDelta < 256 && deltaOfDelta > -255) { + deltaD = zz::encode(deltaOfDelta); + deltaD |= DELTA_9_MASK; + deltaLength = 12; + } else if (deltaOfDelta < 2048 && deltaOfDelta > -2047) { + deltaD = zz::encode(deltaOfDelta); + deltaD |= DELTA_12_MASK; + deltaLength = 16; + } else { + deltaD = zz::encode(deltaOfDelta); + deltaD |= DELTA_28_MASK; + deltaLength = 32; + } + return {deltaD, deltaLength}; + } +}; + +class GorillaValuesCompressor { + public: + std::tuple compress(uint64_t xorValue, + int prevLeadingZeros, + int prevTrailingZeros) { + uint64_t appendedValue; + int appendedValueLength = 0; + + if (xorValue == 0) { + appendedValue = 0; + } else { + int leadingZeros = __builtin_clzll(xorValue); + int trailingZeros = __builtin_ctzll(xorValue); + if (leadingZeros >= 32) { + leadingZeros = 31; + } + + if (leadingZeros == trailingZeros) { + xorValue = xorValue >> 1 << 1; + trailingZeros = 1; + } + // Store bit '1' + appendedValue = 1; + appendedValueLength++; + + if (leadingZeros >= prevLeadingZeros && + trailingZeros >= prevTrailingZeros) { + appendedValue <<= 1; + appendedValueLength++; + int significantBits = 64 - prevLeadingZeros - prevTrailingZeros; + xorValue >>= prevTrailingZeros; + appendedValue <<= significantBits; + appendedValue |= xorValue; + appendedValueLength += significantBits; + } else { + int significantBits = 64 - leadingZeros - trailingZeros; + // fot_comment: 0x20->0010 0000 to keep the 1 control bit + // then xor with the leading zeros, keeping the leading + // zeros after 1 then shifts it for 6 places to enter the + // significant bits so 1+5 leading zeros+ 6 length of + // significant = 12 length + appendedValue <<= 12; + appendedValue |= ((0x20 ^ 
leadingZeros) << 6) ^ (significantBits); + appendedValueLength += 12; + xorValue >>= trailingZeros; // Length of meaningful bits in + // the next 6 bits + appendedValue <<= significantBits; + appendedValue |= xorValue; + appendedValueLength += significantBits; + } + } + return {appendedValue, appendedValueLength, prevLeadingZeros, + prevTrailingZeros}; + } +}; + +class VarByte { + public: + size_t compression(uint64_t input, uint8_t *buffer, size_t outputSize) { + while (input > 127) { + //|128: Set the next byte flag + buffer[outputSize] = ((uint8_t)(input & 127)) | 128; + // Remove the seven bits we just wrote + input >>= 7; + outputSize++; + } + buffer[outputSize++] = ((uint8_t)input) & 127; + return outputSize; + } +}; + +class Simple8 { + constexpr static const uint32_t bitLength[16] = { + 1, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60, 32}; + + static const uint32_t SIMPLE8B_BITSIZE = 60; + static const uint32_t SIMPLE8B_MAXCODE = 15; + static const uint32_t SIMPLE8B_MINCODE = 1; + + public: + template + uint32_t compress(int32_t inOffset, int32_t outOffset, uint32_t n, + StructName *st, MemberName mem, int numberForEquation, + uint64_t *buf) { + uint32_t inPos = inOffset; + uint32_t inEnd = inOffset + n; + uint32_t outPos = outOffset; + + while (inPos < inEnd) { + uint32_t remainingCount = inEnd - inPos; + uint64_t outVal = 0; + uint32_t code = SIMPLE8B_MINCODE; + for (; code < SIMPLE8B_MAXCODE; code++) { + uint32_t intNum = bitLength[SIMPLE8B_MAXCODE - code]; + uint32_t bitLen = bitLength[code]; + intNum = (intNum < remainingCount) ? intNum : remainingCount; + + uint64_t maxVal = (1ULL << bitLen) - 1; + uint64_t val = static_cast(code) << SIMPLE8B_BITSIZE; + uint32_t j = 0; + for (; j < intNum; j++) { + uint64_t inputVal = ((*st)[inPos + j].*mem) / numberForEquation; + /*static_cast(data[inPos + j].mem);*/ + if (inputVal > maxVal) { + break; + } + val |= inputVal << (j * bitLen); + } + if (j == intNum) { + outVal = val; + inPos += intNum; + break; + } + } + // if no bit packing possible, encode just one value + if (code == SIMPLE8B_MAXCODE) { + outVal = + (static_cast(code) << SIMPLE8B_BITSIZE) | + ((*st)[inPos++].*mem) / numberForEquation; /*data[inPos++].mem;*/ + } + + buf[outPos++] = outVal; + } + return outPos - outOffset; + } + + template + uint32_t compress(int32_t inOffset, int32_t outOffset, uint32_t n, + StructName *st, MemberName mem, int numberForEquation, + uint64_t *buf, int divider) { + uint32_t inPos = inOffset; + uint32_t inEnd = inOffset + n; + uint32_t outPos = outOffset; + + while (inPos < inEnd) { + uint32_t remainingCount = inEnd - inPos; + uint64_t outVal = 0; + uint32_t code = SIMPLE8B_MINCODE; + for (; code < SIMPLE8B_MAXCODE; code++) { + uint32_t intNum = bitLength[SIMPLE8B_MAXCODE - code]; + uint32_t bitLen = bitLength[code]; + intNum = (intNum < remainingCount) ? 
intNum : remainingCount; + + uint64_t maxVal = (1ULL << bitLen) - 1; + uint64_t val = static_cast(code) << SIMPLE8B_BITSIZE; + uint32_t j = 0; + for (; j < intNum; j++) { + uint64_t inputVal = (((*st)[inPos + j].*mem)/divider) / numberForEquation; + /*static_cast(data[inPos + j].mem);*/ + if (inputVal > maxVal) { + break; + } + val |= inputVal << (j * bitLen); + } + if (j == intNum) { + outVal = val; + inPos += intNum; + break; + } + } + // if no bit packing possible, encode just one value + if (code == SIMPLE8B_MAXCODE) { + outVal = + (static_cast(code) << SIMPLE8B_BITSIZE) | + (((*st)[inPos++].*mem)/divider) / numberForEquation; /*data[inPos++].mem;*/ + } + + buf[outPos++] = outVal; + } + return outPos - outOffset; + } + + template + uint32_t decompress(uint32_t returnVal, uint32_t outOffset, uint32_t n, + StructName *st, MemberName mem, int numberForEquation, + uint64_t *buf) { + // REMEMBER!!!! THE FIRST VALUE TO DECODE + // IS ON THE LSB AND READ INCREMENTAL -> MSB + uint32_t inPos = 0; + uint32_t outPos = outOffset; + + for (uint32_t bufferRow = 0; bufferRow < returnVal; bufferRow++) { + uint32_t remainingCount = n - outPos; + uint64_t val = buf[inPos++]; + auto code = static_cast(val >> SIMPLE8B_BITSIZE); + + // optional check for end-of-stream + if (code == 0) { + break; // end of stream + } + + else { + // decode bit-packed integers + uint32_t intNum = bitLength[SIMPLE8B_MAXCODE - code]; + uint32_t bitLen = bitLength[code]; + uint64_t bitMask = (1ULL << bitLen) - 1; + intNum = (intNum < remainingCount) + ? intNum + : remainingCount; // optional buffer end check + + int bufShift = 0; + for (uint32_t inRow = 0; inRow < intNum; inRow++) { + // decompressed[outPos++] = (val >> bufShift) & bitMask; + ((*st)[outPos++].*mem) = (val >> bufShift) & bitMask; + bufShift += bitLen; + } + } + } + return outPos; + } + + template + uint32_t decompress(uint32_t returnVal, uint32_t outOffset, uint32_t n, + StructName *st, MemberName mem, int numberForEquation, + uint64_t *buf, int multiplier) { + // REMEMBER!!!! THE FIRST VALUE TO DECODE + // IS ON THE LSB AND READ INCREMENTAL -> MSB + uint32_t inPos = 0; + uint32_t outPos = outOffset; + + for (uint32_t bufferRow = 0; bufferRow < returnVal; bufferRow++) { + uint32_t remainingCount = n - outPos; + uint64_t val = buf[inPos++]; + auto code = static_cast(val >> SIMPLE8B_BITSIZE); + + // optional check for end-of-stream + if (code == 0) { + break; // end of stream + } + + else { + // decode bit-packed integers + uint32_t intNum = bitLength[SIMPLE8B_MAXCODE - code]; + uint32_t bitLen = bitLength[code]; + uint64_t bitMask = (1ULL << bitLen) - 1; + intNum = (intNum < remainingCount) + ? 
intNum + : remainingCount; // optional buffer end check + + int bufShift = 0; + for (uint32_t inRow = 0; inRow < intNum; inRow++) { + // decompressed[outPos++] = (val >> bufShift) & bitMask; + ((*st)[outPos++].*mem) = ((val >> bufShift) & bitMask) * multiplier; + bufShift += bitLen; + } + } + } + return outPos; + } +}; \ No newline at end of file diff --git a/src/compression/Zigzag.h b/src/compression/Zigzag.h new file mode 100644 index 0000000..4aa9158 --- /dev/null +++ b/src/compression/Zigzag.h @@ -0,0 +1,9 @@ +#pragma once +#include +#include + +namespace zz { +inline uint64_t encode(int64_t i) { return (i >> 63) ^ (i << 1); } + +inline int64_t decode(uint64_t i) { return (i >> 1) ^ (-(i & 1)); } +} // namespace zz \ No newline at end of file diff --git a/src/cql/expressions/ColumnReference.h b/src/cql/expressions/ColumnReference.h index 2c994b5..4aa0f0f 100644 --- a/src/cql/expressions/ColumnReference.h +++ b/src/cql/expressions/ColumnReference.h @@ -29,7 +29,14 @@ class ColumnReference : public Expression { s.append("\"").append(std::to_string(m_column)).append("\""); return s; } + void setExpression(std::string expression) { + m_expression = std::move(expression); + } + std::string getExpression() { + return m_expression; + } ~ColumnReference() override = default; private: int m_column; + std::string m_expression; }; \ No newline at end of file diff --git a/src/cql/expressions/Expression.cpp b/src/cql/expressions/Expression.cpp new file mode 100644 index 0000000..0f886b4 --- /dev/null +++ b/src/cql/expressions/Expression.cpp @@ -0,0 +1,40 @@ +#include "cql/expressions/Expression.h" + +TupleSchema ExpressionUtils::getTupleSchemaFromExpressions(std::vector &expressions, std::string name) { + TupleSchema schema((int) expressions.size(), name); + int idx = 0; + /* Set types */ + for (auto e : expressions) { + if (e->getBasicType() == BasicType::Integer) { + auto attr = AttributeType(BasicType::Integer); + schema.setAttributeType(idx, attr); + } else if (e->getBasicType() == BasicType::Float) { + auto attr = AttributeType(BasicType::Float); + schema.setAttributeType(idx, attr); + } else if (e->getBasicType() == BasicType::Long) { + auto attr = AttributeType(BasicType::Long); + schema.setAttributeType(idx, attr); + } else if (e->getBasicType() == BasicType::LongLong) { + auto attr = AttributeType(BasicType::LongLong); + schema.setAttributeType(idx, attr); + } + idx++; + } + return schema; +} + +TupleSchema ExpressionUtils::mergeTupleSchemas(TupleSchema &x, TupleSchema &y) { + TupleSchema schema(x.numberOfAttributes() + y.numberOfAttributes(), "MergedSchema"); + int idx = 0; + for (int i = 0; i < x.numberOfAttributes(); ++i) { + auto type = x.getAttributeType(i); + auto attr = AttributeType(type); + schema.setAttributeType(idx++, attr); + } + for (int i = 0; i < y.numberOfAttributes(); ++i) { + auto type = y.getAttributeType(i); + auto attr = AttributeType(type); + schema.setAttributeType(idx++, attr); + } + return schema; +} \ No newline at end of file diff --git a/src/cql/expressions/Expression.h b/src/cql/expressions/Expression.h index ed5536c..8949346 100644 --- a/src/cql/expressions/Expression.h +++ b/src/cql/expressions/Expression.h @@ -26,42 +26,6 @@ class Expression { inline Expression::~Expression() = default; namespace ExpressionUtils { -TupleSchema getTupleSchemaFromExpressions(std::vector &expressions, std::string name = "Stream") { - TupleSchema schema((int) expressions.size(), name); - int idx = 0; - /* Set types */ - for (auto e : expressions) { - if (e->getBasicType() == 
BasicType::Integer) { - auto attr = AttributeType(BasicType::Integer); - schema.setAttributeType(idx, attr); - } else if (e->getBasicType() == BasicType::Float) { - auto attr = AttributeType(BasicType::Float); - schema.setAttributeType(idx, attr); - } else if (e->getBasicType() == BasicType::Long) { - auto attr = AttributeType(BasicType::Long); - schema.setAttributeType(idx, attr); - } else if (e->getBasicType() == BasicType::LongLong) { - auto attr = AttributeType(BasicType::LongLong); - schema.setAttributeType(idx, attr); - } - idx++; - } - return schema; -} - -TupleSchema mergeTupleSchemas(TupleSchema &x, TupleSchema &y) { - TupleSchema schema(x.numberOfAttributes() + y.numberOfAttributes(), "MergedSchema"); - int idx = 0; - for (int i = 0; i < x.numberOfAttributes(); ++i) { - auto type = x.getAttributeType(i); - auto attr = AttributeType(type); - schema.setAttributeType(idx++, attr); - } - for (int i = 0; i < y.numberOfAttributes(); ++i) { - auto type = y.getAttributeType(i); - auto attr = AttributeType(type); - schema.setAttributeType(idx++, attr); - } - return schema; -} +TupleSchema getTupleSchemaFromExpressions(std::vector &expressions, std::string name = "Stream"); +TupleSchema mergeTupleSchemas(TupleSchema &x, TupleSchema &y); } \ No newline at end of file diff --git a/src/cql/operators/AggregateOperatorCode.h b/src/cql/operators/AggregateOperatorCode.h index 837259b..36f866c 100644 --- a/src/cql/operators/AggregateOperatorCode.h +++ b/src/cql/operators/AggregateOperatorCode.h @@ -15,11 +15,9 @@ class OperatorKernel; * */ class AggregateOperatorCode { - private: + protected: long m_hashTableSize = 0; - friend OperatorKernel; - public: virtual void aggregatePartials(std::shared_ptr openingWindows, std::shared_ptr closingOrPendingWindows, diff --git a/src/cql/operators/Aggregation.h b/src/cql/operators/Aggregation.h index e35296a..1c022de 100644 --- a/src/cql/operators/Aggregation.h +++ b/src/cql/operators/Aggregation.h @@ -144,6 +144,13 @@ class Aggregation : public OperatorCode, public AggregateOperatorCode { (void) pid; throw std::runtime_error("error: this operator cannot be used directly"); } + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } void aggregatePartials(std::shared_ptr openingWindows, std::shared_ptr closingOrPendingWindows, std::shared_ptr completeWindows, diff --git a/src/cql/operators/HashTable.h b/src/cql/operators/HashTable.h new file mode 100644 index 0000000..f7f0647 --- /dev/null +++ b/src/cql/operators/HashTable.h @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include + +#include "utils/SystemConf.h" + +template +struct HashMapEqualTo { + constexpr bool operator()(const T &lhs, const T &rhs) const { + return lhs == rhs; + } +}; + +template +struct alignas(16) SimpleBucket { + char state; + KeyT key; +}; + +template +struct alignas(16) Bucket { + char state = 0; + KeyT key; + ValueT value; + int counter = 0; + void combine(ValueT v) { Opt(value, v); } +}; + +template +struct DummyAggr { + unsigned int addedElements = 0; + unsigned int removedElements = 0; + void initialise(){}; + void insert(ValueT v){}; + ValueT query() { return 0; }; + void evict(){}; +}; + +/* + * \brief This class implements a hashtable. + * + * It is used for debugging. 
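A small sketch with the defaulted
+ * template arguments (table sizes must be powers of two):
+ *
+ *   HashTable<int, long> table(1024);
+ *   table.insert(42, 7);
+ *   long v;
+ *   bool found = table.find(42, v);  // found == true, v == 7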
+ *
+ * */
+
+template <typename KeyT, typename ValueT,
+          typename HashT = std::hash<KeyT>,
+          typename EqT = HashMapEqualTo<KeyT>,
+          typename AggrT = DummyAggr<ValueT>>
+class alignas(64) HashTable {
+ private:
+  using BucketT = Bucket<KeyT, ValueT>;
+
+  HashT m_hasher;
+  EqT m_eq;
+  BucketT *m_buckets = nullptr;
+  AggrT *m_aggrs = nullptr;
+  size_t m_num_buckets = 0;
+  size_t m_num_filled = 0;
+  size_t m_mask = 0;
+
+ public:
+  HashTable(size_t size = SystemConf::getInstance().HASH_TABLE_SIZE)
+      : m_num_buckets(size), m_mask(size - 1) {
+    if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) {
+      throw std::runtime_error(
+          "error: the size of the hash table has to be a power of two\n");
+    }
+
+    m_buckets = (BucketT *)malloc(m_num_buckets * sizeof(BucketT));
+    m_aggrs = (AggrT *)malloc(m_num_buckets * sizeof(AggrT));
+    if (!m_buckets || !m_aggrs) {
+      free(m_buckets);
+      free(m_aggrs);
+      throw std::bad_alloc();
+    }
+
+    for (size_t i = 0; i < m_num_buckets; ++i) {
+      m_buckets[i].state = 0;
+    }
+  }
+
+  HashTable(Bucket<KeyT, ValueT> *nodes,
+            size_t size = SystemConf::getInstance().HASH_TABLE_SIZE)
+      : m_buckets(nodes), m_num_buckets(size), m_mask(size - 1) {
+    if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) {
+      throw std::runtime_error(
+          "error: the size of the hash table has to be a power of two\n");
+    }
+  }
+
+  void clear() {
+    for (size_t i = 0; i < m_num_buckets; ++i) {
+      m_buckets[i].state = 0;
+      m_aggrs[i].initialise();
+    }
+    m_num_filled = 0;
+  }
+
+  void insert(KeyT key, ValueT value) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        m_buckets[i].value = value;
+        return;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        m_buckets[i].value = value;
+        return;
+      }
+    }
+    throw std::runtime_error("error: the hashtable is full\n");
+  }
+
+  void insert_or_modify(KeyT key, ValueT value) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    char tempState;
+    for (; i < m_num_buckets; i++) {
+      tempState = m_buckets[i].state;
+      if (tempState && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].value.combine(value);
+        m_buckets[i].counter++;
+        return;
+      }
+      if (!tempState) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        m_buckets[i].value = value;
+        m_buckets[i].counter = 1;
+        return;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      tempState = m_buckets[i].state;
+      if (tempState && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].value.combine(value);
+        m_buckets[i].counter++;
+        return;
+      }
+      if (!tempState) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        m_buckets[i].value = value;
+        m_buckets[i].counter = 1;
+        return;
+      }
+    }
+
+    throw std::runtime_error("error: the hashtable is full\n");
+  }
+
+  bool erase(const KeyT &key) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 0;
+        return true;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 0;
+        return true;
+      }
+    }
+    printf("error: entry not found\n");
+    return false;
+  }
+
+  bool find(const KeyT &key, ValueT &result) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        result = m_buckets[i].value;
+        return true;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        result = m_buckets[i].value;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool find_index(const KeyT &key, size_t &index) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        index = i;
+        return true;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        index = i;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  BucketT *buckets() { return m_buckets; }
+
+  size_t size() {
+    m_num_filled = 0;
+    for (size_t i = 0; i < m_num_buckets; i++) {
+      m_num_filled += m_buckets[i].state;
+    }
+    return m_num_filled;
+  }
+
+  bool empty() const { return m_num_filled == 0; }
+
+  size_t max_size() const { return m_num_buckets; }
+
+  size_t bucket_size() const { return sizeof(BucketT); }
+
+  float load_factor() {
+    return static_cast<float>(size()) / static_cast<float>(m_num_buckets);
+  }
+
+  ~HashTable() {
+    for (size_t bucket = 0; bucket < m_num_buckets; ++bucket) {
+      m_buckets[bucket].~BucketT();
+      m_aggrs[bucket].~AggrT();
+    }
+    free(m_buckets);
+    free(m_aggrs);
+  }
+};
+
+template <typename KeyT, typename ValueT,
+          typename HashT = std::hash<KeyT>,
+          typename EqT = HashMapEqualTo<KeyT>>
+class alignas(64) HashSet {
+ private:
+  using BucketT = SimpleBucket<KeyT>;
+
+  HashT m_hasher;
+  EqT m_eq;
+  BucketT *m_buckets = nullptr;
+  size_t m_num_buckets = 0;
+  size_t m_num_filled = 0;
+  size_t m_mask = 0;
+
+ public:
+  explicit HashSet(size_t size = SystemConf::getInstance().HASH_TABLE_SIZE)
+      : m_num_buckets(size), m_mask(size - 1) {
+    if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) {
+      throw std::runtime_error(
+          "error: the size of the hash table has to be a power of two\n");
+    }
+
+    m_buckets = (BucketT *)malloc(m_num_buckets * sizeof(BucketT));
+    if (!m_buckets) {
+      throw std::bad_alloc();
+    }
+
+    for (size_t i = 0; i < m_num_buckets; ++i) {
+      m_buckets[i].state = 0;
+    }
+  }
+
+  explicit HashSet(SimpleBucket<KeyT> *nodes,
+                   size_t size = SystemConf::getInstance().HASH_TABLE_SIZE)
+      : m_buckets(nodes), m_num_buckets(size), m_mask(size - 1) {
+    if (!(m_num_buckets && !(m_num_buckets & (m_num_buckets - 1)))) {
+      throw std::runtime_error(
+          "error: the size of the hash table has to be a power of two\n");
+    }
+  }
+
+  void clear() {
+    for (size_t i = 0; i < m_num_buckets; ++i) {
+      m_buckets[i].state = 0;
+    }
+    m_num_filled = 0;
+  }
+
+  void insert(KeyT key, ValueT &pos) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        pos = i;
+        return;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (!m_buckets[i].state || m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 1;
+        m_buckets[i].key = key;
+        pos = i;
+        return;
+      }
+    }
+    throw std::runtime_error("error: the hashtable is full\n");
+  }
+
+  bool find(const KeyT &key, ValueT &pos) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        pos = i;
+        return true;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        pos = i;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool erase(const KeyT &key) {
+    size_t ind = m_hasher(key) & m_mask, i = ind;
+    for (; i < m_num_buckets; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 0;
+        return true;
+      }
+    }
+    for (i = 0; i < ind; i++) {
+      if (m_buckets[i].state && m_eq(m_buckets[i].key, key)) {
+        m_buckets[i].state = 0;
+        return 
true; + } + } + printf("error: entry not found \n"); + return false; + } + + BucketT *buckets() { return m_buckets; } + + size_t size() { + m_num_filled = 0; + for (size_t i = 0; i < m_num_buckets; i++) { + m_num_filled += m_buckets[i].state; + } + return m_num_filled; + } + + bool empty() const { return m_num_filled == 0; } + + size_t max_size() const { return m_num_buckets; } + + size_t bucket_size() const { return sizeof(BucketT); } + + float load_factor() { + return static_cast(size()) / static_cast(m_num_buckets); + } + + ~HashSet() { + for (size_t bucket = 0; bucket < m_num_buckets; ++bucket) { + m_buckets[bucket].~BucketT(); + } + free(m_buckets); + } +}; \ No newline at end of file diff --git a/src/cql/operators/NoOp.h b/src/cql/operators/NoOp.h index 2ade4f0..9a6ab15 100644 --- a/src/cql/operators/NoOp.h +++ b/src/cql/operators/NoOp.h @@ -22,7 +22,7 @@ class NoOp : public OperatorCode { s.append("NoOp (").append(")"); return s; } - void processData(const std::shared_ptr &batch, Task &task, int pid) override { + void processData(const std::shared_ptr& batch, Task &task, int pid) override { //batch->initPartialCountBasedWindowPointers(); @@ -41,18 +41,25 @@ class NoOp : public OperatorCode { auto outputBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); - inputBuffer->appendBytesTo(startP, endP, outputBuffer->getBuffer()); + inputBuffer->appendBytesTo(startP, endP, outputBuffer->getBufferRaw()); outputBuffer->setPosition(batch->getBatchSize()); batch->setOutputBuffer(outputBuffer); /*auto tupleSize = batch->getSchema()->getTupleSize(); - auto output = (_InputSchema *) batch->getOutputBuffer()->getBuffer().data(); + auto output = (_InputSchema *) batch->getOutputBuffer()->getBufferRaw(); for (int i = 0; i < batch->getBatchSize()/tupleSize; i++) { std::cout << "[DBG] timestamp "+std::to_string(output[i].timestamp)+", attr1 "+std::to_string(output[i].attr_1)+", attr2 "+std::to_string(output[i].attr_2) << std::endl; }*/ task.outputWindowBatchResult(batch); } + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } TupleSchema &getOutputSchema() override { return m_inputSchema; } diff --git a/src/cql/operators/OperatorCode.h b/src/cql/operators/OperatorCode.h index b95800f..270bd6e 100644 --- a/src/cql/operators/OperatorCode.h +++ b/src/cql/operators/OperatorCode.h @@ -3,6 +3,8 @@ #include "tasks/Task.h" #include "tasks/WindowBatch.h" +class ColumnReference; + /* * \brief This base class is used for implementing operators. 
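+ *
+ * A minimal sketch of the contract (the `PassThrough` operator below is
+ * hypothetical, not part of this patch, and assumes TupleSchema is
+ * default-constructible): unary operators implement the one-batch
+ * processData() and stub out the two-batch overload, which exists so that
+ * binary operators such as ThetaJoin can share the same interface:
+ *
+ *   class PassThrough : public OperatorCode {
+ *    public:
+ *     std::string toSExpr() const override { return "PassThrough ()"; }
+ *     void processData(const std::shared_ptr<WindowBatch> &batch,
+ *                      Task &task, int pid) override {
+ *       (void) pid;
+ *       task.outputWindowBatchResult(batch);  // forward tuples unchanged
+ *     }
+ *     void processData(const std::shared_ptr<WindowBatch> &,
+ *                      const std::shared_ptr<WindowBatch> &,
+ *                      Task &, int) override {
+ *       throw std::runtime_error("error: unary operator");
+ *     }
+ *     TupleSchema &getOutputSchema() override { return m_schema; }
+ *    private:
+ *     TupleSchema m_schema;
+ *   };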
* @@ -12,6 +14,14 @@ class OperatorCode { public: virtual std::string toSExpr() const = 0; virtual void processData(const std::shared_ptr& batch, Task &api, int pid) = 0; + virtual void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &api, int pid) = 0; virtual TupleSchema &getOutputSchema() = 0; + virtual std::vector *getInputCols() { return nullptr; } + virtual std::vector *getSecondInputCols() { return nullptr; } + [[nodiscard]] virtual bool hasSelection() const { return false; } + virtual std::string getInputSchemaString() { return ""; } + virtual std::string getSelectionExpr() { return ""; } + virtual std::string getHashTableExpr() { return ""; } + [[nodiscard]] virtual bool hasStaticHashJoin() const { return false; } virtual ~OperatorCode() = default; }; \ No newline at end of file diff --git a/src/cql/operators/Projection.h b/src/cql/operators/Projection.h index de30ccf..6174e7e 100644 --- a/src/cql/operators/Projection.h +++ b/src/cql/operators/Projection.h @@ -47,12 +47,19 @@ class Projection : public OperatorCode { s.append(")"); return s; } - void processData(const std::shared_ptr &batch, Task &task, int pid) override { + void processData(const std::shared_ptr& batch, Task &task, int pid) override { (void) batch; (void) task; (void) pid; throw std::runtime_error("error: this operator cannot be used directly"); } + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } std::vector &getExpressions() { return m_expressions; } diff --git a/src/cql/operators/Selection.h b/src/cql/operators/Selection.h index 75c6a5e..8c96692 100644 --- a/src/cql/operators/Selection.h +++ b/src/cql/operators/Selection.h @@ -26,12 +26,19 @@ class Selection : public OperatorCode { s.append(")"); return s; } - void processData(const std::shared_ptr &batch, Task &task, int pid) override { + void processData(const std::shared_ptr& batch, Task &task, int pid) override { (void) batch; (void) task; (void) pid; throw std::runtime_error("error: this operator cannot be used directly"); } + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } Predicate *getPredicate() { return m_predicate; } TupleSchema &getOutputSchema() override { throw std::runtime_error("error: cannot take the output schema from selection directly"); diff --git a/src/cql/operators/StaticHashJoin.h b/src/cql/operators/StaticHashJoin.h index 9af2822..83eb403 100644 --- a/src/cql/operators/StaticHashJoin.h +++ b/src/cql/operators/StaticHashJoin.h @@ -47,12 +47,19 @@ class StaticHashJoin : public OperatorCode { s.append(")"); return s; } - void processData(const std::shared_ptr &batch, Task &task, int pid) override { + void processData(const std::shared_ptr& batch, Task &task, int pid) override { (void) batch; (void) task; (void) pid; throw std::runtime_error("error: this operator cannot be used directly"); } + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } Predicate *getPredicate() { return m_predicate; } TupleSchema 
&getOutputSchema() override { return m_outputSchema; diff --git a/src/cql/operators/ThetaJoin.h b/src/cql/operators/ThetaJoin.h new file mode 100644 index 0000000..b1a6def --- /dev/null +++ b/src/cql/operators/ThetaJoin.h @@ -0,0 +1,819 @@ +#pragma once + +#include + +#include "buffers/PartialWindowResultsFactory.h" +#include "cql/expressions/Expression.h" +#include "cql/operators/OperatorCode.h" +#include "cql/operators/codeGeneration/OperatorJit.h" +#include "cql/predicates/Predicate.h" +#include "utils/Utils.h" +#include "utils/WindowDefinition.h" + +/* + * \brief This class is used for generating code for theta join. + * + * */ + +class ThetaJoin : public OperatorCode { + private: + bool m_isReady = false; + bool m_debug = false; + bool m_genCode = true; + bool m_monitorSelectivity = false; + std::vector m_invoked; + std::vector m_matched; + Predicate *m_predicate; + TupleSchema m_leftInputSchema, m_rightInputSchema; + TupleSchema m_outputSchema; + size_t m_circularBufferSize; + + int m_id = -1; + CodeGenWrapper m_codeGen; + std::unique_ptr m_dLoader; + std::function process; + + public: + ThetaJoin(TupleSchema lSchema, TupleSchema rSchema, Predicate *predicate) + : m_invoked(SystemConf::getInstance().WORKER_THREADS), + m_matched(SystemConf::getInstance().WORKER_THREADS), + m_predicate(predicate), + m_leftInputSchema(lSchema), + m_rightInputSchema(rSchema), + m_outputSchema(ExpressionUtils::mergeTupleSchemas(lSchema, rSchema)) {} + + void setQueryId(int qId) { + m_id = qId; + } + + std::string toSExpr() const override { + std::string s; + s.append("ThetaJoin ("); + s.append(m_predicate->toSExpr()); + s.append(")"); + return s; + } + + explicit operator std::string() const { + std::string s; + s.append("ThetaJoin ("); + s.append(m_predicate->toSExpr()); + s.append(")"); + return s; + } + + void setup(WindowDefinition *winDef1, WindowDefinition *winDef2, size_t circularBufferSize = 0) { + // create file path if it doesn't exist + std::experimental::filesystem::path path{ + SystemConf::getInstance().FILE_ROOT_PATH}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + path = {SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + + // setup operator + m_circularBufferSize = (circularBufferSize != 0) ? 
circularBufferSize : SystemConf::getInstance().CIRCULAR_BUFFER_SIZE; + std::string s; + + if (!SystemConf::getInstance().RECOVER) { + // add definitions + s.append(getIncludesString()); + s.append(getQueryDefinitionString(winDef1, winDef2)); + // add schemas + s.append(getInputSchemaString(true)); + s.append(getInputSchemaString(false)); + s.append(getOutputSchemaString()); + // get predicate + auto predicate1 = getSelectionExpr(true); + auto predicate2 = getSelectionExpr(false); + // get code for row/range based + auto fWin = getFirstWindowExpr(winDef1, winDef2); + auto sWin = getSecondWindowExpr(winDef1, winDef2); + // construct code + s.append(getComputationCode(predicate1, predicate2, fWin, sWin)); + s.append(getC_Definitions()); + + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::ofstream out(path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"); + out << s; + out.close(); + } + if (m_genCode) { + int argc = 2; + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::string mainPath = path + "/LightSaber"; + std::string generatedPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"; + std::string libPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".so"; + const char *str0 = mainPath.c_str(); + const char *str1 = generatedPath.c_str(); + const char **argv = (const char **) malloc(2 * sizeof(char *)); + argv[0] = str0; + argv[1] = str1; + if (!SystemConf::getInstance().RECOVER) { + // generate shared library + std::thread slt([&]{ + std::string command = "clang -shared -fPIC -O3 -march=native -g -o " + libPath + " " + generatedPath; + system(command.c_str()); + }); + m_codeGen.parseAndCodeGen(argc, argv, SystemConf::getInstance().RECOVER); + + auto processFn = m_codeGen.getFunction("process"); + if (!processFn) { + std::cout << "Failed to fetch the pointers." 
<< std::endl; + exit(1); + } + process = *processFn; + + slt.join(); + } else { + m_dLoader = std::make_unique(libPath.c_str()); + process = m_dLoader->load(libPath, "process"); + } + } + + m_isReady = true; + } + + void processData(const std::shared_ptr &batch, Task &task, + int pid) override { + (void)batch; + (void)task; + (void)pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } + + void processData(const std::shared_ptr &lBatch, + const std::shared_ptr &rBatch, Task &task, + int pid) override { + if (!m_isReady) + throw std::runtime_error("error: the operator has not been set"); + + if (m_debug) { + processInDebug(lBatch, rBatch, task, pid); + } else { + processCodeGen + //processInDebug + (lBatch, rBatch, task, pid); + } + } + + Predicate *getPredicate() { return m_predicate; } + + TupleSchema &getOutputSchema() override { + return m_outputSchema; + } + + std::vector *getInputCols() override { + auto cols = new std::vector; + std::unordered_set colNums; + for (int i = 0; i < m_leftInputSchema.numberOfAttributes(); ++i) { + auto col = new ColumnReference(i, m_leftInputSchema.getAttributeType(i)); + cols->push_back(col); + colNums.insert(0); + } + return cols; + } + + std::vector *getSecondInputCols() override { + auto cols = new std::vector; + std::unordered_set colNums; + for (int i = 0; i < m_rightInputSchema.numberOfAttributes(); ++i) { + auto col = new ColumnReference(i, m_rightInputSchema.getAttributeType(i)); + cols->push_back(col); + colNums.insert(0); + } + return cols; + } + + private: + void processInDebug(const std::shared_ptr &lBatch, + const std::shared_ptr &rBatch, Task &task, + int pid) { + long currentIndex1 = lBatch->getBufferStartPointer(); + long currentIndex2 = rBatch->getBufferStartPointer(); + long endIndex1 = lBatch->getBufferEndPointer() + m_leftInputSchema.getTupleSize(); + long endIndex2 = rBatch->getBufferEndPointer() + m_rightInputSchema.getTupleSize(); + long currentWindowStart1 = currentIndex1; + long currentWindowEnd1 = currentIndex1; + long currentWindowStart2 = currentIndex2; + long currentWindowEnd2 = currentIndex2; + + auto lBuffer = lBatch->getInputQueryBuffer(); + auto rBuffer = rBatch->getInputQueryBuffer(); + auto outputBuffer = + PartialWindowResultsFactory::getInstance().newInstance(pid); + + int tupleSize1 = m_leftInputSchema.getTupleSize(); + int tupleSize2 = m_rightInputSchema.getTupleSize(); + + // todo: fix the padding logic + // Actual Tuple Size without padding + int pointerOffset1 = tupleSize1 - m_leftInputSchema.getPadLength(); + int pointerOffset2 = tupleSize2 - m_rightInputSchema.getPadLength(); + + auto windowDef1 = lBatch->getWindowDefinition(); + auto windowDef2 = rBatch->getWindowDefinition(); + + if (m_debug) { + std::cout << "[DBG] t " + std::to_string(lBatch->getTaskId()) + + " batch-1 [" + std::to_string(currentIndex1) + ", " + + std::to_string(endIndex1) + "] " + + std::to_string((endIndex1 - currentIndex1) / + tupleSize1) + + " tuples [f " + + std::to_string(lBatch->getFreePointer()) + + "] / batch-2 [" + std::to_string(currentIndex2) + ", " + + std::to_string(endIndex2) + "] " + + std::to_string((endIndex2 - currentIndex2) / + tupleSize2) + + " tuples [f " + + std::to_string(lBatch->getSecondFreePointer()) + "]" + << std::endl; + } + + long currentTimestamp1, startTimestamp1; + long currentTimestamp2, startTimestamp2; + + if (m_monitorSelectivity) m_invoked[pid].m_value = m_matched[pid].m_value = 0L; + + // Is one of the windows empty? 
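+    // What follows is a symmetric merge of the two batches in timestamp
+    // order: at each step the side with the smaller current timestamp
+    // advances, its tuple is probed against the opposite window, and
+    // expired tuples are then evicted from both window fronts. For
+    // example, with two range windows of size 5, when the left scan
+    // reaches a tuple with t = 9, every right tuple with t < 4 has
+    // already been evicted, so only right tuples in [4, 9] are probed.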
+ if (currentIndex1 != endIndex1 && currentIndex2 != endIndex2) { + long prevCurrentIndex1 = -1; + long countMatchPositions = 0; + + // Changed <=, <=, || to && + // while (currentIndex1 < endIndex1 && currentIndex2 <= endIndex2) { + // OLD + while (currentIndex1 < endIndex1 || currentIndex2 < endIndex2) { + //std::cout << "[DBG] batch-1 index " + std::to_string(currentIndex1) + " end " + //+ std::to_string(endIndex1) + " batch-2 index " + + //std::to_string(currentIndex2) + " end " + std::to_string(endIndex1) << std::endl; + + // Get timestamps of currently processed tuples in either batch + currentTimestamp1 = lBatch->getTimestamp(currentIndex1); + currentTimestamp2 = rBatch->getTimestamp(currentIndex2); + + // Move in first batch? + if ((currentTimestamp1 < currentTimestamp2) || + (currentTimestamp1 == currentTimestamp2 && + currentIndex2 >= endIndex2)) { + // Scan second window + // Changed here: <= + // for (long i = currentWindowStart2; i <= currentWindowEnd2; i += + // tupleSize2) { OLD + for (long i = currentWindowStart2; i < currentWindowEnd2; i += tupleSize2) { + //std::cout << "[DBG] 1st window index " + + //std::to_string(currentIndex1) + " 2nd window index " + + // std::to_string(i) << std::endl; + + if (m_monitorSelectivity) m_invoked[pid].m_value++; + + if (m_predicate == nullptr + // m_predicate.satisfied (buffer1, schema1, currentIndex1, buffer2, schema2, i) + ) { + if (prevCurrentIndex1 != currentIndex1) { + prevCurrentIndex1 = currentIndex1; + countMatchPositions++; + } + + //std::cout << "[DBG] match at currentIndex1 = " + std::to_string(currentIndex1) + " (count = " + std::to_string(countMatchPositions) + ")" << std::endl; + + auto writePos = outputBuffer->getPosition(); + std::memcpy(outputBuffer->getBufferRaw() + writePos, lBuffer->getBufferRaw() + currentIndex1, tupleSize1); + writePos += tupleSize1; + std::memcpy(outputBuffer->getBufferRaw() + writePos, rBuffer->getBufferRaw() + i, tupleSize2); + writePos += tupleSize2; + // Write dummy content, if needed + writePos += m_outputSchema.getPadLength(); + outputBuffer->setPosition(writePos); + + if (m_monitorSelectivity) m_matched[pid].m_value++; + } + } + + // Add current tuple to window over first batch + currentWindowEnd1 = currentIndex1; + + // Remove old tuples in window over first batch + if (windowDef1->isRowBased()) { + if ((currentWindowEnd1 - currentWindowStart1) / tupleSize1 > windowDef1->getSize()) + currentWindowStart1 += windowDef1->getSlide() * tupleSize1; + } else if (windowDef1->isRangeBased()) { + startTimestamp1 = lBatch->getTimestamp(currentWindowStart1); + while (startTimestamp1 < currentTimestamp1 - windowDef1->getSize()) { + currentWindowStart1 += tupleSize1; + startTimestamp1 = lBatch->getTimestamp(currentWindowStart1); + } + } + + // Remove old tuples in window over second batch (only for range + // windows) + if (windowDef2->isRangeBased()) { + startTimestamp2 = rBatch->getTimestamp(currentWindowStart2); + while (startTimestamp2 < currentTimestamp1 - windowDef2->getSize()) { + currentWindowStart2 += tupleSize2; + startTimestamp2 = rBatch->getTimestamp(currentWindowStart2); + } + } + + // Do the actual move in first window batch + currentIndex1 += tupleSize1; + } else { + // Move in second batch + // Scan first window + + //std::cout << "[DBG] move in second window..." 
<< std::endl; + //std::cout << "[DBG] scan first window: start " + std::to_string(currentWindowStart1) + + //" end " + std::to_string(currentWindowEnd1) << std::endl; + + // Changed here: <= + // for (long i = currentWindowStart1; i <= currentWindowEnd1; i += tupleSize1) { + for (long i = currentWindowStart1; i < currentWindowEnd1; i += tupleSize1) { + if (m_monitorSelectivity) m_invoked[pid].m_value++; + + if (m_predicate == nullptr + // m_predicate.satisfied (buffer1, schema1, i, buffer2, schema2, currentIndex2) + ) { + //std::cout << "[DBG] Match in first window..." << std::endl; + + auto writePos = outputBuffer->getPosition(); + std::memcpy(outputBuffer->getBufferRaw() + writePos, lBuffer->getBufferRaw() + i, tupleSize1); + writePos += tupleSize1; + std::memcpy(outputBuffer->getBufferRaw() + writePos, rBuffer->getBufferRaw() + currentIndex2, tupleSize2); + writePos += tupleSize2; + // Write dummy content, if needed + writePos += m_outputSchema.getPadLength(); + outputBuffer->setPosition(writePos); + + if (m_monitorSelectivity) m_matched[pid].m_value++; + } + } + + // Add current tuple to window over second batch + currentWindowEnd2 = currentIndex2; + + //std::cout << "[DBG] currentWindowStart2 = " + std::to_string(currentWindowStart2) << std::endl; + //std::cout << "[DBG] currentWindowEnd2 = " + std::to_string(currentWindowEnd2) << std::endl; + + // Remove old tuples in window over second batch + if (windowDef2->isRowBased()) { + if ((currentWindowEnd2 - currentWindowStart2) / tupleSize2 > windowDef2->getSize()) + currentWindowStart2 += windowDef2->getSlide() * tupleSize2; + } else if (windowDef2->isRangeBased()) { + startTimestamp2 = rBatch->getTimestamp(currentWindowStart2); + while (startTimestamp2 < currentTimestamp2 - windowDef2->getSize()) { + currentWindowStart2 += tupleSize2; + startTimestamp2 = rBatch->getTimestamp(currentWindowStart2); + } + } + + // Remove old tuples in window over first batch (only for range windows) + if (windowDef1->isRangeBased()) { + startTimestamp1 = lBatch->getTimestamp(currentWindowStart1); + while (startTimestamp1 < currentTimestamp2 - windowDef1->getSize()) { + currentWindowStart1 += tupleSize1; + startTimestamp1 = lBatch->getTimestamp(currentWindowStart1); + } + } + // Do the actual move in second window batch + currentIndex2 += tupleSize2; + } + } + } + + // lBuffer->release(); + // rBuffer->release(); + lBatch->setOutputBuffer(outputBuffer); + lBatch->setSchema(&m_outputSchema); + + if (m_debug) { + std::cout << "[DBG] output buffer position is " + + std::to_string(outputBuffer->getPosition()) + << std::endl; + } + + if (m_monitorSelectivity) { + double selectivity = 0; + if (m_invoked[pid].m_value > 0) + selectivity = ((double)m_matched[pid].m_value / (double)m_invoked[pid].m_value) * 100; + std::cout << "[DBG] task " + std::to_string(lBatch->getTaskId()) + " " + + std::to_string(m_matched[pid].m_value) + " out of " + + std::to_string(m_invoked[pid].m_value) + " tuples selected (" + + std::to_string(selectivity) + ")" + << std::endl; + } + task.outputWindowBatchResult(lBatch); + } + + void processCodeGen(const std::shared_ptr &lBatch, + const std::shared_ptr &rBatch, Task &task, + int pid) { + long currentIndex1 = lBatch->getBufferStartPointer(); + long currentIndex2 = rBatch->getBufferStartPointer(); + long endIndex1 = lBatch->getBufferEndPointer() + m_leftInputSchema.getTupleSize(); + long endIndex2 = rBatch->getBufferEndPointer() + m_rightInputSchema.getTupleSize(); + long currentWindowStart1 = currentIndex1; + long currentWindowEnd1 = 
currentIndex1; + long currentWindowStart2 = currentIndex2; + long currentWindowEnd2 = currentIndex2; + + auto lBuffer = lBatch->getInputQueryBuffer(); + auto rBuffer = rBatch->getInputQueryBuffer(); + auto outputBuffer = + PartialWindowResultsFactory::getInstance().newInstance(pid); + + int tupleSize1 = m_leftInputSchema.getTupleSize(); + int tupleSize2 = m_rightInputSchema.getTupleSize(); + + // todo: fix the padding logic + // Actual Tuple Size without padding + int pointerOffset1 = tupleSize1 - m_leftInputSchema.getPadLength(); + int pointerOffset2 = tupleSize2 - m_rightInputSchema.getPadLength(); + + auto windowDef1 = lBatch->getWindowDefinition(); + auto windowDef2 = rBatch->getWindowDefinition(); + + if (m_debug) { + std::cout << "[DBG] t " + std::to_string(lBatch->getTaskId()) + + " batch-1 [" + std::to_string(currentIndex1) + ", " + + std::to_string(endIndex1) + "] " + + std::to_string((endIndex1 - currentIndex1) / + tupleSize1) + + " tuples [f " + + std::to_string(lBatch->getFreePointer()) + + "] / batch-2 [" + std::to_string(currentIndex2) + ", " + + std::to_string(endIndex2) + "] " + + std::to_string((endIndex2 - currentIndex2) / + tupleSize2) + + " tuples [f " + + std::to_string(lBatch->getSecondFreePointer()) + "]" + << std::endl; + } + + long currentTimestamp1, startTimestamp1; + long currentTimestamp2, startTimestamp2; + + if (m_monitorSelectivity) m_invoked[pid].m_value = m_matched[pid].m_value = 0L; + + // Is one of the windows empty? + if (currentIndex1 != endIndex1 && currentIndex2 != endIndex2) { + long writePos = outputBuffer->getPosition(); + process(currentIndex1, currentIndex2, endIndex1, endIndex2, + lBuffer->getBufferRaw(), rBuffer->getBufferRaw(), + outputBuffer->getBufferRaw(), writePos, m_monitorSelectivity, + m_invoked[pid].m_value, m_matched[pid].m_value); + outputBuffer->setPosition(writePos); + } + + // lBuffer->release(); + // rBuffer->release(); + lBatch->setOutputBuffer(outputBuffer); + lBatch->setSchema(&m_outputSchema); + + if (m_debug) { + std::cout << "[DBG] output buffer position is " + + std::to_string(outputBuffer->getPosition()) + << std::endl; + } + + if (m_monitorSelectivity) { + double selectivity = 0; + if (m_invoked[pid].m_value > 0) + selectivity = ((double)m_matched[pid].m_value / (double)m_invoked[pid].m_value) * 100; + std::cout << "[DBG] task " + std::to_string(lBatch->getTaskId()) + " " + + std::to_string(m_matched[pid].m_value) + " out of " + + std::to_string(m_invoked[pid].m_value) + " tuples selected (" + + std::to_string(selectivity) + ")" + << std::endl; + } + task.outputWindowBatchResult(lBatch); + } + + std::string getIncludesString() { + std::string s; + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("#include \n"); + s.append("\n"); + return s; + } + + std::string getInputSchemaString(bool isFirst) { + auto schema = (isFirst) ? m_leftInputSchema : m_rightInputSchema; + auto suffix = (isFirst) ? 
"1" : "2"; + std::string s; + s.append("struct alignas(16) input_tuple_t_"); + s.append(suffix); + s.append(" {\n"); + /* The first attribute is always a timestamp */ + s.append("\tlong timestamp;\n"); + for (int i = 1; i < schema.numberOfAttributes(); i++) { + auto type = schema.getAttributeType(i); + switch (type) { + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); + break; + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); + break; + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); + break; + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + break; + default : + throw std::runtime_error( + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); + } + } + s.append("};\n"); + s.append("\n"); + return s; + } + + std::string getOutputSchemaString() { + std::string s; + s.append("struct alignas(16) output_tuple_t {\n"); + int i = 0; + if (m_outputSchema.hasTime()) { + s.append("\tlong timestamp;\n"); + i++; + } + for (; i < m_outputSchema.numberOfAttributes(); i++) { + auto type = m_outputSchema.getAttributeType(i); + switch (type) { + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); + break; + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); + break; + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); + break; + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + break; + default : + throw std::runtime_error( + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); + } + } + s.append("};\n"); + s.append("\n"); + return s; + } + + std::string getQueryDefinitionString(WindowDefinition *winDef1, WindowDefinition *winDef2) { + std::string s; + + s.append("#define WINDOW_SIZE_1 " + std::to_string(winDef1->getSize()) + "L\n"); + s.append("#define WINDOW_SLIDE_1 " + std::to_string(winDef1->getSlide()) + "L\n"); + s.append("#define WINDOW_SIZE_2 " + std::to_string(winDef2->getSize()) + "L\n"); + s.append("#define WINDOW_SLIDE_2 " + std::to_string(winDef2->getSlide()) + "L\n"); + s.append("#define PAD_LENGTH " + std::to_string(m_outputSchema.getPadLength()) + "L\n"); + s.append("#define BUFFER_SIZE " + std::to_string(m_circularBufferSize) + "L\n"); + s.append("#define MASK " + std::to_string(m_circularBufferSize-1) + "L\n"); + s.append("#define UNBOUNDED_SIZE " + std::to_string(SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE) + "L\n"); + s.append("\n"); + return s; + } + + std::string getSelectionExpr(bool isFirst) { + std::string s; + if (m_predicate != nullptr) { + + s.append(" auto maskedI = (i&MASK);\n"); + s.append(" if ( "); + s.append(m_predicate->toSExprForCodeGen()); + s.append(" )\n"); + + // todo: fix the predicate definition + std::string str = "data[bufferPtr]"; + if (isFirst) { + s.replace(s.find(str), str.length(), "leftB[maskedCI1/tupleSize1]"); + s.replace(s.find(str), str.length(), "rightB[maskedI/tupleSize2]"); + } else { + s.replace(s.find(str), str.length(), "leftB[maskedI/tupleSize1]"); + s.replace(s.find(str), str.length(), "rightB[maskedCI2/tupleSize2]"); + } + + } else { + s.append("if (true)"); + } + return s; + } + + std::string getFirstWindowExpr(WindowDefinition *winDef1, WindowDefinition *winDef2) { + std::string s; + s.append(" // Remove old tuples in window over first batch\n"); + if (winDef1->isRowBased()) { + s.append(" if ((currentWindowEnd1 - currentWindowStart1) / 
tupleSize1 > WINDOW_SIZE_1)\n" + " currentWindowStart1 += WINDOW_SLIDE_1 * tupleSize1;\n"); + } else { + s.append(" startTimestamp1 = (int) leftB[currentWindowStart1/tupleSize1].timestamp;\n" + " while (startTimestamp1 < currentTimestamp1 - WINDOW_SIZE_1) {\n" + " currentWindowStart1 += tupleSize1;\n" + " startTimestamp1 = (int) leftB[currentWindowStart1/tupleSize1].timestamp;\n" + " }\n"); + + } + if (winDef2->isRangeBased()) { + s.append(" // Remove old tuples in window over second batch (only for range\n" + " // windows)\n"); + s.append(" startTimestamp2 = (int) rightB[currentWindowStart2/tupleSize2].timestamp;\n" + " while (startTimestamp2 < currentTimestamp1 - WINDOW_SIZE_2) {\n" + " currentWindowStart2 += tupleSize2;\n" + " startTimestamp2 = (int) rightB[currentWindowStart2/tupleSize2].timestamp;\n" + " }\n"); + } + return s; + } + + std::string getSecondWindowExpr(WindowDefinition *winDef1, WindowDefinition *winDef2) { + std::string s; + s.append(" // Remove old tuples in window over second batch\n"); + if (winDef2->isRowBased()) { + s.append(" if ((currentWindowEnd2 - currentWindowStart2) / tupleSize2 > WINDOW_SIZE_2)\n" + " currentWindowStart2 += WINDOW_SLIDE_2 * tupleSize2;\n"); + } else { + s.append(" startTimestamp2 = (int) rightB[currentWindowStart2/tupleSize2].timestamp;\n" + " while (startTimestamp2 < currentTimestamp2 - WINDOW_SIZE_2) {\n" + " currentWindowStart2 += tupleSize2;\n" + " startTimestamp2 = (int) rightB[currentWindowStart2/tupleSize2].timestamp;\n" + " }\n"); + + } + if (winDef1->isRangeBased()) { + s.append(" // Remove old tuples in window over first batch (only for range windows)\n"); + s.append(" startTimestamp1 = (int) leftB[currentWindowStart1/tupleSize1].timestamp;\n" + " while (startTimestamp1 < currentTimestamp2 - WINDOW_SIZE_1) {\n" + " currentWindowStart1 += tupleSize1;\n" + " startTimestamp1 = (int) leftB[currentWindowStart1/tupleSize1].timestamp;\n" + " }\n"); + } + return s; + } + + std::string getComputationCode(std::string &predicate1, std::string &predicate2, std::string &firstWindowExpr, std::string &secondWindowExpr) { + std::string s = "void processJoin(long currentIndex1, long currentIndex2, long endIndex1,\n" + " long endIndex2, char *lBuffer, char *rBuffer,\n" + " char *outputBuffer, long &writePos, bool monitorSelectivity,\n" + " long &invoked, long &matched) {\n" + " long currentWindowStart1 = currentIndex1;\n" + " long currentWindowEnd1 = currentIndex1;\n" + " long currentWindowStart2 = currentIndex2;\n" + " long currentWindowEnd2 = currentIndex2;\n" + " long prevCurrentIndex1 = -1;\n" + " long countMatchPositions = 0;\n" + " long currentTimestamp1, startTimestamp1;\n" + " long currentTimestamp2, startTimestamp2;\n" + "\n" + " input_tuple_t_1 *leftB = (input_tuple_t_1 *)lBuffer;\n" + " int tupleSize1 = sizeof(input_tuple_t_1);\n" + " input_tuple_t_2 *rightB = (input_tuple_t_2 *)rBuffer;\n" + " int tupleSize2 = sizeof(input_tuple_t_2);\n" + "\n" + " // Changed <=, <=, || to &&\n" + " // while (currentIndex1 < endIndex1 && currentIndex2 <= endIndex2) {\n" + " // OLD\n" + " while (currentIndex1 < endIndex1 && currentIndex2 < endIndex2) {\n // the && was ||" + " // std::cout << \"[DBG] batch-1 index \" + std::to_string(currentIndex1) + \"\n" + " // end \"\n" + " //+ std::to_string(endIndex1) + \" batch-2 index \" +\n" + " // std::to_string(currentIndex2) + \" end \" + std::to_string(endIndex1) <<\n" + " // std::endl;\n" + "\n" + " // Get timestamps of currently processed tuples in either batch\n" + " auto maskedCI1 = (currentIndex1 & 
MASK);\n" + " auto maskedCI2 = (currentIndex2 & MASK);\n" + " currentTimestamp1 = (int) leftB[maskedCI1 / tupleSize1].timestamp;\n" + " currentTimestamp2 = (int) rightB[maskedCI2 / tupleSize2].timestamp;\n" + "\n" + " // Move in first batch?\n" + " if ((currentTimestamp1 < currentTimestamp2) ||\n" + " (currentTimestamp1 == currentTimestamp2 &&\n" + " currentIndex2 >= endIndex2)) {\n" + " // Scan second window\n" + " // Changed here: <=\n" + " // for (long i = currentWindowStart2; i <= currentWindowEnd2; i +=\n" + " // tupleSize2) { OLD\n" + " for (long i = currentWindowStart2; i < currentWindowEnd2;\n" + " i += tupleSize2) {\n" + " // std::cout << \"[DBG] 1st window index \" +\n" + " // std::to_string(currentIndex1) + \" 2nd window index \" +\n" + " // std::to_string(i) << std::endl;\n" + "\n" + " if (monitorSelectivity) invoked++;\n" + "\n" + predicate1 + + " // if (true)\n" + " {\n" + " if (prevCurrentIndex1 != currentIndex1) {\n" + " prevCurrentIndex1 = currentIndex1;\n" + " countMatchPositions++;\n" + " }\n" + "\n" + " // std::cout << \"[DBG] match at currentIndex1 = \" +\n" + " // std::to_string(currentIndex1) + \" (count = \" +\n" + " // std::to_string(countMatchPositions) + \")\" << std::endl;\n" + " if (writePos + tupleSize1 + tupleSize2 >= UNBOUNDED_SIZE)\n" + " throw std::runtime_error(\"error: increase the size of unbounded buffers for join (\" \n" + " + std::to_string(writePos + tupleSize1 + tupleSize2) + \" < \"\n" + " + std::to_string(UNBOUNDED_SIZE) + \" - selectivity \"\n" + " + std::to_string(((double)matched / (double)invoked) * 100) + \" - matched \"\n" + " + std::to_string(matched) + \")\");\n" + "\n" + " std::memcpy(outputBuffer + writePos, lBuffer + maskedCI1,\n" + " tupleSize1);\n" + " writePos += tupleSize1;\n" + " std::memcpy(outputBuffer + writePos, rBuffer + maskedI, tupleSize2);\n" + " writePos += tupleSize2;\n" + " // Write dummy content, if needed\n" + " writePos += PAD_LENGTH;\n" + "\n" + " if (monitorSelectivity) matched++;\n" + " }\n" + " }\n" + "\n" + " // Add current tuple to window over first batch\n" + " currentWindowEnd1 = currentIndex1;\n" + "\n" + firstWindowExpr + + "\n" + " // Do the actual move in first window batch\n" + " currentIndex1 += tupleSize1;\n" + " } else {\n" + " // Move in second batch\n" + " // Scan first window\n" + "\n" + " // std::cout << \"[DBG] move in second window...\" << std::endl;\n" + " // std::cout << \"[DBG] scan first window: start \" +\n" + " // std::to_string(currentWindowStart1) + \" end \" +\n" + " // std::to_string(currentWindowEnd1) << std::endl;\n" + "\n" + " // Changed here: <=\n" + " // for (long i = currentWindowStart1; i <= currentWindowEnd1; i +=\n" + " // tupleSize1) {\n" + " for (long i = currentWindowStart1; i < currentWindowEnd1;\n" + " i += tupleSize1) {\n" + " if (monitorSelectivity) invoked++;\n" + "\n" + predicate2 + + " // if (true)\n" + " {\n" + " // std::cout << \"[DBG] Match in first window...\" << std::endl;\n" + " if (writePos + tupleSize1 + tupleSize2 >= UNBOUNDED_SIZE)\n" + " throw std::runtime_error(\"error: increase the size of unbounded buffers for join (\" \n" + " + std::to_string(writePos + tupleSize1 + tupleSize2) + \" < \"\n" + " + std::to_string(UNBOUNDED_SIZE) + \" - selectivity \"\n" + " + std::to_string(((double)matched / (double)invoked) * 100) + \" - matched \"\n" + " + std::to_string(matched) + \")\");\n" + "\n" + " std::memcpy(outputBuffer + writePos, lBuffer + maskedI, tupleSize1);\n" + " writePos += tupleSize1;\n" + " std::memcpy(outputBuffer + writePos, rBuffer + maskedCI2,\n" 
+ " tupleSize2);\n" + " writePos += tupleSize2;\n" + " // Write dummy content, if needed\n" + " writePos += PAD_LENGTH;\n" + "\n" + " if (monitorSelectivity) matched++;\n" + " }\n" + " }\n" + "\n" + " // Add current tuple to window over second batch\n" + " currentWindowEnd2 = currentIndex2;\n" + "\n" + " // std::cout << \"[DBG] currentWindowStart2 = \" +\n" + " // std::to_string(currentWindowStart2) << std::endl; std::cout << \"[DBG]\n" + " // currentWindowEnd2 = \" + std::to_string(currentWindowEnd2) << std::endl;\n" + "\n" + secondWindowExpr + + " // Do the actual move in second window batch\n" + " currentIndex2 += tupleSize2;\n" + " }\n" + " }\n" + "}\n" + "\n"; + + return s; + } + + std::string getC_Definitions() { + std::string s; + s.append("extern \"C\" {\n" + "void process(long currentIndex1, long currentIndex2, long endIndex1, long endIndex2,\n" + " char *lBuffer, char *rBuffer, char *outputBuffer, long &writePos,\n" + " bool monitorSelectivity, long &invoked, long &matched) {\n" + " processJoin(currentIndex1, currentIndex2, endIndex1, endIndex2, lBuffer,\n" + " rBuffer, outputBuffer, writePos, monitorSelectivity, invoked,\n" + " matched);\n" + "};\n" + "}"); + return s; + } +}; \ No newline at end of file diff --git a/src/cql/operators/codeGeneration/OperatorJit.cpp b/src/cql/operators/codeGeneration/OperatorJit.cpp index 36ce2d0..f6e6c66 100644 --- a/src/cql/operators/codeGeneration/OperatorJit.cpp +++ b/src/cql/operators/codeGeneration/OperatorJit.cpp @@ -1,5 +1,11 @@ #include "OperatorJit.h" +#include +#include +#include + +#include + #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Tool.h" @@ -7,11 +13,9 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/TextDiagnosticPrinter.h" - #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include -#include +#include "llvm/Object/ObjectFile.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" @@ -30,9 +34,9 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" //#include "llvm/Transforms/Vectorize.h" @@ -54,19 +58,20 @@ namespace llvm { namespace orc { OperatorJit::OperatorJit() : Resolver(createLegacyLookupResolver( - ES, - [this](const std::string &Name) -> JITSymbol { - if (auto Sym = OptimizeLayer.findSymbol(Name, false)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - if (auto SymAddr = - RTDyldMemoryManager::getSymbolAddressInProcess(Name)) - return JITSymbol(SymAddr, JITSymbolFlags::Exported); - return nullptr; - }, - [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })), - TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ES, + [this](const std::string &Name) -> JITSymbol { + if (auto Sym = OptimizeLayer.findSymbol(Name, false)) + return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); + if (auto SymAddr = + RTDyldMemoryManager::getSymbolAddressInProcess(Name)) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); + return nullptr; + }, + [](Error Err) { cantFail(std::move(Err), "lookupFlags 
failed"); })), + TM(EngineBuilder().selectTarget()), + DL(TM->createDataLayout()), ObjectLayer(ES, [this](VModuleKey) { return LegacyRTDyldObjectLinkingLayer::Resources{ @@ -89,6 +94,18 @@ VModuleKey OperatorJit::addModule(std::unique_ptr M) { return K; } +VModuleKey OperatorJit::addObjectFile(object::OwningBinary O) { + // Add the module to the JIT with a new VModuleKey. + objectFile = std::move(O); + auto K = ES.allocateVModule(); + std::unique_ptr mb = + MemoryBuffer::getMemBuffer(objectFile.getBinary()->getMemoryBufferRef()); + //MemoryBuffer::getMemBufferCopy(O->getMemoryBufferRef().getBuffer()); + cantFail(ObjectLayer.addObject(K, std::move(mb))); + keys.push_back(K); + return K; +} + JITSymbol OperatorJit::findSymbol(const StringRef &Name) { std::string MangledName; raw_string_ostream MangledNameStream(MangledName); @@ -108,8 +125,7 @@ JITTargetAddress OperatorJit::getSymbolAddress(const StringRef &Name) { } void OperatorJit::removeAllModules() { - for (auto m : keys) - removeModule(m); + for (auto m : keys) removeModule(m); keys.clear(); } @@ -118,6 +134,7 @@ void OperatorJit::removeModule(VModuleKey K) { } std::unique_ptr OperatorJit::optimizeModule(std::unique_ptr M) { + if (!useOptimizationPasses) return M; // Optimize the emitted LLVM IR. Timer topt; @@ -144,7 +161,8 @@ std::unique_ptr OperatorJit::optimizeModule(std::unique_ptr M) { auto FPM = llvm::make_unique(M.get()); // Add some optimizations. - FPM->add(llvm::createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + FPM->add( + llvm::createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); FPM->add(createInstructionCombiningPass()); FPM->add(createReassociatePass()); @@ -163,7 +181,7 @@ std::unique_ptr OperatorJit::optimizeModule(std::unique_ptr M) { PM_BuilderP->populateFunctionPassManager(*FPM); PM_BuilderP->populateModulePassManager(*PMP); PM_BuilderP->populateLTOPassManager(*PMP); - //PM_BuilderP->populateThinLTOPassManager(*PMP); + // PM_BuilderP->populateThinLTOPassManager(*PMP); PMP->add(createVerifierPass()); PMP->add(createGlobalOptimizerPass()); @@ -171,41 +189,70 @@ std::unique_ptr OperatorJit::optimizeModule(std::unique_ptr M) { // Run the optimizations over all functions in the module being added to // the JIT. 
FPM->doInitialization(); - for (auto &F : *M) - FPM->run(F); + for (auto &F : *M) FPM->run(F); FPM->doFinalization(); // Finally run the module passes PMP->run(*M); - //if (verbose) { + // if (verbose) { topt.stopTimer(); - //std::cout << "[Optimization elapsed:] " << topt.getTotalTime().getProcessTime() << "s\n"; - // const char* post_opt_file = "/tmp/llvmjit-post-opt.ll"; - // llvm_module_to_file(*M, post_opt_file); - // std::cout << "[Post optimization module] dumped to " << post_opt_file - // << "\n"; + // std::cout << "[Optimization elapsed:] " << + // topt.getTotalTime().getProcessTime() << "s\n"; + if (!llPath.empty()) { + const char *post_opt_file = llPath.c_str(); //"/tmp/llvmjit-post-opt.ll"; + llvm_module_to_file(*M, post_opt_file); + std::cout << "[Post optimization module] dumped to " << post_opt_file << "\n"; + } + if (!oPath.empty()) { + // https://stackoverflow.com/questions/62311918/llvm-c-creating-object-file-results-in-targetmachine-cant-emit-a-file-of-this + char* errors = 0; + char *object_file = const_cast(oPath.c_str()); + LLVMTargetMachineEmitToFile(reinterpret_cast(const_cast(TM.get())), + (LLVMModuleRef)M.get(), + object_file, LLVMObjectFile, &errors); + printf("error: %s\n", errors); + LLVMDisposeErrorMessage(errors); + std::cout << "[Post optimization module] dumped to " << object_file << "\n"; + } //} - //M->dump(); + // M->dump(); return M; } + +void OperatorJit::llvm_module_to_file(const llvm::Module &module, + const char *filename) { + std::string str; + llvm::raw_string_ostream os(str); + module.print(os, nullptr); + + std::ofstream of(filename); + of << os.str(); } -} +} // namespace orc +} // namespace llvm using namespace clang; using namespace clang::driver; -CodeGenWrapper::CodeGenWrapper() { -} - -uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { +CodeGenWrapper::CodeGenWrapper() {} +uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv, + bool fileExists) { // give the path of the file... - //argv[1] = "/home/george/clion/workspace/llvm_test/cmake-build-debug/dummy.cpp"; + // argv[1] = + // "/home/george/clion/workspace/llvm_test/cmake-build-debug/dummy.cpp"; + auto llPath = std::string(argv[1]); + std::string toReplace(".cpp"); + size_t pos = llPath.find(toReplace); + llPath.replace(pos, toReplace.length(), ".ll"); + auto oPath = std::string(argv[1]); + pos = oPath.find(toReplace); + oPath.replace(pos, toReplace.length(), ".o"); // This just needs to be some symbol in the binary; C++ doesn't // allow taking the address of ::main however. - void *MainAddr = (void *) (intptr_t) GetExecutablePath; + void *MainAddr = (void *)(intptr_t)GetExecutablePath; std::string Path = GetExecutablePath(argv[0], MainAddr); IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); TextDiagnosticPrinter *DiagClient = @@ -219,8 +266,7 @@ uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { // Use ELF on Windows-32 and MingW for now. #ifndef CLANG_INTERPRETER_COFF_FORMAT - if (T.isOSBinFormatCOFF()) - T.setObjectFormat(llvm::Triple::ELF); + if (T.isOSBinFormatCOFF()) T.setObjectFormat(llvm::Triple::ELF); #endif Driver TheDriver(Path, T.str(), Diags); @@ -231,14 +277,12 @@ uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { // recognize. We need to extend the driver library to support this use model // (basically, exactly one input, and the operation mode is hard wired). 
SmallVector Args(argv, argv + argc); - //Args.push_back("-fsyntax-only"); + // Args.push_back("-fsyntax-only"); std::string cpp = "JITFromSource.cpp"; populateArgs(Args, cpp); std::unique_ptr C(TheDriver.BuildCompilation(Args)); - if (!C) - return 1; - + if (!C) return 1; // FIXME: This is copied from ASTUnit.cpp; simplify and eliminate. // We expect to get back exactly one command job, if we didn't something @@ -261,11 +305,9 @@ uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { // Initialize a compiler invocation object from the clang (-cc1) arguments. const llvm::opt::ArgStringList &CCArgs = Cmd.getArguments(); std::unique_ptr CI(new CompilerInvocation); - CompilerInvocation::CreateFromArgs(*CI, - const_cast(CCArgs.data()), - const_cast(CCArgs.data()) + - CCArgs.size(), - Diags); + CompilerInvocation::CreateFromArgs( + *CI, const_cast(CCArgs.data()), + const_cast(CCArgs.data()) + CCArgs.size(), Diags); // Show the invocation, with -v. if (CI->getHeaderSearchOpts().Verbose) { @@ -282,8 +324,7 @@ uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { // Create the compilers actual diagnostics engine. Clang.createDiagnostics(); - if (!Clang.hasDiagnostics()) - return 1; + if (!Clang.hasDiagnostics()) return 1; // Infer the builtin include path if unspecified. if (Clang.getHeaderSearchOpts().UseBuiltinIncludes && @@ -291,23 +332,45 @@ uint64_t CodeGenWrapper::parseAndCodeGen(int argc, const char **argv) { Clang.getHeaderSearchOpts().ResourceDir = CompilerInvocation::GetResourcesPath(argv[0], MainAddr); - // Create and execute the frontend to generate an LLVM bitcode module. + std::unique_ptr Module; std::unique_ptr Act(new EmitLLVMOnlyAction()); - if (!Clang.ExecuteAction(*Act)) - return 1; + // Create and execute the frontend to generate an LLVM bitcode module. 
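+  // Three ways to obtain the module:
+  //   1. fresh run: execute the Clang frontend on the generated .cpp;
+  //   2. recovery from IR: parse the cached .ll file (the optimization
+  //      passes are skipped, since the IR was already optimized when it
+  //      was dumped);
+  //   3. recovery from an object file (useObjectFile == true): hand the
+  //      cached .o directly to the JIT's object layer, bypassing IR.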
+ if (!fileExists) { + if (!Clang.ExecuteAction(*Act)) return 1; + Module = Act->takeModule(); + } else { + if (!useObjectFile) { + Module = llvm::parseIRFile(StringRef(llPath), error, context); + useOptimizationPasses = false; + } else { + auto obj = llvm::object::ObjectFile::createObjectFile(oPath); + if (!obj) { + std::cout << "Failed to load " << oPath << std::endl; + exit(1); + } + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + // LLVMLinkInMCJIT(); + J = new llvm::orc::OperatorJit; + auto moduleKey = J->addObjectFile(std::move(obj.get())); + return moduleKey; + } + } llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); - //LLVMLinkInMCJIT(); + // LLVMLinkInMCJIT(); - std::unique_ptr Module = Act->takeModule(); + // std::unique_ptr Module = Act->takeModule(); llvm::orc::VModuleKey moduleKey = 0; - //Module->dump(); + // Module->dump(); if (Module) { J = new llvm::orc::OperatorJit; + J->llPath = llPath; + if (useObjectFile) + J->oPath = oPath; moduleKey = J->addModule(std::move(Module)); } - return moduleKey; } @@ -323,50 +386,58 @@ CodeGenWrapper::~CodeGenWrapper() { llvm::llvm_shutdown(); } -void CodeGenWrapper::populateArgs(SmallVector &args, llvm::StringRef cpp) { - - //args.push_back("-g"); - //args.push_back("-ccc-print-phases"); - //args.push_back("-v"); - //args.push_back("-march=native"); - //args.push_back("-stdlib=libc++"); +void CodeGenWrapper::populateArgs(SmallVector &args, + llvm::StringRef cpp) { + // args.push_back("-g"); + // args.push_back("-ccc-print-phases"); + // args.push_back("-v"); + // args.push_back("-march=native"); + // args.push_back("-stdlib=libc++"); + //args.push_back("--gcc-toolchain=/usr/local/gcc/7.5.0"); args.push_back("-emit-llvm"); args.push_back("-emit-llvm-bc"); args.push_back("-emit-llvm-uselists"); - //args.push_back("-main-file-name"); - //args.push_back(cpp.data()); + // args.push_back("-main-file-name"); + // args.push_back(cpp.data()); args.push_back("-mavx2"); args.push_back("-std=c++14"); + + // error: unknown argument + // args.push_back("-frename-registers"); + // args.push_back("-fdeprecated-macro"); + // args.push_back("-mrelocation-model"); + // args.push_back("-mconstructor-aliases"); + // args.push_back("-munwind-tables"); + // args.push_back("-masm-verbose"); + + // if (!useOptimizationPasses) { args.push_back("-disable-free"); - args.push_back("-fdeprecated-macro"); args.push_back("-fmath-errno"); args.push_back("-fuse-init-array"); - - args.push_back("-mrelocation-model"); + args.push_back("-funroll-loops"); args.push_back("static"); args.push_back("-mthread-model"); args.push_back("posix"); - args.push_back("-masm-verbose"); - args.push_back("-mconstructor-aliases"); - args.push_back("-munwind-tables"); - args.push_back("-dwarf-column-info"); args.push_back("-debugger-tuning=gdb"); + //} #if DEBUG args.push_back("-debug-info-kind=limited"); args.push_back("-dwarf-version=4"); #else args.push_back("-O3"); - args.push_back("-mdisable-fp-elim"); args.push_back("-momit-leaf-frame-pointer"); - //args.push_back("-vectorize-loops"); + // args.push_back("-vectorize-loops"); args.push_back("-loop-vectorize"); - //args.push_back("-vectorize-slp"); - args.push_back("-slp-vectorizer"); + // args.push_back("-vectorize-slp"); + + // error: unknown argument + // args.push_back("-mdisable-fp-elim"); + // args.push_back("-slp-vectorizer"); #endif args.push_back("-resource-dir"); @@ -385,24 +456,21 @@ void CodeGenWrapper::populateArgs(SmallVector &args, llvm::Str */ 
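+  // -internal-isystem points the in-process cc1 at clang's own builtin
+  // headers; the resource directory is baked in at build time through
+  // OPERATOR_JIT_LIB_CLANG_RESOURCE_DIR. Without it, compiler-provided
+  // headers used by the generated operators would not resolve.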
args.push_back("-internal-isystem"); args.push_back(STRINGIFY(OPERATOR_JIT_LIB_CLANG_RESOURCE_DIR) "/include"); - //args.push_back("-internal-isystem " STRINGIFY(OPERATOR_JIT_LIB_CLANG_RESOURCE_DIR) "/include"); + // args.push_back("-internal-isystem " + // STRINGIFY(OPERATOR_JIT_LIB_CLANG_RESOURCE_DIR) "/include"); - /* - "-internal-externc-isystem" + /* "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" - "/usr/include" - */ - /* - std::string bc = replaceExtension (cpp, "bc"); + "/usr/include" */ + /* std::string bc = replaceExtension (cpp, "bc"); args.push_back("-o"); args.push_back(bc.data()); args.push_back("-x"); args.push_back("c++"); args.push_back(cpp.data());*/ - // args.push_back("opt -O3"); args.push_back("-flto"); } \ No newline at end of file diff --git a/src/cql/operators/codeGeneration/OperatorJit.h b/src/cql/operators/codeGeneration/OperatorJit.h index 5e6eff8..9ae6b81 100644 --- a/src/cql/operators/codeGeneration/OperatorJit.h +++ b/src/cql/operators/codeGeneration/OperatorJit.h @@ -19,6 +19,8 @@ #define STRINGIFY_DETAIL(X) #X #define STRINGIFY(X) STRINGIFY_DETAIL(X) +static bool useOptimizationPasses = true; + // Show the error message and exit. LLVM_ATTRIBUTE_NORETURN static void fatalError(llvm::Error E) { llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { @@ -44,13 +46,18 @@ class OperatorJit { using OptimizeFunction = std::function(std::unique_ptr)>; LegacyIRTransformLayer OptimizeLayer; std::vector keys; + object::OwningBinary objectFile; public: + std::string llPath, oPath; + OperatorJit(); const TargetMachine &getTargetMachine() const; VModuleKey addModule(std::unique_ptr M); + VModuleKey addObjectFile(object::OwningBinary O); + JITSymbol findSymbol(const StringRef &Name); JITSymbol findSymbolIn(VModuleKey &key, const StringRef &Name); @@ -101,18 +108,24 @@ class OperatorJit { private: std::unique_ptr optimizeModule(std::unique_ptr M); + + void llvm_module_to_file(const llvm::Module& module, const char* filename); }; } // end namespace orc } // end namespace llvm class CodeGenWrapper { private: + llvm::LLVMContext context; + llvm::SMDiagnostic error; llvm::orc::OperatorJit *J; + const bool useObjectFile = false; + public: CodeGenWrapper(); - uint64_t parseAndCodeGen(int argc, const char **argv); + uint64_t parseAndCodeGen(int argc, const char **argv, bool fileExists = false); template llvm::Expected> getFunction(const clang::StringRef &Name) { diff --git a/src/cql/operators/codeGeneration/OperatorKernel.h b/src/cql/operators/codeGeneration/OperatorKernel.h index 53ca791..e015cac 100644 --- a/src/cql/operators/codeGeneration/OperatorKernel.h +++ b/src/cql/operators/codeGeneration/OperatorKernel.h @@ -1,7 +1,10 @@ #pragma once +#include + #include #include +#include #include #include "buffers/PartialWindowResultsFactory.h" @@ -38,6 +41,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::vector *m_groupByAttributes; int m_keyLength = 0, m_valueLength = 0, m_bucketSize = 0; bool m_groupBy; + int m_numberOfAggregationAttributes = 0; int m_numberOfKeyAttributes = 0; bool m_processIncremental; bool m_invertible; @@ -53,6 +57,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string m_staticInitialization; // Having Predicate *m_havingPredicate; + // Post window operation + std::string m_postWindowOp; + std::string m_postWindowPredicate; + std::string m_postMergeOperation; + // Code Generation Variables bool 
m_isReady; @@ -60,25 +69,22 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { bool m_generateFile; int m_id = -1; bool m_usePtrs; + const bool m_doProcessing = true; // set this false this for ingestion benchmarking CodeGenWrapper m_codeGen; + + std::unique_ptr m_dLoader; std::function process; std::function - processFragments; + char *, char *, char *, char *, int *, int *, int *, + int *, /*long *, long *, long *, long * ,*/ long, + int *, char *)> processFragments; std::function - processFragmentsWithPtrs; - std::function - aggregate; - std::function - aggregateWithPtrs; - std::function - aggregateSingleHashTableWithPtrs; + char **, char **, char **, char *, int *, int *, + int *, int *, /*long *, long *, long *, long *,*/ + long, int *, char *)> processFragmentsWithPtrs; + std::function aggregate; + std::function aggregateWithPtrs; + std::function aggregateSingleHashTableWithPtrs; std::function getHashTableSize; QueryConfig *m_config = nullptr; @@ -86,38 +92,37 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { const bool m_debug = false; public: - OperatorKernel(bool genCode = true, bool usePtrs = true, - bool useParallelMerge = false, bool generateFile = true) - : m_windowDefinition(nullptr), - m_inputSchema(nullptr), - m_outputSchema(nullptr), - m_expressions(nullptr), - m_predicate(nullptr), - m_aggregationTypes(nullptr), - m_aggregationAttributes(nullptr), - m_timestampReference(0), - m_groupBy(false), - m_processIncremental(false), - m_invertible(false), - m_nonInvertible(false), - m_useParallelMerge(useParallelMerge), - m_staticJoinPredicate(nullptr), - m_staticBuffer(nullptr), - m_havingPredicate(nullptr), - m_isReady(false), - m_genCode(genCode), - m_generateFile(generateFile), - m_usePtrs(usePtrs) {} - - void setQueryId(int qId) { m_id = qId; } + OperatorKernel(bool genCode = true, bool usePtrs = true, bool useParallelMerge = false, bool generateFile = true) : + m_windowDefinition(nullptr), + m_inputSchema(nullptr), + m_outputSchema(nullptr), + m_expressions(nullptr), + m_predicate(nullptr), + m_aggregationTypes(nullptr), + m_aggregationAttributes(nullptr), + m_timestampReference(0), + m_groupBy(false), + m_processIncremental(false), + m_invertible(false), + m_nonInvertible(false), + m_useParallelMerge(useParallelMerge), + m_staticJoinPredicate(nullptr), + m_staticBuffer(nullptr), + m_havingPredicate(nullptr), + m_isReady(false), + m_genCode(genCode), + m_generateFile(generateFile), + m_usePtrs(usePtrs) {} + + void setQueryId(int qId) { + m_id = qId; + } void setInputSchema(TupleSchema *schema) { m_inputSchema = schema; } void setProjection(Projection *projection) { if (m_expressions != nullptr) - throw std::runtime_error( - "error: projection has already been set, try setting static join " - "after projection"); + throw std::runtime_error("error: projection has already been set, try setting static join after projection"); m_expressions = &projection->getExpressions(); m_outputSchema = &projection->getOutputSchema(); m_hasIntermMaterialization = projection->isIntermediate(); @@ -127,12 +132,13 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (m_predicate != nullptr) throw std::runtime_error("error: selection has already been set"); m_predicate = selection->getPredicate(); - if (m_outputSchema == nullptr) m_outputSchema = m_inputSchema; + if (m_outputSchema == nullptr) + m_outputSchema = m_inputSchema; } void setStaticHashJoin(StaticHashJoin *hasJoin) { m_staticJoinPredicate = 
hasJoin->getPredicate(); - // m_outputSchema = &hasJoin->getOutputSchema(); + //m_outputSchema = &hasJoin->getOutputSchema(); m_staticBuffer = hasJoin->getStaticBuffer()->data(); m_staticHashTable = hasJoin->getStaticHashTable(); m_staticComputation = hasJoin->getStaticComputation(); @@ -143,9 +149,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (m_aggregationTypes != nullptr || m_aggregationAttributes != nullptr) throw std::runtime_error("error: aggregation has already been set"); m_windowDefinition = &aggregation->getWindowDefinition(); - if (!hasProjection()) m_outputSchema = &aggregation->getOutputSchema(); + if (!hasProjection()) + m_outputSchema = &aggregation->getOutputSchema(); m_aggregationTypes = &aggregation->getAggregationTypes(); m_aggregationAttributes = &aggregation->getAggregationAttributes(); + if (!(*m_aggregationAttributes).empty()) { + auto set = std::unordered_set(); + for (auto &at: (*m_aggregationAttributes)) { + set.insert(at->getColumn()); + } + m_numberOfAggregationAttributes = set.size(); + } m_keyLength = aggregation->getKeyLength(); m_valueLength = aggregation->getValueLength(); m_bucketSize = aggregation->getBucketSize(); @@ -155,7 +169,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { m_invertible = aggregation->hasInvertible(); m_nonInvertible = aggregation->hasNonInvertible(); if (!(*m_groupByAttributes).empty()) { - m_numberOfKeyAttributes = (int)(*m_groupByAttributes).size(); + m_numberOfKeyAttributes = (int) (*m_groupByAttributes).size(); } } @@ -165,7 +179,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { m_havingPredicate = selection->getPredicate(); } - void setCollisionBarrier(int barrier) { m_collisionBarrier = barrier; } + void setPostWindowOperation(std::string operation, std::string predicate, std::string mergeOperation) { + if (!m_postWindowOp.empty() || !m_postWindowPredicate.empty() || !m_postMergeOperation.empty()) + throw std::runtime_error("error: post window operation has already been set"); + m_postWindowOp = std::move(operation); + m_postWindowPredicate = std::move(predicate); + m_postMergeOperation = std::move(mergeOperation); + } + + void setCollisionBarrier(int barrier) { + m_collisionBarrier = barrier; + } void setCustomHashTable(std::string hashTable) { std::cout << "Setting custom hashtable..." 
<< std::endl; @@ -177,174 +201,251 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (m_id == -1) { throw std::runtime_error("error: query id has not be set up"); } + // create file path if it doesn't exist + std::experimental::filesystem::path path{ + SystemConf::getInstance().FILE_ROOT_PATH}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + path = {SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + + // setup operator if (!hasAggregation()) setupWithoutAggregation(); else setupWithAggregation(); + } void setupWithoutAggregation() { std::string s; - s.append(getIncludesString()); - // s.append(getQueryDefinitionString()); - s.append(getInputSchemaString()); - if (hasProjection()) - s.append(getOutputSchemaString()); - else - s.append("using output_tuple_t = input_tuple_t;\n\n"); - - std::string computation = getComputeString(); - s.append(getC_Definitions(computation)); + if (!SystemConf::getInstance().RECOVER) { + s.append(getIncludesString()); + // s.append(getQueryDefinitionString()); + s.append(getInputSchemaString()); + if (hasProjection()) + s.append(getOutputSchemaString()); + else + s.append("using output_tuple_t = input_tuple_t;\n\n"); - std::ofstream out("GeneratedCode_" + std::to_string(m_id) + ".cpp"); - out << s; - out.close(); + std::string computation = getComputeString(); + s.append(getC_Definitions(computation)); + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::ofstream out(path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"); + out << s; + out.close(); + } if (m_genCode) { int argc = 2; - std::string mainPath = Utils::GetCurrentWorkingDir() + "/LightSaber"; - std::string generatedPath = Utils::GetCurrentWorkingDir() + - "/GeneratedCode_" + std::to_string(m_id) + - ".cpp"; + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::string mainPath = path + "/LightSaber"; + std::string generatedPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"; + std::string libPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".so"; const char *str0 = mainPath.c_str(); const char *str1 = generatedPath.c_str(); - const char **argv = (const char **)malloc(2 * sizeof(char *)); + const char **argv = (const char **) malloc(2 * sizeof(char *)); argv[0] = str0; argv[1] = str1; - m_codeGen.parseAndCodeGen(argc, argv); - - llvm::Expected< - std::function> - processFn = - m_codeGen - .getFunction( - "process"); - - if (!processFn) { - std::cout << "Failed to fetch the pointers." << std::endl; - exit(1); + if (!SystemConf::getInstance().RECOVER) { + // generate shared library + std::thread slt([&]{ + std::string command = "clang -shared -fPIC -O3 -march=native -g -o " + libPath + " " + generatedPath; + system(command.c_str()); + }); + + m_codeGen.parseAndCodeGen(argc, argv, SystemConf::getInstance().RECOVER); + auto processFn = m_codeGen.getFunction("process"); + + if (!processFn) { + std::cout << "Failed to fetch the pointers." 
<< std::endl; + exit(1); + } + process = *processFn; + + slt.join(); + } else { + m_dLoader = std::make_unique(libPath.c_str()); + process = m_dLoader->load(libPath, "process"); } - process = *processFn; } m_isReady = true; } void setupWithAggregation() { std::string s; - s.append(getIncludesString()); - s.append(getQueryDefinitionString()); - s.append(getInputSchemaString()); - s.append(getOutputSchemaString()); - s.append(getSingleKeyDataStructureString()); - if (hasGroupBy()) { - std::string key = getKeyType(); - s.append(getHashTableBucketString()); - s.append(getHashTableString(key)); - s.append(getHashTableStaticDeclaration()); - if (!m_usePtrs) { - s.append(getHashTableMergeString()); - } else { - if (!m_useParallelMerge) { - s.append(getHashTableMergeWithPtrsString()); + if (!SystemConf::getInstance().RECOVER) { + s.append(getIncludesString()); + s.append(getQueryDefinitionString()); + s.append(getInputSchemaString()); + s.append(getOutputSchemaString()); + + s.append(getSingleKeyDataStructureString()); + if (hasGroupBy()) { + std::string key = getKeyType(); + s.append(getHashTableBucketString()); + s.append(getHashTableString(key)); + s.append(getHashTableStaticDeclaration()); + if (!m_usePtrs) { + s.append(getHashTableMergeString()); } else { - s.append(getHashTableParallelMergeWithPtrsString()); + if (!m_useParallelMerge) { + s.append(getHashTableMergeWithPtrsString()); + } else { + s.append(getHashTableParallelMergeWithPtrsString()); + } } - } - } else { - if (hasIncremental()) s.append(getSingleStaticDeclaration()); - s.append(getSingleBucketString()); - s.append(getSingleKeyMergeString()); - } + } else { + if (hasIncremental()) s.append(getSingleStaticDeclaration()); + s.append(getSingleBucketString()); + s.append(getSingleKeyMergeString()); + } - if (hasStaticHashJoin()) s.append(m_staticHashTable); + if (hasStaticHashJoin()) s.append(m_staticHashTable); - s.append(getComputeString()); - s.append(getAggregate_C_Definitions()); + s.append(getComputeString()); + s.append(getAggregate_C_Definitions()); - if (m_generateFile) { - std::ofstream out("GeneratedCode_" + std::to_string(m_id) + ".cpp"); - out << s; - out.close(); + if (m_generateFile) { + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::ofstream out(path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"); + out << s; + out.close(); + } } - if (m_genCode) { + Utils::Timer timer; int argc = 2; - std::string mainPath = Utils::GetCurrentWorkingDir() + "/LightSaber"; - std::string generatedPath = Utils::GetCurrentWorkingDir() + - "/GeneratedCode_" + std::to_string(m_id) + - ".cpp"; + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::string mainPath = path + "/LightSaber"; + std::string generatedPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".cpp"; + std::string libPath = path + "/GeneratedCode_" + std::to_string(m_id) + ".so"; const char *str0 = mainPath.c_str(); const char *str1 = generatedPath.c_str(); const char **argv = (const char **)malloc(2 * sizeof(char *)); argv[0] = str0; argv[1] = str1; - m_codeGen.parseAndCodeGen(argc, argv); - - if (!hasGroupBy() || - !m_usePtrs) { // use the actual values stored sequentially in a - // buffer to aggregate - auto processFn = m_codeGen.getFunction("process"); - auto aggregateFn = - m_codeGen.getFunction( - "aggregate"); - if (!processFn || !aggregateFn) { - std::cout << "Failed to fetch the pointers." 
<< std::endl; - exit(1); - } - processFragments = *processFn; - aggregate = *aggregateFn; - } else { // use pointers to hashtables to aggregate - auto processFn = m_codeGen.getFunction("process"); - auto computeSizeFn = - m_codeGen.getFunction("getHashTableSizeInBytes"); - if (!processFn || !computeSizeFn) { - std::cout << "Failed to fetch the pointers." << std::endl; - exit(1); - } - processFragmentsWithPtrs = *processFn; - getHashTableSize = *computeSizeFn; - - if (!m_useParallelMerge) { + if (!SystemConf::getInstance().RECOVER) { + // generate shared library + std::thread slt([&] { + std::string command = "clang -shared -fPIC -O3 -march=native -g -o " + libPath + " " + generatedPath; + system(command.c_str()); + }); + m_codeGen.parseAndCodeGen(argc, argv, SystemConf::getInstance().RECOVER); + if (!hasGroupBy() || + !m_usePtrs) { // use the actual values stored sequentially in a + // buffer to aggregate + auto processFn = m_codeGen.getFunction("process"); auto aggregateFn = - m_codeGen - .getFunction( - "aggregate"); - if (!aggregateFn) { + m_codeGen.getFunction( + "aggregate"); + if (!processFn || !aggregateFn) { std::cout << "Failed to fetch the pointers." << std::endl; exit(1); } - aggregateWithPtrs = *aggregateFn; - } else { - auto aggregateFn = - m_codeGen.getFunction("aggregate"); - if (!aggregateFn) { + processFragments = *processFn; + aggregate = *aggregateFn; + } else { // use pointers to hashtables to aggregate + auto processFn = m_codeGen.getFunction("process"); + auto computeSizeFn = + m_codeGen.getFunction("getHashTableSizeInBytes"); + if (!processFn || !computeSizeFn) { std::cout << "Failed to fetch the pointers." << std::endl; exit(1); } - aggregateSingleHashTableWithPtrs = *aggregateFn; + processFragmentsWithPtrs = *processFn; + getHashTableSize = *computeSizeFn; + + if (!m_useParallelMerge) { + auto aggregateFn = + m_codeGen.getFunction( + "aggregate"); + if (!aggregateFn) { + std::cout << "Failed to fetch the pointers." << std::endl; + exit(1); + } + aggregateWithPtrs = *aggregateFn; + } else { + auto aggregateFn = m_codeGen.getFunction( + "aggregate"); + if (!aggregateFn) { + std::cout << "Failed to fetch the pointers." 
<< std::endl; + exit(1); + } + aggregateSingleHashTableWithPtrs = *aggregateFn; + } + + // Initialize the size needed for the hashtable here + m_hashTableSize = getHashTableSize(); } + slt.join(); + timer.printElapsed("CodeGeneration-"); + } else { + m_dLoader = std::make_unique(libPath.c_str()); + if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored sequentially in a + // buffer to aggregate + processFragments = m_dLoader->load(libPath, "process"); + aggregate = + m_dLoader->load( + libPath, "aggregate"); + } else { // use pointers to hashtables to aggregate + processFragmentsWithPtrs = m_dLoader->load(libPath, "process"); + getHashTableSize = m_dLoader->load(libPath, "getHashTableSizeInBytes"); + + if (!m_useParallelMerge) { + aggregateWithPtrs = + m_dLoader->load( + libPath, "aggregate"); + } else { + aggregateSingleHashTableWithPtrs = + m_dLoader->load(libPath, "aggregate"); + } - // Initialize the size needed for the hashtable here - m_hashTableSize = getHashTableSize(); + // Initialize the size needed for the hashtable here + m_hashTableSize = getHashTableSize(); + } + timer.printElapsed("DLibLoad-"); } } m_isReady = true; } - bool hasGroupBy() override { return m_groupBy; } + bool hasGroupBy() override { + return m_groupBy; + } TupleSchema &getOutputSchema() override { if (m_outputSchema == nullptr) @@ -352,15 +453,51 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { return *m_outputSchema; } + std::vector *getInputCols() override { + if (hasProjection()) + throw std::runtime_error("error: projection operator hasn't been implemented yet"); + auto cols = new std::vector; + std::unordered_set colNums; + // always add the timestamp + auto col = new ColumnReference(0, BasicType::Long); + cols->push_back(col); + colNums.insert(0); + for (int i = 0; i < m_numberOfAggregationAttributes; ++i) { + col = (*m_aggregationAttributes)[i]; + // todo: fix this -- works only for YSB + if (hasStaticHashJoin() && col->getColumn() == 2) + continue; + if (colNums.find(col->getColumn()) == colNums.end()) { + colNums.insert(col->getColumn()); + cols->push_back(col); + } + } + for (int i = 0; i < m_numberOfKeyAttributes; ++i) { + col = dynamic_cast((*m_groupByAttributes)[i]); + if (col) { + if (colNums.find(col->getColumn()) == colNums.end()) { + colNums.insert(col->getColumn()); + cols->push_back(col); + } + } else { + col = new ColumnReference(-1, (*m_groupByAttributes)[i]->getBasicType()); + col->setExpression((*m_groupByAttributes)[i]->toSExprForCodeGen()); + cols->push_back(col); + colNums.insert(-1); + } + } + + return cols; + } + AggregationType &getAggregationType() override { return getAggregationType(0); } AggregationType &getAggregationType(int idx) override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); - if (idx < 0 || idx > (int)(*m_aggregationTypes).size() - 1) + throw std::runtime_error("error: aggregation operator hasn't been set up"); + if (idx < 0 || idx > (int) (*m_aggregationTypes).size() - 1) throw std::out_of_range("error: invalid aggregation type index"); return (*m_aggregationTypes)[idx]; } @@ -392,10 +529,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { return s; } - explicit operator std::string() const { return toSExpr(); } + explicit operator std::string() const { + return toSExpr(); + } - void processData(const std::shared_ptr &batch, Task &task, - int pid) override { + void processData(const std::shared_ptr& batch, Task &task, int pid) override 
{ if (!m_isReady) throw std::runtime_error("error: the operator has not been set"); @@ -406,57 +544,45 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { } } - void processWithoutFragments(const std::shared_ptr &batch, - Task &task, int pid) { + void processWithoutFragments(const std::shared_ptr& batch, Task &task, int pid) { auto inputBuffer = batch->getInputQueryBuffer(); long startP = batch->getBufferStartPointer(); long endP = batch->getBufferEndPointer(); long pos = 0; - auto outputBuffer = - PartialWindowResultsFactory::getInstance().newInstance(pid); + auto outputBuffer = PartialWindowResultsFactory::getInstance().newInstance(pid); - process(pid, inputBuffer->getBufferRaw(), startP, endP, - outputBuffer->getBufferRaw(), pos); + if (m_doProcessing) { + process(pid, inputBuffer->getBufferRaw(), startP, endP, + outputBuffer->getBufferRaw(), pos); + } outputBuffer->setPosition(pos); batch->setOutputBuffer(outputBuffer); task.outputWindowBatchResult(batch); } - void processWithFragments(const std::shared_ptr &batch, - Task &task, int pid) { + void processWithFragments(const std::shared_ptr& batch, Task &task, int pid) { auto circularBuffer = batch->getInputQueryBuffer(); - auto circularBufferSize = - circularBuffer->getBufferCapacity(task.getNumaNodeId()); + auto circularBufferSize = circularBuffer->getBufferCapacity(task.getNumaNodeId()); auto &buffer = batch->getBuffer(); auto startPointer = batch->getBufferStartPointer(); auto endPointer = batch->getBufferEndPointer(); batch->resetWindowPointers(); // TODO: the hashtable sizes has to be variable size!!! - auto openingWindows = - (!hasGroupBy() || !m_usePtrs) - ? PartialWindowResultsFactory::getInstance().newInstance(pid) - : PartialWindowResultsFactory::getInstance().newInstance( - pid, m_hashTableSize); - auto closingWindows = - (!hasGroupBy() || !m_usePtrs) - ? PartialWindowResultsFactory::getInstance().newInstance(pid) - : PartialWindowResultsFactory::getInstance().newInstance( - pid, m_hashTableSize); - auto pendingWindows = - (!hasGroupBy() || !m_usePtrs) - ? PartialWindowResultsFactory::getInstance().newInstance(pid) - : PartialWindowResultsFactory::getInstance().newInstance( - pid, m_hashTableSize); - auto completeWindows = - PartialWindowResultsFactory::getInstance().newInstance(pid); + auto openingWindows = (!hasGroupBy() || !m_usePtrs) ? PartialWindowResultsFactory::getInstance().newInstance(pid) : + PartialWindowResultsFactory::getInstance().newInstance(pid, m_hashTableSize); + auto closingWindows = (!hasGroupBy() || !m_usePtrs) ? PartialWindowResultsFactory::getInstance().newInstance(pid) : + PartialWindowResultsFactory::getInstance().newInstance(pid, m_hashTableSize); + auto pendingWindows = (!hasGroupBy() || !m_usePtrs) ? 
PartialWindowResultsFactory::getInstance().newInstance(pid) : + PartialWindowResultsFactory::getInstance().newInstance(pid, m_hashTableSize); + auto completeWindows = PartialWindowResultsFactory::getInstance().newInstance(pid); auto &openingStartPointers = openingWindows->getStartPointers(); auto &closingStartPointers = closingWindows->getStartPointers(); auto &pendingStartPointers = pendingWindows->getStartPointers(); auto &completeStartPointers = completeWindows->getStartPointers(); - // if (batch->hasTimestampOffset()) + //if (batch->hasTimestampOffset()) // batch->updateTimestamps(); auto streamStartPointer = batch->getStreamStartPointer(); @@ -467,52 +593,52 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { timestampFromPrevBatch = batch->getPrevEndTimestamp(); if (m_debug) { - std::cout << "[DBG] processWithFragments [TID " << task.getTaskId() - << "]:" - << " pipeline " << m_id << " entering generated function with " + std::cout << "[DBG] processWithFragments [TID " << task.getTaskId() << "]:" + << " pipeline " << m_id + << " entering generated function with " << batch->getBatchStartTimestamp() << " startTimestamp " << batch->getBatchEndTimestamp() << " endTimestamp " << batch->getBufferStartPointer() << " startPointer " << batch->getBufferEndPointer() << " endPointer " << streamStartPointer << " streamStartPointer " << timestampFromPrevBatch << " prevTimestamp " - << circularBufferSize << " circularBufferSize " << std::endl; + << circularBufferSize << " circularBufferSize " + << std::endl; } - if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored - // sequentially in a buffer to aggregate - processFragments( - pid, buffer.data(), circularBufferSize, startPointer, endPointer, - timestampFromPrevBatch, batch->getWindowStartPointers().data(), - batch->getWindowEndPointers().data(), openingWindows->getBufferRaw(), - closingWindows->getBufferRaw(), pendingWindows->getBufferRaw(), - completeWindows->getBufferRaw(), openingStartPointers.data(), - closingStartPointers.data(), pendingStartPointers.data(), - completeStartPointers.data(), - // openingWindowIds.data(), closingWindowIds.data(), - // pendingWindowIds.data(), completeWindowIds.data(), - streamStartPointer, &pointersAndCounts[0], m_staticBuffer); - } else { // use pointers to hashtables to aggregate - processFragmentsWithPtrs( - pid, buffer.data(), circularBufferSize, startPointer, endPointer, - timestampFromPrevBatch, batch->getWindowStartPointers().data(), - batch->getWindowEndPointers().data(), - openingWindows->getBufferPtrs().data(), - closingWindows->getBufferPtrs().data(), - pendingWindows->getBufferPtrs().data(), - completeWindows->getBufferRaw(), openingStartPointers.data(), - closingStartPointers.data(), pendingStartPointers.data(), - completeStartPointers.data(), - // openingWindowIds.data(), closingWindowIds.data(), - // pendingWindowIds.data(), completeWindowIds.data(), - streamStartPointer, &pointersAndCounts[0], m_staticBuffer); + if (m_doProcessing) { + if (!hasGroupBy() || + !m_usePtrs) { // use the actual values stored sequentially in a buffer to aggregate + processFragments( + pid, buffer.data(), circularBufferSize, startPointer, endPointer, + timestampFromPrevBatch, batch->getWindowStartPointers().data(), + batch->getWindowEndPointers().data(), + openingWindows->getBufferRaw(), closingWindows->getBufferRaw(), + pendingWindows->getBufferRaw(), completeWindows->getBufferRaw(), + openingStartPointers.data(), closingStartPointers.data(), + pendingStartPointers.data(), 
completeStartPointers.data(), + // openingWindowIds.data(), closingWindowIds.data(), pendingWindowIds.data(), completeWindowIds.data(), + streamStartPointer, &pointersAndCounts[0], m_staticBuffer); + } else { // use pointers to hashtables to aggregate + processFragmentsWithPtrs( + pid, buffer.data(), circularBufferSize, startPointer, endPointer, + timestampFromPrevBatch, batch->getWindowStartPointers().data(), + batch->getWindowEndPointers().data(), + openingWindows->getBufferPtrs().data(), + closingWindows->getBufferPtrs().data(), + pendingWindows->getBufferPtrs().data(), + completeWindows->getBufferRaw(), openingStartPointers.data(), + closingStartPointers.data(), pendingStartPointers.data(), + completeStartPointers.data(), + // openingWindowIds.data(), closingWindowIds.data(), pendingWindowIds.data(), completeWindowIds.data(), + streamStartPointer, &pointersAndCounts[0], m_staticBuffer); + } } - // Set positions - openingWindows->setPosition((size_t)pointersAndCounts[0]); - closingWindows->setPosition((size_t)pointersAndCounts[1]); - pendingWindows->setPosition((size_t)pointersAndCounts[2]); - completeWindows->setPosition((size_t)pointersAndCounts[3]); + openingWindows->setPosition((size_t) pointersAndCounts[0]); + closingWindows->setPosition((size_t) pointersAndCounts[1]); + pendingWindows->setPosition((size_t) pointersAndCounts[2]); + completeWindows->setPosition((size_t) pointersAndCounts[3]); // Set counters openingWindows->incrementCount(pointersAndCounts[4]); closingWindows->incrementCount(pointersAndCounts[5]); @@ -525,16 +651,16 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { batch->setPendingWindows(pendingWindows); batch->setCompleteWindows(completeWindows); // Set window ids - // batch->setWindowIds(); + //batch->setWindowIds(); - // batch->getBuffer().release(); + //batch->getBuffer().release(); batch->setSchema(m_outputSchema); task.outputWindowBatchResult(batch); if (m_debug) { - std::cout << "[DBG] processWithFragments [TID " << task.getTaskId() - << "]:" - << " pipeline " << m_id << " exiting generated function with " + std::cout << "[DBG] processWithFragments [TID " << task.getTaskId() << "]:" + << " pipeline " << m_id + << " exiting generated function with " << batch->getBatchStartTimestamp() << " startTimestamp " << batch->getBatchEndTimestamp() << " endTimestamp " << streamStartPointer << " streamStartPointer " @@ -542,119 +668,143 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { << pointersAndCounts[5] << " closingWindows " << pointersAndCounts[6] << " pendingWindows " << pointersAndCounts[7] << " completeWindows " << std::endl; + + //if (task.getTaskId() == 128) { + // exit(0); + //} } } - void aggregatePartials( - std::shared_ptr openingWindows, - std::shared_ptr closingOrPendingWindows, - std::shared_ptr completeWindows, int numOfWindows, - long &windowsPos, int &tupleSize, bool pack) override { + void processData(const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, Task &task, int pid) override { + (void) lBatch; + (void) rBatch; + (void) task; + (void) pid; + throw std::runtime_error("error: this operator cannot be used directly"); + } + + void aggregatePartials(std::shared_ptr openingWindows, + std::shared_ptr closingOrPendingWindows, + std::shared_ptr completeWindows, + int numOfWindows, + long &windowsPos, + int &tupleSize, + bool pack) override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: 
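
// Aside: the four result buffers filled in above reflect the fragment
// taxonomy used for partial window aggregation: relative to the current
// batch, a window is "complete" if it both starts and ends inside the batch,
// "opening" if it starts here but ends later, "closing" if it started earlier
// and ends here, and "pending" if it spans the whole batch. A sketch of that
// classification rule (the generated code tracks this via pane pointers; the
// version below is illustrative only):
enum class FragmentType { Opening, Closing, Pending, Complete };
static FragmentType classify(long windowStart, long windowEnd,
                             long batchStart, long batchEnd) {
  const bool startsHere = windowStart >= batchStart;
  const bool endsHere = windowEnd <= batchEnd;
  if (startsHere && endsHere) return FragmentType::Complete;
  if (startsHere) return FragmentType::Opening;
  if (endsHere) return FragmentType::Closing;
  return FragmentType::Pending;  // extends past the batch on both sides
}
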
aggregation operator hasn't been set up"); - if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored - // sequentially in a buffer to aggregate + if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored sequentially in a buffer to aggregate if (pack) { - windowsPos = - aggregate(openingWindows->getBufferRaw(), - openingWindows->getStartPointers().data(), - (int)openingWindows->getPosition(), - closingOrPendingWindows->getBufferRaw(), - closingOrPendingWindows->getStartPointers().data(), - (int)closingOrPendingWindows->getPosition(), 0, - numOfWindows, pack, completeWindows->getBufferRaw(), - completeWindows->getPosition(), tupleSize); + windowsPos = aggregate(openingWindows->getBufferRaw(), + openingWindows->getStartPointers().data(), + (int) openingWindows->getPosition(), + closingOrPendingWindows->getBufferRaw(), + closingOrPendingWindows->getStartPointers().data(), + (int) closingOrPendingWindows->getPosition(), + 0, + numOfWindows, + pack, + completeWindows->getBufferRaw(), + completeWindows->getPosition(), + tupleSize); } else { - windowsPos = - aggregate(openingWindows->getBufferRaw(), - openingWindows->getStartPointers().data(), - (int)openingWindows->getPosition(), - closingOrPendingWindows->getBufferRaw(), - closingOrPendingWindows->getStartPointers().data(), - (int)closingOrPendingWindows->getPosition(), - openingWindows->numberOfWindows() - numOfWindows, - numOfWindows, pack, completeWindows->getBufferRaw(), - completeWindows->getPosition(), tupleSize); + windowsPos = aggregate(openingWindows->getBufferRaw(), + openingWindows->getStartPointers().data(), + (int) openingWindows->getPosition(), + closingOrPendingWindows->getBufferRaw(), + closingOrPendingWindows->getStartPointers().data(), + (int) closingOrPendingWindows->getPosition(), + openingWindows->numberOfWindows() - numOfWindows, + numOfWindows, + pack, + completeWindows->getBufferRaw(), + completeWindows->getPosition(), + tupleSize); } - } else { // use pointers to hashtables to aggregate + } else { // use pointers to hashtables to aggregate if (pack) { windowsPos = aggregateWithPtrs - // aggregate2 + //aggregate2 (openingWindows->getBufferPtrs().data(), openingWindows->getStartPointers().data(), - (int)openingWindows->getPosition(), + (int) openingWindows->getPosition(), closingOrPendingWindows->getBufferPtrs().data(), closingOrPendingWindows->getStartPointers().data(), - (int)closingOrPendingWindows->getPosition(), 0, numOfWindows, pack, - completeWindows->getBufferRaw(), completeWindows->getPosition(), + (int) closingOrPendingWindows->getPosition(), + 0, + numOfWindows, + pack, + completeWindows->getBufferRaw(), + completeWindows->getPosition(), tupleSize); } else { windowsPos = aggregateWithPtrs - // aggregate2 + //aggregate2 (openingWindows->getBufferPtrs().data(), openingWindows->getStartPointers().data(), - (int)openingWindows->getPosition(), + (int) openingWindows->getPosition(), closingOrPendingWindows->getBufferPtrs().data(), closingOrPendingWindows->getStartPointers().data(), - (int)closingOrPendingWindows->getPosition(), - openingWindows->numberOfWindows() - numOfWindows, numOfWindows, - pack, completeWindows->getBufferRaw(), - completeWindows->getPosition(), tupleSize); + (int) closingOrPendingWindows->getPosition(), + openingWindows->numberOfWindows() - numOfWindows, + numOfWindows, + pack, + completeWindows->getBufferRaw(), + completeWindows->getPosition(), + tupleSize); } } } - void aggregateSinglePartial( - std::shared_ptr completeWindows, int completeWindow, - int 
completeWindowsStartPos, - std::shared_ptr partialWindows, int window, - int &startPos, int &endPos, int &tupleSize, bool pack) override { + void aggregateSinglePartial(std::shared_ptr completeWindows, + int completeWindow, + int completeWindowsStartPos, + std::shared_ptr partialWindows, + int window, + int &startPos, + int &endPos, + int &tupleSize, + bool pack) override { if (!hasAggregation() || !m_useParallelMerge || !hasGroupBy()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up for parallel merge"); - - if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored - // sequentially in a buffer to aggregate - throw std::runtime_error( - "error: this aggregation mode is not supported without pointers to " - "hashtables"); - } else { // use pointers to hashtables to aggregate - aggregateSingleHashTableWithPtrs( - completeWindows->getBufferRaw(), completeWindow, - completeWindowsStartPos, - (partialWindows != nullptr) ? partialWindows->getBufferPtrs().data() - : nullptr, - window, startPos, endPos, tupleSize, pack); + throw std::runtime_error("error: aggregation operator hasn't been set up for parallel merge"); + + if (!hasGroupBy() || !m_usePtrs) { // use the actual values stored sequentially in a buffer to aggregate + throw std::runtime_error("error: this aggregation mode is not supported without pointers to hashtables"); + } else { // use pointers to hashtables to aggregate + aggregateSingleHashTableWithPtrs + (completeWindows->getBufferRaw(), + completeWindow, + completeWindowsStartPos, + (partialWindows != nullptr) ? partialWindows->getBufferPtrs().data() : nullptr, + window, + startPos, + endPos, + tupleSize, + pack); } } WindowDefinition &getWindowDefinition() override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); return *m_windowDefinition; } std::vector &getAggregationTypes() override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); return *m_aggregationTypes; } std::vector &getAggregationAttributes() override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); return *m_aggregationAttributes; } std::vector &getGroupByAttributes() override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); return *m_groupByAttributes; } @@ -664,36 +814,242 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { int numberOfValues() override { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); - return (int)(*m_aggregationAttributes).size(); + throw std::runtime_error("error: aggregation operator hasn't been set up"); + return (int) (*m_aggregationAttributes).size(); + } + + bool hasProjection() const { + return (m_expressions != nullptr); } - bool hasProjection() const { return (m_expressions != nullptr); } + bool hasSelection() const override { + return (m_predicate != nullptr); + } - bool hasSelection() const { return (m_predicate != nullptr); } + bool hasHavingPredicate() const { + return (m_havingPredicate != nullptr) || (!m_postWindowPredicate.empty()); + 
} - bool hasHavingPredicate() const { return (m_havingPredicate != nullptr); } + bool hasPostWindowOperation() const { + return !m_postWindowOp.empty(); + } bool hasAggregation() const { - if (m_aggregationTypes != nullptr && m_aggregationAttributes != nullptr && - m_aggregationTypes->size() != m_aggregationAttributes->size()) - throw std::runtime_error( - "error: the number of aggregation types should ne equal to the " - "aggregation attributes"); - return (m_aggregationTypes != nullptr || - m_aggregationAttributes != nullptr); + if (m_aggregationTypes != nullptr && m_aggregationAttributes != nullptr + && m_aggregationTypes->size() != m_aggregationAttributes->size()) + throw std::runtime_error("error: the number of aggregation types should ne equal to the aggregation attributes"); + return (m_aggregationTypes != nullptr || m_aggregationAttributes != nullptr); } - bool hasStaticHashJoin() const { + bool hasStaticHashJoin() const override { return (m_staticBuffer != nullptr && m_staticJoinPredicate != nullptr); } + std::string getSelectionExpr() override { + std::string s; + if (hasSelection()) { + s.append("if ( "); + s.append(m_predicate->toSExprForCodeGen()); + s.append(" )\n"); + } + return s; + } + + std::string getHashTableExpr() override { + std::string s; + if (!hasGroupBy() + //|| ((*m_groupByAttributes)[0]->getBasicType() != BasicType::LongLong && + //m_numberOfKeyAttributes == 1) + ) { + return s; + } + // create Key + if (m_numberOfKeyAttributes == 1) { + s.append("using Key = "); + for (int idx = 1; idx <= m_numberOfKeyAttributes; ++idx) { + auto e = (*m_groupByAttributes)[idx - 1]; + if (e->getBasicType() == BasicType::Integer) { + s.append("int"); + } else if (e->getBasicType() == BasicType::Float) { ; + s.append("float"); + } else if (e->getBasicType() == BasicType::Long) { + s.append("long"); + } else if (e->getBasicType() == BasicType::LongLong) { + s.append("__uint128_t"); + } else + throw std::invalid_argument("error: invalid group-by attribute"); + } + s.append(";\n"); + } else { + s.append("struct Key {\n"); + for (int idx = 1; idx <= m_numberOfKeyAttributes; ++idx) { + s.append("\t"); + auto e = (*m_groupByAttributes)[idx - 1]; + if (e->getBasicType() == BasicType::Integer) { + s.append("int"); + } else if (e->getBasicType() == BasicType::Float) { ; + s.append("float"); + } else if (e->getBasicType() == BasicType::Long) { + s.append("long"); + } else if (e->getBasicType() == BasicType::LongLong) { + s.append("__uint128_t"); + } else + throw std::invalid_argument("error: invalid group-by attribute"); + s.append(" _" + std::to_string(idx - 1) + ";\n"); + } + s.append("};\n"); + } + s.append("using KeyT = Key;\n\n"); + // get hash and equality functions + if (m_customHashTable.empty()) { + if (m_numberOfKeyAttributes == 1 && + (*m_groupByAttributes)[0]->getBasicType() != BasicType::LongLong) { + s.append( + "struct HMEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) " + "const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "\n" + "struct MyHash{\n" + " std::size_t operator()(KeyT m) const {\n" + " std::hash hashVal;\n" + " return hashVal(m);\n" + " }\n" + "};\n" + "\n"); + } else { + if (m_numberOfKeyAttributes == 1 && + (*m_groupByAttributes)[0]->getBasicType() == BasicType::LongLong) { + s.append( + "struct HMEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) " + "const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "struct UInt128Hash {\n" + " UInt128Hash() = default;\n" + " inline std::size_t 
operator()(__uint128_t data) const {\n" + " const __uint128_t __mask = " + "static_cast(-1);\n" + " const std::size_t __a = (std::size_t)(data & __mask);\n" + " const std::size_t __b = (std::size_t)((data & (__mask " + "<< 64)) >> 64);\n" + " auto hasher = std::hash();\n" + " return hasher(__a) + hasher(__b);\n" + " }\n" + "};\n" + "using MyHash = UInt128Hash;\n" + "\n"); + } else { + s.append( + "struct HMEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) " + "const {\n" + " return"); + for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { + s.append(" lhs._" + std::to_string(i) + " == rhs._" + + std::to_string(i)); + if (i != m_numberOfKeyAttributes - 1) s.append(" &&"); + } + s.append( + ";\n" + " }\n" + "};\n"); + s.append( + "\n" + "" + "#include \n" + "#include \n" + "\n" + "#define CRCPOLY 0x82f63b78 // reversed 0x1EDC6F41\n" + "#define CRCINIT 0xFFFFFFFF\n" + "uint32_t Crc32Lookup [256] = {\n" + " 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,\n" + " 0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,\n" + " 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,\n" + " 0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,\n" + " 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,\n" + " 0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,\n" + " 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,\n" + " 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,\n" + " 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,\n" + " 0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,\n" + " 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,\n" + " 0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,\n" + " 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,\n" + " 0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,\n" + " 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,\n" + " 0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,\n" + " 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,\n" + " 0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,\n" + " 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,\n" + " 0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,\n" + " 0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,\n" + " 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,\n" + " 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,\n" + " 0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,\n" + " 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,\n" + " 0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,\n" + " 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,\n" + " 
0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,\n" + " 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,\n" + " 0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,\n" + " 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,\n" + " 0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D\n" + "};\n" + "// Hardware-accelerated CRC-32C (using CRC32 instruction)\n" + "inline size_t CRC_Hardware(const void* data, size_t length) {\n" + " size_t crc = CRCINIT;\n" + "\n" + " unsigned char* current = (unsigned char*) data;\n" + " // Align to DWORD boundary\n" + " size_t align = (sizeof(unsigned int) - (__int64_t)current) " + "& (sizeof(unsigned int) - 1);\n" + " align = std::min(align, length);\n" + " length -= align;\n" + " for (; align; align--)\n" + " crc = Crc32Lookup[(crc ^ *current++) & 0xFF] ^ (crc >> " + "8);\n" + "\n" + " size_t ndwords = length / sizeof(unsigned int);\n" + " for (; ndwords; ndwords--) {\n" + " crc = _mm_crc32_u32(crc, *(unsigned int*)current);\n" + " current += sizeof(unsigned int);\n" + " }\n" + "\n" + " length &= sizeof(unsigned int) - 1;\n" + " for (; length; length--)\n" + " crc = _mm_crc32_u8(crc, *current++);\n" + " return ~crc;\n" + "}\n" + "struct Crc32Hash {\n" + " std::size_t operator()(KeyT t) const {\n" + " return CRC_Hardware(&t, KEY_SIZE);\n" + " }\n" + "};\n" + "using MyHash = Crc32Hash;\n" + "\n"); + } + } + //if (m_collisionBarrier > 0) { + // //s.append(" int _barrier = " + std::to_string(m_collisionBarrier) + ";\n"); + // throw std::runtime_error("error: the collisionBarrier is not supported yet"); + //} + } else { + //throw std::runtime_error("error: custom hashtables are not supported yet"); + } + s.append("\n"); + + return s; + } + private: std::string tab = "\t"; std::string newline = "\n"; - std::string getCombineFunction(AggregationType type, std::string leftArg, - std::string rightArg) { + std::string getCombineFunction(AggregationType type, std::string leftArg, std::string rightArg) { if (type == CNT || type == SUM) return leftArg + "+" + rightArg; else if (type == MIN) @@ -701,8 +1057,8 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else if (type == MAX) return leftArg + ">" + rightArg + "?" 
+ leftArg + ":" + rightArg; else if (type == AVG) - return "std::make_pair(" + leftArg + ".first+" + rightArg + ".first," + - leftArg + ".second+" + rightArg + ".second)"; + return "std::make_pair(" + leftArg + ".first+" + rightArg + ".first," + leftArg + ".second+" + rightArg + + ".second)"; else { throw std::runtime_error("error: unsupported type"); } @@ -720,13 +1076,30 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { } return s; } + std::string addPostWindowOperation(int numOfTabs) { + std::string s; + std::string tabs; + for (int i = 0; i < numOfTabs; ++i) { + tabs.append("\t"); + } + if (hasPostWindowOperation()) { + s.append(tabs).append(m_postWindowOp); + } + return s; + } + std::string addPostMergeOp() { + std::string s; + if (hasPostWindowOperation()) { + s.append(m_postMergeOperation); + } + return s; + } std::string getC_Definitions(std::string computation) { std::string s; s.append( "extern \"C\" {\n" - "void process (int pid, char *inputBuffer, long startPointer, long " - "endPointer,\n" + "void process (int pid, char *inputBuffer, long startPointer, long endPointer,\n" " char *outputBuffer, long &pos) {\n" " // Input Buffer\n" " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" @@ -740,11 +1113,12 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " endPointer = endPointer / tupleSize;\n" "\n" " for (;bufferPtr < endPointer; ++bufferPtr) {\n" + - addTabs(computation, 2) + - " }\n" - " pos = pos * sizeof(output_tuple_t);\n" - "};\n" - "}\n"); + addTabs(computation, 2) + + " }\n" + " pos = pos * sizeof(output_tuple_t);\n" + "};\n" + "}\n" + ); return s; } @@ -758,67 +1132,49 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { } s.append( "extern \"C\" {\n" - " void process (int pid, char *inputBuffer, size_t inputBufferSize, " - "long startPointer, long endPointer, long timestampFromPrevBatch,\n" - " long *windowStartPointers, long " - "*windowEndPointers, char *" + - ptr + "openingWindowsBuffer, char *" + ptr + - "closingWindowsBuffer,\n" - " char *" + - ptr + - "pendingWindowsBuffer, char *completeWindowsBuffer,\n" - " int *openingStartPointers, int " - "*closingStartPointers, int *pendingStartPointers, int " - "*completeStartPointers,\n" - //" long *openingWindowIds, long - //*closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" - " long streamStartPointer, int " - "*pointersAndCounts, char *staticBuffer) {\n" - " processData (pid, inputBuffer, inputBufferSize, " - "startPointer, endPointer, timestampFromPrevBatch,\n" - " windowStartPointers, windowEndPointers, " - "openingWindowsBuffer, closingWindowsBuffer,\n" - " pendingWindowsBuffer, completeWindowsBuffer,\n" - " openingStartPointers, closingStartPointers, " - "pendingStartPointers, completeStartPointers,\n" - //" openingWindowIds, closingWindowIds, pendingWindowIds, - // completeWindowIds,\n" - " streamStartPointer, pointersAndCounts, staticBuffer);\n" - " };\n" - "\n"); + " void process (int pid, char *inputBuffer, size_t inputBufferSize, long startPointer, long endPointer, long timestampFromPrevBatch,\n" + " long *windowStartPointers, long *windowEndPointers, char *" + ptr + + "openingWindowsBuffer, char *" + ptr + "closingWindowsBuffer,\n" + " char *" + ptr + + "pendingWindowsBuffer, char *completeWindowsBuffer,\n" + " int *openingStartPointers, int *closingStartPointers, int *pendingStartPointers, int *completeStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long 
*completeWindowIds,\n" + " long streamStartPointer, int *pointersAndCounts, char *staticBuffer) {\n" + " processData (pid, inputBuffer, inputBufferSize, startPointer, endPointer, timestampFromPrevBatch,\n" + " windowStartPointers, windowEndPointers, openingWindowsBuffer, closingWindowsBuffer,\n" + " pendingWindowsBuffer, completeWindowsBuffer,\n" + " openingStartPointers, closingStartPointers, pendingStartPointers, completeStartPointers,\n" + //" openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n" + " streamStartPointer, pointersAndCounts, staticBuffer);\n" + " };\n" + "\n"); if (!m_useParallelMerge) { s.append( - " long aggregate (char *" + ptr + - "openingBuffer, int *openingStartPointers, int openingEndPointer,\n" - " char *" + - ptr + - "closingOrPendingBuffer, int *copStartPointers, int copEndPointer,\n" - " int startingWindow, int numOfWindows, " - "bool pack,\n" - " char* completeBuffer, long " - "completeBufferPtr, int &tupleSize) {\n" - " return aggregatePartials (openingBuffer, " - "openingStartPointers, openingEndPointer,\n" - " closingOrPendingBuffer, copStartPointers, " - "copEndPointer,\n" - " startingWindow, numOfWindows, pack, completeBuffer, " - "completeBufferPtr, tupleSize);\n" - " };\n" + - getSize + "}\n"); + " long aggregate (char *" + ptr + "openingBuffer, int *openingStartPointers, int openingEndPointer,\n" + " char *" + ptr + + "closingOrPendingBuffer, int *copStartPointers, int copEndPointer,\n" + " int startingWindow, int numOfWindows, bool pack,\n" + " char* completeBuffer, long completeBufferPtr, int &tupleSize) {\n" + " return aggregatePartials (openingBuffer, openingStartPointers, openingEndPointer,\n" + " closingOrPendingBuffer, copStartPointers, copEndPointer,\n" + " startingWindow, numOfWindows, pack, completeBuffer, completeBufferPtr, tupleSize);\n" + " };\n" + + getSize + + "}\n" + ); } else { s.append( - " long aggregate (char *completeBuffer, int completeWindow, int " - "completeStartPos,\n" - " char **partialBuffer, int " - "partialWindow,\n" + " long aggregate (char *completeBuffer, int completeWindow, int completeStartPos,\n" + " char **partialBuffer, int partialWindow,\n" " int &startPos, int &endPos,\n" " int &tupleSize, bool pack) {\n" - " return aggregatePartials (completeBuffer, completeWindow, " - "completeStartPos, partialBuffer,\n" + " return aggregatePartials (completeBuffer, completeWindow, completeStartPos, partialBuffer,\n" " partialWindow, startPos, endPos,\n" " tupleSize, pack);\n" " };\n" + - getSize + "}\n"); + getSize + + "}\n" + ); } return s; } @@ -827,20 +1183,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string s; if (hasAggregation()) { s.append("[Partial window u-aggregation] "); - for (int i = 0; i < (int)(*m_aggregationTypes).size(); ++i) - s.append(AggregationTypes::toString((*m_aggregationTypes)[i])) - .append("(") - .append((*m_aggregationAttributes)[i]->toSExpr()) - .append(")") - .append(" "); - s.append("(group-by ?") - .append(" ") - .append(std::to_string(m_groupBy)) - .append(") "); - s.append("(incremental ?") - .append(" ") - .append(std::to_string(m_processIncremental)) - .append(")"); + for (int i = 0; i < (int) (*m_aggregationTypes).size(); ++i) + s.append(AggregationTypes::toString((*m_aggregationTypes)[i])).append("(").append((*m_aggregationAttributes)[i]->toSExpr()).append( + ")").append(" "); + s.append("(group-by ?").append(" ").append(std::to_string(m_groupBy)).append(") "); + s.append("(incremental ?").append(" 
").append(std::to_string(m_processIncremental)).append(")"); } s.append("\n"); return s; @@ -862,9 +1209,10 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (hasProjection()) { s.append("Projection ("); int i = 0; - for (auto e : *m_expressions) { + for (auto e: *m_expressions) { s.append(e->toSExpr()); - if (i != (int)(*m_expressions).size() - 1) s.append(", "); + if (i != (int) (*m_expressions).size() - 1) + s.append(", "); i++; } s.append(")"); @@ -887,7 +1235,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { return s; } - std::string getInputSchemaString() { + std::string getInputSchemaString() override { if (m_inputSchema == nullptr) throw std::runtime_error("error: m_inputSchema hasn't been set up"); std::string s; @@ -897,22 +1245,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { for (int i = 1; i < m_inputSchema->numberOfAttributes(); i++) { auto type = m_inputSchema->getAttributeType(i); switch (type) { - case BasicType::Integer: - s.append("\tint _" + std::to_string(i) + ";\n"); + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); break; - case BasicType::Float: - s.append("\tfloat _" + std::to_string(i) + ";\n"); + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); break; - case BasicType::Long: - s.append("\tlong _" + std::to_string(i) + ";\n"); + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); break; - case BasicType::LongLong: - s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); break; - default: + default : throw std::runtime_error( - "error: failed to generate tuple struct (attribute " + - std::to_string(i) + " is undefined)"); + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); } } s.append("};\n"); @@ -933,22 +1276,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { for (; i < m_outputSchema->numberOfAttributes(); i++) { auto type = m_outputSchema->getAttributeType(i); switch (type) { - case BasicType::Integer: - s.append("\tint _" + std::to_string(i) + ";\n"); + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); break; - case BasicType::Float: - s.append("\tfloat _" + std::to_string(i) + ";\n"); + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); break; - case BasicType::Long: - s.append("\tlong _" + std::to_string(i) + ";\n"); + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); break; - case BasicType::LongLong: - s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); break; - default: + default : throw std::runtime_error( - "error: failed to generate tuple struct (attribute " + - std::to_string(i) + " is undefined)"); + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); } } s.append("};\n"); @@ -959,8 +1297,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getIntermediateSchemaString() { if (!hasProjection()) throw std::runtime_error("error: projection hasn't been set up"); - auto schema = ExpressionUtils::getTupleSchemaFromExpressions( - *m_expressions, "IntermediateStream"); + auto schema = ExpressionUtils::getTupleSchemaFromExpressions(*m_expressions, "IntermediateStream"); std::string s; 
s.append("struct alignas(16) interm_tuple_t {\n"); int i = 0; @@ -971,22 +1308,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { for (; i < schema.numberOfAttributes(); i++) { auto type = schema.getAttributeType(i); switch (type) { - case BasicType::Integer: - s.append("\tint _" + std::to_string(i) + ";\n"); + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); break; - case BasicType::Float: - s.append("\tfloat _" + std::to_string(i) + ";\n"); + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); break; - case BasicType::Long: - s.append("\tlong _" + std::to_string(i) + ";\n"); + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); break; - case BasicType::LongLong: - s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); break; - default: + default : throw std::runtime_error( - "error: failed to generate tuple struct (attribute " + - std::to_string(i) + " is undefined)"); + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); } } s.append("};\n"); @@ -1000,36 +1332,21 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { s.append("#define RANGE_BASED\n"); else s.append("#define COUNT_BASED\n"); - s.append("#define WINDOW_SIZE " + - std::to_string(m_windowDefinition->getSize()) + "L\n"); - s.append("#define WINDOW_SLIDE " + - std::to_string(m_windowDefinition->getSlide()) + "L\n"); - s.append("#define PANES_PER_WINDOW " + - std::to_string(m_windowDefinition->numberOfPanes()) + "L\n"); - s.append("#define PANES_PER_SLIDE " + - std::to_string(m_windowDefinition->panesPerSlide()) + "L\n"); - s.append("#define PANE_SIZE " + - std::to_string(m_windowDefinition->getPaneSize()) + "L\n"); - // s.append("#define PARTIAL_WINDOWS - // "+std::to_string(SystemConf::getInstance().PARTIAL_WINDOWS)+"L\n"); - s.append("#define BUFFER_SIZE " + - std::to_string( - m_config ? m_config->getCircularBufferSize() - : SystemConf::getInstance().CIRCULAR_BUFFER_SIZE) + - "L\n"); - s.append("#define UNBOUNDED_SIZE " + - std::to_string(SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE) + - "L\n"); + s.append("#define WINDOW_SIZE " + std::to_string(m_windowDefinition->getSize()) + "L\n"); + s.append("#define WINDOW_SLIDE " + std::to_string(m_windowDefinition->getSlide()) + "L\n"); + s.append("#define PANES_PER_WINDOW " + std::to_string(m_windowDefinition->numberOfPanes()) + "L\n"); + s.append("#define PANES_PER_SLIDE " + std::to_string(m_windowDefinition->panesPerSlide()) + "L\n"); + s.append("#define PANE_SIZE " + std::to_string(m_windowDefinition->getPaneSize()) + "L\n"); + //s.append("#define PARTIAL_WINDOWS "+std::to_string(SystemConf::getInstance().PARTIAL_WINDOWS)+"L\n"); + s.append("#define BUFFER_SIZE " + std::to_string(m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE) + "L\n"); + s.append("#define UNBOUNDED_SIZE " + std::to_string(SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE) + "L\n"); if (!hasGroupBy()) { s.append("#define MAP_SIZE " + std::to_string(1) + "L\n"); } else { - s.append("#define MAP_SIZE " + - std::to_string(m_config - ? m_config->getHashtableSize() - : SystemConf::getInstance().HASH_TABLE_SIZE) + - "L\n"); - s.append("#define KEY_SIZE " + std::to_string(m_keyLength) + - "L\n"); + s.append("#define MAP_SIZE " + std::to_string(m_config ? 
m_config->getHashtableSize() : + SystemConf::getInstance().HASH_TABLE_SIZE) + "L\n"); + s.append("#define KEY_SIZE " + std::to_string(m_keyLength) + "L\n"); } s.append("\n"); return s; @@ -1038,14 +1355,14 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getSingleKeyDataStructureString() { std::string s; // TODO: test that they produce equivalent results - AbstractTreeRepresentation tree(*m_windowDefinition, *m_aggregationTypes, - hasNonInvertible()); - // GeneralAggregationGraph gag (m_windowDefinition, m_aggregationTypes); + AbstractTreeRepresentation + tree(*m_windowDefinition, *m_aggregationTypes, hasNonInvertible()); + //GeneralAggregationGraph gag (m_windowDefinition, m_aggregationTypes); s.append(tree.generateAggregationTreeNode()); - // s.append(gag.generateAggregationTreeNode()); + //s.append(gag.generateAggregationTreeNode()); if (hasIncremental()) { s.append(tree.generateCode()); - // s.append(gag.generateCode(true)); + //s.append(gag.generateCode(true)); } return s; } @@ -1054,50 +1371,39 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (m_outputSchema == nullptr) throw std::runtime_error("error: outputSchema hasn't been set up"); std::string s; - s.append("struct Value {\n"); + s.append( + "struct Value {\n"); for (unsigned long i = 0; i < (*m_aggregationTypes).size(); ++i) { switch ((*m_aggregationTypes)[i]) { - case SUM: - s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); + case SUM:s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); break; - case AVG: - s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); - // s.append("\t\t_c"+std::to_string((i+1))+" = 0.0f;\n"); + case AVG:s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); + //s.append("\t\t_c"+std::to_string((i+1))+" = 0.0f;\n"); break; - case CNT: - s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); + case CNT:s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); break; - case MIN: - s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); + case MIN:s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); break; - case MAX: - s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); + case MAX:s.append("\tfloat _" + std::to_string((i + 1)) + ";\n"); break; - default: - throw std::runtime_error("error: invalid aggregation type"); + default:throw std::runtime_error("error: invalid aggregation type"); } } s.append("\tValue () {\n"); for (unsigned long i = 0; i < (*m_aggregationTypes).size(); ++i) { switch ((*m_aggregationTypes)[i]) { - case SUM: - s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); + case SUM:s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); break; - case AVG: - s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); - // s.append("\t\t_c"+std::to_string((i+1))+" = 0.0f;\n"); + case AVG:s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); + //s.append("\t\t_c"+std::to_string((i+1))+" = 0.0f;\n"); break; - case CNT: - s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); + case CNT:s.append("\t\t_" + std::to_string((i + 1)) + " = 0.0f;\n"); break; - case MIN: - s.append("\t\t_" + std::to_string((i + 1)) + " = FLT_MAX;\n"); + case MIN:s.append("\t\t_" + std::to_string((i + 1)) + " = FLT_MAX;\n"); break; - case MAX: - s.append("\t\t_" + std::to_string((i + 1)) + " = FLT_MIN;\n"); + case MAX:s.append("\t\t_" + std::to_string((i + 1)) + " = FLT_MIN;\n"); break; - default: - throw std::runtime_error("error: invalid aggregation type"); + default:throw 
std::runtime_error("error: invalid aggregation type"); } } s.append("\t}\n"); @@ -1111,76 +1417,68 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { s.append( " long getHashTableSizeInBytes () {\n" " return sizeof(Bucket) * MAP_SIZE;\n" - " }\n"); + " }\n" + ); return s; } std::string getHashTableString(std::string key) { std::string s; if (m_customHashTable.empty()) { - if (m_numberOfKeyAttributes == 1 && - (*m_groupByAttributes)[0]->getBasicType() != BasicType::LongLong) - s.append("using KeyT = " + key + - ";\n" - "using ValueT = Value;\n" - "\n" - "struct HashMapEqualTo {\n" - " constexpr bool operator()(const KeyT& lhs, const KeyT& " - "rhs) const {\n" - " return lhs == rhs;\n" - " }\n" - "};\n" - "\n" - "struct MyHash{\n" - " std::size_t operator()(KeyT m) const {\n" - " std::hash hashVal;\n" - " return hashVal(m);\n" - " }\n" - "};\n" - "\n"); + if (m_numberOfKeyAttributes == 1 && (*m_groupByAttributes)[0]->getBasicType() != BasicType::LongLong) + s.append( + "using KeyT = " + key + ";\n" + "using ValueT = Value;\n" + "\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "\n" + "struct MyHash{\n" + " std::size_t operator()(KeyT m) const {\n" + " std::hash hashVal;\n" + " return hashVal(m);\n" + " }\n" + "};\n" + "\n"); else { - if (m_numberOfKeyAttributes == 1 && - (*m_groupByAttributes)[0]->getBasicType() == BasicType::LongLong) { - s.append( - "using KeyT = " + key + - ";\n" - "using ValueT = Value;\n" - "\n" - "struct HashMapEqualTo {\n" - " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) " - "const {\n" - " return lhs == rhs;\n" - " }\n" - "};\n" - "struct UInt128Hash {\n" - " UInt128Hash() = default;\n" - " inline std::size_t operator()(__uint128_t data) const {\n" - " const __uint128_t __mask = " - "static_cast(-1);\n" - " const std::size_t __a = (std::size_t)(data & __mask);\n" - " const std::size_t __b = (std::size_t)((data & (__mask " - "<< 64)) >> 64);\n" - " auto hasher = std::hash();\n" - " return hasher(__a) + hasher(__b);\n" - " }\n" - "};\n" - "using MyHash = UInt128Hash;\n" - "\n"); + if (m_numberOfKeyAttributes == 1 && (*m_groupByAttributes)[0]->getBasicType() == BasicType::LongLong) { + s.append("using KeyT = " + key + ";\n" + "using ValueT = Value;\n" + "\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "struct UInt128Hash {\n" + " UInt128Hash() = default;\n" + " inline std::size_t operator()(__uint128_t data) const {\n" + " const __uint128_t __mask = static_cast(-1);\n" + " const std::size_t __a = (std::size_t)(data & __mask);\n" + " const std::size_t __b = (std::size_t)((data & (__mask << 64)) >> 64);\n" + " auto hasher = std::hash();\n" + " return hasher(__a) + hasher(__b);\n" + " }\n" + "};\n" + "using MyHash = UInt128Hash;\n" + "\n"); } else { - s.append(key + - ";\n" - "using KeyT = Key;\n" - "using ValueT = Value;\n" - "\n" - "struct HashMapEqualTo {\n" - " constexpr bool operator()(const KeyT& lhs, const KeyT& " - "rhs) const {\n" - " return"); + s.append(key + ";\n" + "using KeyT = Key;\n" + "using ValueT = Value;\n" + "\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return" + ); for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" lhs._" + std::to_string(i) + " == rhs._" + - std::to_string(i)); - if (i != m_numberOfKeyAttributes - 1) 
s.append(" &&"); + s.append(" lhs._" + std::to_string(i) + " == rhs._" + std::to_string(i)); + if (i != m_numberOfKeyAttributes - 1) + s.append(" &&"); } s.append( ";\n" @@ -1195,102 +1493,38 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "#define CRCPOLY 0x82f63b78 // reversed 0x1EDC6F41\n" "#define CRCINIT 0xFFFFFFFF\n" "uint32_t Crc32Lookup [256] = {\n" - " " - "0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419," - "0x706AF48F,0xE963A535,0x9E6495A3,\n" - " " - "0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B," - "0x7EB17CBD,0xE7B82D07,0x90BF1D91,\n" - " " - "0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D," - "0x6DDDE4EB,0xF4D4B551,0x83D385C7,\n" - " " - "0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F," - "0x63066CD9,0xFA0F3D63,0x8D080DF5,\n" - " " - "0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1," - "0x4B04D447,0xD20D85FD,0xA50AB56B,\n" - " " - "0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3," - "0x45DF5C75,0xDCD60DCF,0xABD13D59,\n" - " " - "0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5," - "0x56B3C423,0xCFBA9599,0xB8BDA50F,\n" - " " - "0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87," - "0x58684C11,0xC1611DAB,0xB6662D3D,\n" - " " - "0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589," - "0x06B6B51F,0x9FBFE4A5,0xE8B8D433,\n" - " " - "0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB," - "0x086D3D2D,0x91646C97,0xE6635C01,\n" - " " - "0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED," - "0x1B01A57B,0x8208F4C1,0xF50FC457,\n" - " " - "0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF," - "0x15DA2D49,0x8CD37CF3,0xFBD44C65,\n" - " " - "0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541," - "0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,\n" - " " - "0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73," - "0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,\n" - " " - "0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525," - "0x206F85B3,0xB966D409,0xCE61E49F,\n" - " " - "0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17," - "0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,\n" - " " - "0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739," - "0x9DD277AF,0x04DB2615,0x73DC1683,\n" - " " - "0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B," - "0x9309FF9D,0x0A00AE27,0x7D079EB1,\n" - " " - "0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D," - "0x806567CB,0x196C3671,0x6E6B06E7,\n" - " " - "0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F," - "0x8EBEEFF9,0x17B7BE43,0x60B08ED5,\n" - " " - "0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1," - "0xA6BC5767,0x3FB506DD,0x48B2364B,\n" - " " - "0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3," - "0xA867DF55,0x316E8EEF,0x4669BE79,\n" - " " - "0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795," - "0xBB0B4703,0x220216B9,0x5505262F,\n" - " " - "0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7," - "0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,\n" - " " - "0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9," - "0xEB0E363F,0x72076785,0x05005713,\n" - " " - "0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B," - "0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,\n" - " " - "0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD," - "0xF6B9265B,0x6FB077E1,0x18B74777,\n" - " " - "0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF," - "0xF862AE69,0x616BFFD3,0x166CCF45,\n" - " " - "0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661," - "0xD06016F7,0x4969474D,0x3E6E77DB,\n" - " " - "0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53," - 
"0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,\n" - " " - "0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605," - "0xCDD70693,0x54DE5729,0x23D967BF,\n" - " " - "0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37," - "0xC30C8EA1,0x5A05DF1B,0x2D02EF8D\n" + " 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,\n" + " 0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,\n" + " 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,\n" + " 0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,\n" + " 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,\n" + " 0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,\n" + " 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,\n" + " 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,\n" + " 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,\n" + " 0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,\n" + " 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,\n" + " 0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,\n" + " 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,\n" + " 0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,\n" + " 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,\n" + " 0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,\n" + " 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,\n" + " 0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,\n" + " 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,\n" + " 0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,\n" + " 0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,\n" + " 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,\n" + " 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,\n" + " 0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,\n" + " 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,\n" + " 0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,\n" + " 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,\n" + " 0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,\n" + " 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,\n" + " 0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,\n" + " 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,\n" + " 0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D\n" "};\n" "// Hardware-accelerated CRC-32C (using CRC32 instruction)\n" "inline size_t CRC_Hardware(const void* data, size_t length) {\n" @@ -1298,13 +1532,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { 
"\n" " unsigned char* current = (unsigned char*) data;\n" " // Align to DWORD boundary\n" - " size_t align = (sizeof(unsigned int) - (__int64_t)current) " - "& (sizeof(unsigned int) - 1);\n" + " size_t align = (sizeof(unsigned int) - (__int64_t)current) & (sizeof(unsigned int) - 1);\n" " align = std::min(align, length);\n" " length -= align;\n" " for (; align; align--)\n" - " crc = Crc32Lookup[(crc ^ *current++) & 0xFF] ^ (crc >> " - "8);\n" + " crc = Crc32Lookup[(crc ^ *current++) & 0xFF] ^ (crc >> 8);\n" "\n" " size_t ndwords = length / sizeof(unsigned int);\n" " for (; ndwords; ndwords--) {\n" @@ -1323,7 +1555,8 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " }\n" "};\n" "using MyHash = Crc32Hash;\n" - "\n"); + "\n" + ); } } s.append( @@ -1341,21 +1574,28 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "class alignas(64) HashTable {\n" "private:\n" " using HashT = MyHash; //std::hash;\n" - " using EqT = HashMapEqualTo;\n"); - if (hasIncremental()) s.append(" using AggrT = Aggregator;\n"); + " using EqT = HashMapEqualTo;\n" + ); + if (hasIncremental()) + s.append( + " using AggrT = Aggregator;\n" + ); s.append( "\n" " HashT _hasher;\n" " EqT _eq;\n" - " BucketT* _buckets = nullptr;\n"); - if (hasIncremental()) s.append(" AggrT* _aggrs = nullptr;\n"); + " BucketT* _buckets = nullptr;\n" + ); + if (hasIncremental()) + s.append( + " AggrT* _aggrs = nullptr;\n" + ); s.append( " size_t _num_buckets = MAP_SIZE;\n" " size_t _num_filled = 0;\n" " size_t _mask = MAP_SIZE-1;\n"); if (m_collisionBarrier > 0) - s.append(" int _barrier = " + - std::to_string(m_collisionBarrier) + ";\n"); + s.append(" int _barrier = " + std::to_string(m_collisionBarrier) + ";\n"); s.append( "public:\n" " HashTable ();\n" @@ -1364,8 +1604,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " void reset ();\n" " void clear ();\n" " void insert (KeyT &key, ValueT &value, long timestamp);\n" - " void insert_or_modify (KeyT &key, ValueT &value, long " - "timestamp);\n" + " void insert_or_modify (KeyT &key, ValueT &value, long timestamp);\n" " bool evict (KeyT &key);\n" " void insertSlices ();\n" " void evictSlices ();\n" @@ -1386,23 +1625,21 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" "HashTable::HashTable (Bucket *nodes) : _buckets(nodes) {\n" " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" - " throw std::runtime_error (\"error: the size of the hash " - "table has to be a power of two\\n\");\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" " }\n" "}\n" "\n" "void HashTable::init () {\n" " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" - " throw std::runtime_error (\"error: the size of the hash " - "table has to be a power of two\\n\");\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" " }\n" "\n" - " _buckets = (BucketT*)_mm_malloc(_num_buckets * " - "sizeof(BucketT), 64);\n"); + " _buckets = (BucketT*)_mm_malloc(_num_buckets * sizeof(BucketT), 64);\n" + ); if (hasIncremental()) s.append( - " _aggrs = (AggrT*)_mm_malloc(_num_buckets * sizeof(AggrT), " - "64);\n"); + " _aggrs = (AggrT*)_mm_malloc(_num_buckets * sizeof(AggrT), 64);\n" + ); s.append( " if (!_buckets /*|| !_aggrs*/) {\n" " free(_buckets);\n" @@ -1412,11 +1649,13 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" " for (auto i = 0; i < _num_buckets; ++i) 
{\n" " _buckets[i].state = 0;\n" - " _buckets[i].dirty = 0;\n"); + " _buckets[i].dirty = 0;\n" + ); if (hasIncremental()) s.append( " _aggrs[i] = AggrT (); // maybe initiliaze this on insert\n" - " _aggrs[i].initialise();\n"); + " _aggrs[i].initialise();\n" + ); s.append( " }\n" "}\n" @@ -1433,22 +1672,24 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " for (auto i = 0; i < _num_buckets; ++i) {\n" " _buckets[i].state = 0;\n" " _buckets[i].dirty = 0;\n" - " //_buckets[i].counter = 0;\n"); - if (hasIncremental()) s.append(" _aggrs[i].initialise();\n"); + " //_buckets[i].counter = 0;\n" + ); + if (hasIncremental()) + s.append( + " _aggrs[i].initialise();\n" + ); s.append( " }\n" " _num_filled = 0;\n" "}\n" "\n" - "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) " - "{\n" + "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) {\n" " size_t ind = _hasher(key) & _mask, i = ind;\n" " for (; i < _num_buckets; i++) {\n" " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" " _buckets[i].state = 1;\n" " _buckets[i].timestamp = timestamp;\n" - " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, " - "key, KEY_SIZE);\n" + " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, key, KEY_SIZE);\n" " _buckets[i].value = value;\n" " return;\n" " }\n" @@ -1462,43 +1703,33 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " return;\n" " }\n" " }\n" - " throw std::runtime_error (\"error: the hashtable is full " - "\\n\");\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" "}\n" "\n" - "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long " - "timestamp) {\n" + "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long timestamp) {\n" " size_t ind = _hasher(key) & _mask, i = ind;\n" " char tempState;\n"); - if (m_collisionBarrier > 0) s.append(" int steps = 0;\n"); + if (m_collisionBarrier > 0) + s.append(" int steps = 0;\n"); s.append( " for (; i < _num_buckets; i++) {\n" " tempState = _buckets[i].state;\n" " if (tempState && _eq(_buckets[i].key, key)) {\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = " + - getCombineFunction( - SUM, "_buckets[i].value._" + std::to_string((i + 1)), - "value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction(SUM, "_buckets[i].value._" + std::to_string((i + 1)), + "value._" + std::to_string((i + 1))) + ";\n"); } else - s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "_buckets[i].value._" + std::to_string((i + 1)), - "value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], "_buckets[i].value._" + std::to_string((i + 1)), + "value._" + std::to_string((i + 1))) + ";\n"); } s.append( " _buckets[i].counter++;\n" " return;\n" " }\n" - " if (!tempState && (_eq(_buckets[i].key, key) || " - "_buckets[i].dirty == 0)) { // first insert -- keep track of " - "previous inserted value\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" " _buckets[i].state = 1;\n" " _buckets[i].dirty = 1;\n" " _buckets[i].timestamp = timestamp;\n" @@ -1508,13 +1739,11 @@ 
class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " return;\n" " }\n"); if (m_collisionBarrier > 0) - s.append( - " steps++;\n" - " if (steps == _barrier ) {\n" - " printf(\"Too many collisions, increase the " - "size...\\n\");\n" - " exit(1);\n" - " };\n"); + s.append(" steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n"); s.append( " }\n" " for (i = 0; i < ind; i++) {\n" @@ -1522,28 +1751,19 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " if (tempState && _eq(_buckets[i].key, key)) {\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = " + - getCombineFunction( - SUM, "_buckets[i].value._" + std::to_string((i + 1)), - "value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction(SUM, "_buckets[i].value._" + std::to_string((i + 1)), + "value._" + std::to_string((i + 1))) + ";\n"); } else - s.append("\t\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "_buckets[i].value._" + std::to_string((i + 1)), - "value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], "_buckets[i].value._" + std::to_string((i + 1)), + "value._" + std::to_string((i + 1))) + ";\n"); } s.append( " _buckets[i].counter++;\n" " return;\n" " }\n" - " if (!tempState && (_eq(_buckets[i].key, key) || " - "_buckets[i].dirty == 0)) { // first insert -- keep track of " - "previous inserted value\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" " _buckets[i].state = 1;\n" " _buckets[i].dirty = 1;\n" " _buckets[i].timestamp = timestamp;\n" @@ -1553,17 +1773,14 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " return;\n" " }\n"); if (m_collisionBarrier > 0) - s.append( - " steps++;\n" - " if (steps == _barrier ) {\n" - " printf(\"Too many collisions, increase the " - "size...\\n\");\n" - " exit(1);\n" - " };\n"); + s.append(" steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n"); s.append( " }\n" - " throw std::runtime_error (\"error: the hashtable is full " - "\\n\");\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" "}\n" "\n" "bool HashTable::evict (KeyT &key) {\n" @@ -1583,22 +1800,21 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " printf (\"error: entry not found \\n\");\n" " return false;\n" "}\n" - "\n"); + "\n" + ); if (hasIncremental()) { s.append( "void HashTable::insertSlices () {\n" " int maxNumOfSlices = INT_MIN;\n" " for (auto i = 0; i < _num_buckets; ++i) {\n" - " int temp = _aggrs[i].addedElements - " - "_aggrs[i].removedElements;\n" + " int temp = _aggrs[i].addedElements - _aggrs[i].removedElements;\n" " if (_buckets[i].state) {\n" " node n;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { - s.append("\t\t\t\tn._" + std::to_string((i + 1)) + - " = _buckets[i].value._" + std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t\tn._" + std::to_string((i + 1)) + " = _buckets[i].value._" + + std::to_string((i + 1)) 
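
> Inside `insert_or_modify` (reflowed above and continued below), every aggregate is merged through `getCombineFunction`, with one special case: AVG is combined as a running SUM while the per-bucket `counter` records how many tuples contributed, and the division is deferred until results are materialized. The arithmetic, as a minimal sketch with hypothetical names:

```cpp
// Deferred-average sketch: AVG partials are stored as (sum, counter) and only
// divided at output time, so merging two partials stays associative.
struct AvgPartial {
  float sum = 0.0f;
  long  counter = 0;
};

inline void combine(AvgPartial& acc, const AvgPartial& in) {
  acc.sum += in.sum;          // same combine step as SUM
  acc.counter += in.counter;  // counts travel with the partial
}

inline float finalize(const AvgPartial& p) {
  return p.sum / static_cast<float>(p.counter);  // guard counter != 0 in real use
}
```
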
+ ";\n"); if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\tn._c" + std::to_string((i + 1)) + - " = _buckets[i].counter;\n"); + s.append("\t\t\t\tn._c" + std::to_string((i + 1)) + " = _buckets[i].counter;\n"); } } s.append( @@ -1610,8 +1826,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " ValueT val;\n" " node n;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { - s.append("\t\t\tn._" + std::to_string((i + 1)) + " = val._" + - std::to_string((i + 1)) + ";\n"); + s.append("\t\t\tn._" + std::to_string((i + 1)) + " = val._" + std::to_string((i + 1)) + ";\n"); if ((*m_aggregationTypes)[i] == AVG) { s.append("\t\t\tn._c" + std::to_string((i + 1)) + " = 0;\n"); } @@ -1624,8 +1839,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" "void HashTable::evictSlices () {\n" " for (auto i = 0; i < _num_buckets; ++i) {\n" - " if (_aggrs[i].addedElements - _aggrs[i].removedElements > " - "0) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" " _aggrs[i].evict();\n" " }\n" " }\n" @@ -1633,13 +1847,12 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" "void HashTable::setValues () {\n" " for (auto i = 0; i < _num_buckets; ++i) {\n" - " if (_aggrs[i].addedElements - _aggrs[i].removedElements > " - "0) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" " auto res = _aggrs[i].query();\n" " _buckets[i].state = 1;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { - s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = res._" + std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = res._" + + std::to_string((i + 1)) + ";\n"); } s.append( " _buckets[i].counter = 1;\n" @@ -1649,24 +1862,23 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" "void HashTable::setIntermValues (int pos, long timestamp) {\n" " for (auto i = 0; i < _num_buckets; ++i) {\n" - " if (_aggrs[i].addedElements - _aggrs[i].removedElements > " - "0) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" " auto res = _aggrs[i].queryIntermediate (pos);\n" " _buckets[i].state = 1;\n" " _buckets[i].timestamp = timestamp;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { - s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + - " = res._" + std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t_buckets[i].value._" + std::to_string((i + 1)) + " = res._" + + std::to_string((i + 1)) + ";\n"); if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t_buckets[i].counter = res._c" + - std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t_buckets[i].counter = res._c" + std::to_string((i + 1)) + ";\n"); } } s.append( " }\n" " }\n" "}\n" - "\n"); + "\n" + ); } s.append( "bool HashTable::get_value (const KeyT &key, ValueT &result) {\n" @@ -1689,7 +1901,8 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "bool HashTable::get_index (const KeyT &key, int &index) {\n" " size_t ind = _hasher(key) & _mask, i = ind;\n" " index = -1;\n"); - if (m_collisionBarrier > 0) s.append(" int steps = 0;\n"); + if (m_collisionBarrier > 0) + s.append(" int steps = 0;\n"); s.append( " for (; i < _num_buckets; i++) {\n" " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" @@ -1700,11 +1913,10 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " index = i;\n" " }\n"); if 
(m_collisionBarrier > 0) - s.append( - " steps++;\n" - " if (steps == _barrier ) {\n" - " return false;;\n" - " };\n"); + s.append(" steps++;\n" + " if (steps == _barrier ) {\n" + " return false;;\n" + " };\n"); s.append( " }\n" " for (i = 0; i < ind; i++) {\n" @@ -1716,11 +1928,10 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " index = i;\n" " }\n"); if (m_collisionBarrier > 0) - s.append( - " steps++;\n" - " if (steps == _barrier ) {\n" - " return false;\n" - " };\n"); + s.append(" steps++;\n" + " if (steps == _barrier ) {\n" + " return false;\n" + " };\n"); s.append( " }\n" " return false;\n" @@ -1728,12 +1939,20 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" "void HashTable::deleteHashTable() {\n" " for (size_t bucket=0; bucket<_num_buckets; ++bucket) {\n" - " _buckets[bucket].~BucketT();\n"); - if (hasIncremental()) s.append(" _aggrs->~AggrT();\n"); + " _buckets[bucket].~BucketT();\n" + ); + if (hasIncremental()) + s.append( + " _aggrs->~AggrT();\n" + ); s.append( " }\n" - " free(_buckets);\n"); - if (hasIncremental()) s.append(" free(_aggrs);\n"); + " free(_buckets);\n" + ); + if (hasIncremental()) + s.append( + " free(_aggrs);\n" + ); s.append( "}\n" "\n" @@ -1754,9 +1973,9 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "}\n" "\n" "float HashTable::load_factor() const {\n" - " return static_cast(_num_filled) / " - "static_cast(_num_buckets);\n" - "}"); + " return static_cast(_num_filled) / static_cast(_num_buckets);\n" + "}" + ); } else { s.append(m_customHashTable); } @@ -1768,31 +1987,23 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string s; if (hasAggregation()) { std::string outputBuffers; - std::string initialiseAggrs, computeAggrs, resetAggrs, insertAggrs, - evictAggrs, setValues, filter; - std::string openingWindows, closingWindows, pendingWindows, - completeWindows; + std::string initialiseAggrs, computeAggrs, resetAggrs, insertAggrs, evictAggrs, setValues, filter; + std::string openingWindows, closingWindows, pendingWindows, completeWindows; std::string resultPointers; - if (m_windowDefinition->isRangeBased()) filter.append(getSelectionExpr()); + if (m_windowDefinition->isRangeBased()) + filter.append(getSelectionExpr()); if (!hasGroupBy()) { outputBuffers = getAggregationVarsOutputBuffers(); initialiseAggrs = getAggregationVarsInitialization(); - computeAggrs = - getAggregationVarsComputation(m_windowDefinition->isRangeBased()); + computeAggrs = getAggregationVarsComputation(m_windowDefinition->isRangeBased()); if (hasIncremental()) { resetAggrs = "aggrs.reset();\n"; insertAggrs = "aggrStructures[pid].insert(aggrs);\n"; evictAggrs = "aggrStructures[pid].evict();\n"; setValues = "aggrs = aggrStructures[pid].query();\n"; - closingWindows = - "aggrs = " - "aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n"; - openingWindows = - "aggrs = " - "aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);" - "\n"; - pendingWindows = - "aggrs = aggrStructures[pid].queryIntermediate(-1);\n"; + closingWindows = "aggrs = aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n"; + openingWindows = "aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n"; + pendingWindows = "aggrs = aggrStructures[pid].queryIntermediate(-1);\n"; } else { resetAggrs = getAggregationVarsReset(); } @@ -1802,27 +2013,21 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { completeWindows = getWriteCompleteResults(); 
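
> The incremental branch wired up here never rescans a window: each worker keeps an aggregator per bucket (or one scalar aggregator when there is no GROUP BY), pane partials are pushed with `insert`/`insertSlices`, expired panes are removed with `evict`/`evictSlices`, and `query()`/`queryIntermediate()` read off the current window or a partial result. For an invertible function the whole protocol collapses to add-on-insert, subtract-on-evict; a minimal single-aggregate sketch (illustrative, SUM only; the generated aggregators also cover non-invertible functions such as MIN/MAX via the `AbstractTreeRepresentation` seen earlier):

```cpp
#include <deque>

// Minimal insert/evict/query protocol for an invertible aggregate (SUM):
// one partial per pane, constant-time eviction of the oldest pane.
class SlidingSum {
  std::deque<float> panes_;
  float running_ = 0.0f;

 public:
  void insert(float panePartial) {
    panes_.push_back(panePartial);
    running_ += panePartial;
  }
  void evict() {                 // oldest pane leaves the window
    running_ -= panes_.front();
    panes_.pop_front();
  }
  float query() const { return running_; }
};
```
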
resultPointers = getAggregationVarsResultPointers(); } else { - // std::string key = getKeyType(); + //std::string key = getKeyType(); outputBuffers = getGroupByOutputBuffers(); initialiseAggrs = getGroupByInitialization(); - computeAggrs = - getGroupByComputation(m_windowDefinition->isRangeBased()); + computeAggrs = getGroupByComputation(m_windowDefinition->isRangeBased()); resetAggrs = getGroupByReset(); if (hasIncremental()) { - // resetAggrs = "aggrStructures[pid].clear();\n"; + //resetAggrs = "aggrStructures[pid].clear();\n"; insertAggrs = "aggrStructures[pid].insertSlices();\n"; evictAggrs = "aggrStructures[pid].evictSlices();\n"; setValues = "aggrStructures[pid].setValues();\n"; - closingWindows = - "aggrStructures[pid].setIntermValues(PARENTS_SIZE-2, " - "(prevClosePane+1) * panesPerSlide + panesPerWindow);\n"; - openingWindows = - "aggrStructures[pid].insertSlices();\n" - "aggrStructures[pid].setIntermValues(numberOfOpeningWindows, " - "prevOpenPane * paneSize);\n" - "prevOpenPane += panesPerSlide;\n"; - pendingWindows = - "aggrStructures[pid].setIntermValues(-1, prevPane * paneSize);\n"; + closingWindows = "aggrStructures[pid].setIntermValues(PARENTS_SIZE-2, (prevClosePane+1) * panesPerSlide + panesPerWindow);\n"; + openingWindows = "aggrStructures[pid].insertSlices();\n" + "aggrStructures[pid].setIntermValues(numberOfOpeningWindows, prevOpenPane * paneSize);\n" + "prevOpenPane += panesPerSlide;\n"; + pendingWindows = "aggrStructures[pid].setIntermValues(-1, prevPane * paneSize);\n"; } closingWindows.append(getWriteIntermediateResultsGroupBy(0)); openingWindows.append(getWriteIntermediateResultsGroupBy(1)); @@ -1839,32 +2044,62 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { if (m_windowDefinition->isRowBased()) { if (m_windowDefinition->isTumbling()) { - s.append(getTumblingWindowRows( - outputBuffers, initialiseAggrs, computeAggrs, resetAggrs, - openingWindows, closingWindows, pendingWindows, completeWindows, - resultPointers)); + s.append(getTumblingWindowRows(outputBuffers, + initialiseAggrs, + computeAggrs, + resetAggrs, + openingWindows, + closingWindows, + pendingWindows, + completeWindows, + resultPointers)); } else { - s.append(getSlidingWindowRows( - outputBuffers, initialiseAggrs, computeAggrs, insertAggrs, - evictAggrs, resetAggrs, setValues, openingWindows, closingWindows, - pendingWindows, completeWindows, resultPointers)); + s.append(getSlidingWindowRows(outputBuffers, + initialiseAggrs, + computeAggrs, + insertAggrs, + evictAggrs, + resetAggrs, + setValues, + openingWindows, + closingWindows, + pendingWindows, + completeWindows, + resultPointers)); } } else { if (m_windowDefinition->isTumbling()) { - s.append(getFillEmptyTumblingWindows(resetAggrs, closingWindows, - completeWindows)); - s.append(getTumblingWindowRange( - outputBuffers, initialiseAggrs, computeAggrs, resetAggrs, - openingWindows, closingWindows, pendingWindows, completeWindows, - resultPointers, filter)); + s.append(getFillEmptyTumblingWindows(resetAggrs, closingWindows, completeWindows)); + s.append(getTumblingWindowRange(outputBuffers, + initialiseAggrs, + computeAggrs, + resetAggrs, + openingWindows, + closingWindows, + pendingWindows, + completeWindows, + resultPointers, + filter)); } else { - s.append(getFillEmptySlidingWindows(insertAggrs, evictAggrs, - resetAggrs, setValues, - closingWindows, completeWindows)); - s.append(getSlidingWindowRange( - outputBuffers, initialiseAggrs, computeAggrs, insertAggrs, - evictAggrs, resetAggrs, setValues, openingWindows, 
closingWindows, - pendingWindows, completeWindows, resultPointers, filter)); + s.append(getFillEmptySlidingWindows(insertAggrs, + evictAggrs, + resetAggrs, + setValues, + closingWindows, + completeWindows)); + s.append(getSlidingWindowRange(outputBuffers, + initialiseAggrs, + computeAggrs, + insertAggrs, + evictAggrs, + resetAggrs, + setValues, + openingWindows, + closingWindows, + pendingWindows, + completeWindows, + resultPointers, + filter)); } } } else { @@ -1888,15 +2123,13 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getHashTableStaticDeclaration() { std::string s; - s.append("static HashTable aggrStructures[" + - std::to_string(SystemConf::getInstance().WORKER_THREADS) + "];\n"); + s.append("static HashTable aggrStructures[" + std::to_string(SystemConf::getInstance().WORKER_THREADS) + "];\n"); - s.append("bool isFirst [" + - std::to_string(SystemConf::getInstance().WORKER_THREADS) + - "] = {"); + s.append("bool isFirst [" + std::to_string(SystemConf::getInstance().WORKER_THREADS) + "] = {"); for (auto i = 0; i < SystemConf::getInstance().WORKER_THREADS; ++i) { s.append("true"); - if (i != SystemConf::getInstance().WORKER_THREADS - 1) s.append(", "); + if (i != SystemConf::getInstance().WORKER_THREADS - 1) + s.append(", "); } s.append("};\n"); s.append("\n"); @@ -1911,25 +2144,19 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else completeResPredicate = "resultIndex += " + getHavingExpr(false) + ";\n"; s.append( - "long aggregatePartials (char *openingBuffer, int " - "*openingStartPointers, int openingEndPointer,\n" - " char *closingOrPendingBuffer, int " - "*copStartPointers, int copEndPointer,\n" - " int startingWindow, int numOfWindows, bool " - "pack,\n" - " char* completeBuffer, long completeBufferPtr, " - "int &tupleSize) {\n" + "long aggregatePartials (char *openingBuffer, int *openingStartPointers, int openingEndPointer,\n" + " char *closingOrPendingBuffer, int *copStartPointers, int copEndPointer,\n" + " int startingWindow, int numOfWindows, bool pack,\n" + " char* completeBuffer, long completeBufferPtr, int &tupleSize) {\n" " tupleSize = sizeof(Bucket);\n" " int mapSize = MAP_SIZE;\n" " // Input and Output Buffers\n" " Bucket *openingWindowsRes= (Bucket *) openingBuffer;\n" " Bucket *partialRes= (Bucket *) closingOrPendingBuffer;\n" - " output_tuple_t *completeWindowsRes = (output_tuple_t *) " - "completeBuffer; // the results here are packed\n" + " output_tuple_t *completeWindowsRes = (output_tuple_t *) completeBuffer; // the results here are packed\n" "\n" " // Temp variables for the merging\n" - " int resultIndex = (pack) ? " - "completeBufferPtr/sizeof(output_tuple_t) : startingWindow*mapSize;\n" + " int resultIndex = (pack) ? completeBufferPtr/sizeof(output_tuple_t) : startingWindow*mapSize;\n" " int posInB2;\n" " bool isFound;\n" " int posInRes = 0;\n" @@ -1941,8 +2168,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " start1 = openingStartPointers[wid];\n" " start2 = (pack) ? copStartPointers[wid] : 0;\n" " end1 = openingStartPointers[wid+1];\n" - " end2 = (pack) ? copStartPointers[wid+1] : " - "copEndPointer/tupleSize;\n" + " end2 = (pack) ? 
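
> The dispatch this hunk re-indents selects one of four code generators: {tumbling, sliding} crossed with {row/count-based, range/time-based}, and the range-based variants first emit fill-empty-window code, presumably because time-based windows can contain empty panes. Condensed into a sketch with stubbed generator methods (hypothetical signatures, not the actual API):

```cpp
#include <string>

// Stubs standing in for the get*Window* generator methods named above.
static std::string tumblingRows()      { return "/* tumbling, count-based */"; }
static std::string slidingRows()       { return "/* sliding, count-based */"; }
static std::string tumblingRange()     { return "/* tumbling, time-based */"; }
static std::string slidingRange()      { return "/* sliding, time-based */"; }
static std::string fillEmptyTumbling() { return "/* emit empty windows */"; }
static std::string fillEmptySliding()  { return "/* emit empty windows */"; }

struct Window { bool rowBased, tumbling; };

// Four-way selection mirroring getComputeCode(): range-based plans prepend
// the fill-empty-window helpers before the main window generator.
std::string generate(const Window& w) {
  if (w.rowBased) return w.tumbling ? tumblingRows() : slidingRows();
  std::string s = w.tumbling ? fillEmptyTumbling() : fillEmptySliding();
  s += w.tumbling ? tumblingRange() : slidingRange();
  return s;
}
```
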
copStartPointers[wid+1] : copEndPointer/tupleSize;\n" " if (end1 < 0)\n" " end1 = openingEndPointer/tupleSize;\n" " if (end2 < 0)\n" @@ -1952,46 +2178,36 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " exit(1);\n" " }\n" " if (start2 == end2) {\n" - " printf (\"error: empty closing/pending window partial " - "result\");\n" + " printf (\"error: empty closing/pending window partial result\");\n" " exit(1);\n" " }\n" - " // search in the correct hashtables by moving the respective " - "pointers\n" + " // search in the correct hashtables by moving the respective pointers\n" " HashTable map1 (&openingWindowsRes[resultIndex]);\n" " HashTable map2 (&partialRes[start2]);\n" "\n" " if (pack) {\n" - " /* Iterate over tuples in first table. Search for key in " - "the hash table.\n" + " /* Iterate over tuples in first table. Search for key in the hash table.\n" " * If found, merge the two entries. */\n" " for (int idx = start1; idx < end1; idx++) {\n" - " if (openingWindowsRes[idx].state != 1) /* Skip empty " - "slot */\n" + " if (openingWindowsRes[idx].state != 1) /* Skip empty slot */\n" " continue;\n" - " isFound = map2.get_index(openingWindowsRes[idx].key, " - "posInB2);\n" + " isFound = map2.get_index(openingWindowsRes[idx].key, posInB2);\n" " if (posInB2 < 0) {\n" - " printf(\"error: open-adress hash table is full " - "\\n\");\n" + " printf(\"error: open-adress hash table is full \\n\");\n" " exit(1);\n" " }\n" " posInB2 += start2; // get the correct index;\n" " if (!isFound) { \n" " /* Copy tuple based on output schema */\n" " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "openingWindowsRes[idx].timestamp;\n" + " completeWindowsRes[resultIndex].timestamp = openingWindowsRes[idx].timestamp;\n" " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "openingWindowsRes[idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = openingWindowsRes[idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + " = openingWindowsRes[idx].key._" + - std::to_string(i) + ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = openingWindowsRes[idx].key._" + std::to_string(i) + ";\n"); } } @@ -1999,175 +2215,131 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { s.append(" /* Put value(s) */\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "openingWindowsRes[idx].value._" + std::to_string((i + 1)) + - "/openingWindowsRes[idx].counter;\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "openingWindowsRes[idx].value._" + std::to_string((i + 1)) + + "/openingWindowsRes[idx].counter;\n"); } else - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "openingWindowsRes[idx].value._" + std::to_string((i + 1)) + - ";\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "openingWindowsRes[idx].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 5) + - " // Do I need padding here ???\n" - " } else { // merge values based 
on the number of " - "aggregated values and their types! \n" - " /* Copy tuple based on output schema */\n" - " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "std::max(completeWindowsRes[resultIndex].timestamp, " - "openingWindowsRes[idx].timestamp);\n" - " /* Put key */\n"); + " // Do I need padding here ???\n" + " } else { // merge values based on the number of aggregated values and their types! \n" + " /* Copy tuple based on output schema */\n" + " /* Put timestamp */\n" + " completeWindowsRes[resultIndex].timestamp = std::max(completeWindowsRes[resultIndex].timestamp, openingWindowsRes[idx].timestamp);\n" + " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "openingWindowsRes[idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = openingWindowsRes[idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + " = openingWindowsRes[idx].key._" + - std::to_string(i) + ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = openingWindowsRes[idx].key._" + std::to_string(i) + ";\n"); } } s.append(" /* Put value(s) */\n"); completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append( - "\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "(openingWindowsRes[idx].value._" + std::to_string((i + 1)) + "+" + - "partialRes[posInB2].value._" + std::to_string((i + 1)) + ")/" + + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "(openingWindowsRes[idx].value._" + std::to_string((i + 1)) + "+" + "partialRes[posInB2].value._" + + std::to_string((i + 1)) + ")/" + "(openingWindowsRes[idx].counter+partialRes[posInB2].counter);\n"); } else - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "openingWindowsRes[idx].value._" + std::to_string((i + 1)), - "partialRes[posInB2].value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], "openingWindowsRes[idx].value._" + std::to_string((i + 1)), + "partialRes[posInB2].value._" + std::to_string((i + 1))) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 5) + - " // Do I need padding here ???\n" - " \n" - " // Unmark occupancy in second buffer\n" - " partialRes[posInB2].state = 0;\n" - " }\n" - " }\n" - "\n" - " /* Iterate over the remaining tuples in the second table. " - "*/\n" - " for (int idx = start2; idx < end2; idx++) {\n" - " if (partialRes[idx].state != 1) /* Skip empty slot " - "*/\n" - " continue; \n" - " /* Copy tuple based on output schema */\n" - " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "partialRes[idx].timestamp;\n" - " /* Put key */\n"); + " // Do I need padding here ???\n" + " \n" + " // Unmark occupancy in second buffer\n" + " partialRes[posInB2].state = 0;\n" + " }\n" + " }\n" + "\n" + " /* Iterate over the remaining tuples in the second table. 
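
> The `pack` branch above merges an opening window's hash table with the matching closing/pending one and writes packed `output_tuple_t` rows: timestamps are combined with `std::max`, keys are copied field by field, SUM/MIN/MAX values go through `getCombineFunction`, and AVG partials are summed and divided by the combined counters. The per-tuple merge, reduced to a sketch (hypothetical flat types):

```cpp
#include <algorithm>

// One merged output row from two AVG partials. 'Bucket' mirrors the generated
// bucket layout: timestamp, key, running sum, contribution count.
struct Bucket      { long timestamp; int key; float sum; long counter; };
struct OutputTuple { long timestamp; int key; float avg; };

inline OutputTuple mergeAvg(const Bucket& opening, const Bucket& closing) {
  OutputTuple out;
  out.timestamp = std::max(opening.timestamp, closing.timestamp);
  out.key = opening.key;  // both sides hold the same key by construction
  out.avg = (opening.sum + closing.sum) /
            static_cast<float>(opening.counter + closing.counter);
  return out;
}
```
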
*/\n" + " for (int idx = start2; idx < end2; idx++) {\n" + " if (partialRes[idx].state != 1) /* Skip empty slot */\n" + " continue; \n" + " /* Copy tuple based on output schema */\n" + " /* Put timestamp */\n" + " completeWindowsRes[resultIndex].timestamp = partialRes[idx].timestamp;\n" + " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "partialRes[idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = partialRes[idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + " = partialRes[idx].key._" + - std::to_string(i) + ";\n"); + s.append( + " completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + " = partialRes[idx].key._" + + std::to_string(i) + ";\n"); } } s.append(" /* Put value(s) */\n"); completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "partialRes[idx].value._" + std::to_string((i + 1)) + - "/partialRes[idx].counter;\n"); + s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "partialRes[idx].value._" + std::to_string((i + 1)) + "/partialRes[idx].counter;\n"); } else - s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "partialRes[idx].value._" + std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "partialRes[idx].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 4) + - " // Do I need padding here ??? \n" - " }\n" - " } else {\n" - " /* Iterate over the second table. */\n" - " for (int idx = start2; idx < end2; idx++) {\n" - " if (partialRes[idx].state != 1) /* Skip empty " - "slot */\n" - " continue;\n" - "\n" - " /* Create a new hash table entry */\n" - " isFound = map1.get_index(partialRes[idx].key, " - "posInRes); //isFound = " - "map2.get_index(&openingWindowsResults[resultIndex], " - "&buffer2[idx].key, posInRes);\n" - //" if (posInRes < 0 || isFound) {\n" - //" printf(\"error: failed to insert new key in - // intermediate hash table \\n\");\n" " exit(1);\n" " }\n" - " if (!isFound) {\n" - " /* Mark occupancy */\n" - " openingWindowsRes[posInRes + " - "resultIndex].state = 1;\n" - " /* Put timestamp */\n" - " openingWindowsRes[posInRes + " - "resultIndex].timestamp = partialRes[idx].timestamp;\n" - " /* Put key and value(s) */\n" - " openingWindowsRes[posInRes + resultIndex].key " - "= partialRes[idx].key;\n" - " openingWindowsRes[posInRes + " - "resultIndex].value = partialRes[idx].value;\n" - " openingWindowsRes[posInRes + " - "resultIndex].counter = partialRes[idx].counter;\n" - " } else {\n" - " /* Mark occupancy */\n" - " openingWindowsRes[posInRes + " - "resultIndex].state = 1;\n" - " /* Put timestamp */\n" - " openingWindowsRes[posInRes + " - "resultIndex].timestamp = std::max(openingWindowsRes[posInRes + " - "resultIndex].timestamp, partialRes[idx].timestamp);\n" - " /* Put key and value(s) */\n" - " openingWindowsRes[posInRes + resultIndex].key " - "= openingWindowsRes[posInRes + resultIndex].key;\n"); + " // Do I need padding here ??? \n" + " }\n" + " } else {\n" + " /* Iterate over the second table. 
*/\n" + " for (int idx = start2; idx < end2; idx++) {\n" + " if (partialRes[idx].state != 1) /* Skip empty slot */\n" + " continue;\n" + "\n" + " /* Create a new hash table entry */\n" + " isFound = map1.get_index(partialRes[idx].key, posInRes); //isFound = map2.get_index(&openingWindowsResults[resultIndex], &buffer2[idx].key, posInRes);\n" + //" if (posInRes < 0 || isFound) {\n" + //" printf(\"error: failed to insert new key in intermediate hash table \\n\");\n" + //" exit(1);\n" + //" }\n" + " if (!isFound) {\n" + " /* Mark occupancy */\n" + " openingWindowsRes[posInRes + resultIndex].state = 1;\n" + " /* Put timestamp */\n" + " openingWindowsRes[posInRes + resultIndex].timestamp = partialRes[idx].timestamp;\n" + " /* Put key and value(s) */\n" + " openingWindowsRes[posInRes + resultIndex].key = partialRes[idx].key;\n" + " openingWindowsRes[posInRes + resultIndex].value = partialRes[idx].value;\n" + " openingWindowsRes[posInRes + resultIndex].counter = partialRes[idx].counter;\n" + " } else {\n" + " /* Mark occupancy */\n" + " openingWindowsRes[posInRes + resultIndex].state = 1;\n" + " /* Put timestamp */\n" + " openingWindowsRes[posInRes + resultIndex].timestamp = std::max(openingWindowsRes[posInRes + resultIndex].timestamp, partialRes[idx].timestamp);\n" + " /* Put key and value(s) */\n" + " openingWindowsRes[posInRes + resultIndex].key = openingWindowsRes[posInRes + resultIndex].key;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t\topeningWindowsRes[posInRes + resultIndex].value._" + - std::to_string((i + 1)) + " = " + - "(openingWindowsRes[posInRes + resultIndex].value._" + - std::to_string((i + 1)) + "+" + "partialRes[idx].value._" + - std::to_string((i + 1)) + "); // /" + - "(openingWindowsRes[posInRes + " - "resultIndex].counter+partialRes[idx].counter);\n"); + s.append("\t\t\t\t\topeningWindowsRes[posInRes + resultIndex].value._" + std::to_string((i + 1)) + " = " + + "(openingWindowsRes[posInRes + resultIndex].value._" + std::to_string((i + 1)) + "+" + + "partialRes[idx].value._" + std::to_string((i + 1)) + "); // /" + + "(openingWindowsRes[posInRes + resultIndex].counter+partialRes[idx].counter);\n"); } else - s.append("\t\t\t\t\topeningWindowsRes[posInRes + resultIndex].value._" + - std::to_string((i + 1)) + " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "openingWindowsRes[posInRes + resultIndex].value._" + - std::to_string((i + 1)), - "partialRes[idx].value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t\topeningWindowsRes[posInRes + resultIndex].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], + "openingWindowsRes[posInRes + resultIndex].value._" + std::to_string((i + 1)), + "partialRes[idx].value._" + std::to_string((i + 1))) + ";\n"); } s.append( " /* Put count */\n" - " openingWindowsRes[posInRes + resultIndex].counter " - "= openingWindowsRes[posInRes + resultIndex].counter + " - "partialRes[idx].counter;\n" + " openingWindowsRes[posInRes + resultIndex].counter = openingWindowsRes[posInRes + resultIndex].counter + partialRes[idx].counter;\n" " }\n" " \n" @@ -2175,11 +2347,10 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " resultIndex += mapSize; \n" " }\n" " }\n" - " // return the pointer required for appending or prepending the " - "results\n" - " return (pack) ? 
resultIndex*sizeof(output_tuple_t) : " - "(numOfWindows*mapSize)*sizeof(Bucket);\n" - "}\n"); + " // return the pointer required for appending or prepending the results\n" + " return (pack) ? resultIndex*sizeof(output_tuple_t) : (numOfWindows*mapSize)*sizeof(Bucket);\n" + "}\n" + ); s.append("\n"); return s; } @@ -2192,25 +2363,19 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else completeResPredicate = "resultIndex += " + getHavingExpr(false) + ";\n"; s.append( - "long aggregatePartials (char **openingBuffer, int " - "*openingStartPointers, int openingEndPointer,\n" - " char **closingOrPendingBuffer, int " - "*copStartPointers, int copEndPointer,\n" - " int startingWindow, int numOfWindows, bool " - "pack,\n" - " char* completeBuffer, long completeBufferPtr, " - "int &tupleSize) {\n" + "long aggregatePartials (char **openingBuffer, int *openingStartPointers, int openingEndPointer,\n" + " char **closingOrPendingBuffer, int *copStartPointers, int copEndPointer,\n" + " int startingWindow, int numOfWindows, bool pack,\n" + " char* completeBuffer, long completeBufferPtr, int &tupleSize) {\n" " tupleSize = sizeof(Bucket);\n" " int mapSize = MAP_SIZE;\n" " // Input and Output Buffers\n" " Bucket **openingWindowsRes= (Bucket **) openingBuffer;\n" " Bucket **partialRes= (Bucket **) closingOrPendingBuffer;\n" - " output_tuple_t *completeWindowsRes = (output_tuple_t *) " - "completeBuffer; // the results here are packed\n" + " output_tuple_t *completeWindowsRes = (output_tuple_t *) completeBuffer; // the results here are packed\n" "\n" " // Temp variables for the merging\n" - " int resultIndex = (pack) ? " - "completeBufferPtr/sizeof(output_tuple_t) : startingWindow*mapSize;\n" + " int resultIndex = (pack) ? completeBufferPtr/sizeof(output_tuple_t) : startingWindow*mapSize;\n" " int posInB2;\n" " bool isFound;\n" " int posInRes = 0;\n" @@ -2223,8 +2388,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " start2 = (pack) ? copStartPointers[wid] : 0;\n" " wid2 = (pack) ? wid : 0;" " end1 = openingStartPointers[wid+1];\n" - " end2 = (pack) ? copStartPointers[wid+1] : " - "copEndPointer/tupleSize;\n" + " end2 = (pack) ? copStartPointers[wid+1] : copEndPointer/tupleSize;\n" " if (end1 < 0)\n" " end1 = openingEndPointer/tupleSize;\n" " if (end2 < 0)\n" @@ -2234,47 +2398,37 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " exit(1);\n" " }\n" " if (start2 == end2) {\n" - " printf (\"error: empty closing/pending window partial " - "result\");\n" + " printf (\"error: empty closing/pending window partial result\");\n" " exit(1);\n" " }\n" - " // search in the correct hashtables by moving the respective " - "pointers\n" + " // search in the correct hashtables by moving the respective pointers\n" " HashTable map1 (openingWindowsRes[wid]);\n" " HashTable map2 (partialRes[wid2]);\n" "\n" " if (pack) {\n" - " /* Iterate over tuples in first table. Search for key in " - "the hash table.\n" + " /* Iterate over tuples in first table. Search for key in the hash table.\n" " * If found, merge the two entries. 
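
> A note on the contract both `aggregatePartials` variants share: the returned `long` is a byte offset at which the caller continues writing. When `pack` is true the function produced complete windows, so it returns `resultIndex * sizeof(output_tuple_t)`; otherwise it produced still-open hash tables, one `MAP_SIZE`-sized table per window, hence `numOfWindows * mapSize * sizeof(Bucket)`. A worked instance (sizes illustrative; the real ones come from the generated structs):

```cpp
#include <cstdio>

constexpr long kOutputTupleSize = 16;   // sizeof(output_tuple_t), assumed
constexpr long kBucketSize      = 32;   // sizeof(Bucket), assumed
constexpr long kMapSize         = 1024; // MAP_SIZE

long bytesWritten(bool pack, long resultIndex, long numOfWindows) {
  return pack ? resultIndex * kOutputTupleSize          // packed complete rows
              : numOfWindows * kMapSize * kBucketSize;  // one table per window
}

int main() {
  std::printf("%ld\n", bytesWritten(true, 500, 0));  // 8000 bytes of tuples
  std::printf("%ld\n", bytesWritten(false, 0, 4));   // 131072 bytes of buckets
  return 0;
}
```
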
*/\n" + + addPostMergeOp() + " for (int idx = 0; idx < mapSize; idx++) {\n" - " if (openingWindowsRes[wid][idx].state != 1) /* Skip " - "empty slot */\n" + " if (openingWindowsRes[wid][idx].state != 1) /* Skip empty slot */\n" " continue;\n" - " isFound = " - "map2.get_index(openingWindowsRes[wid][idx].key, posInB2);\n" + " isFound = map2.get_index(openingWindowsRes[wid][idx].key, posInB2);\n" " if (posInB2 < 0) {\n" - " printf(\"error: open-adress hash table is full " - "\\n\");\n" + " printf(\"error: open-adress hash table is full \\n\");\n" " exit(1);\n" " }\n" //" posInB2 += start2; // get the correct index;\n" " if (!isFound) { \n" " /* Copy tuple based on output schema */\n" " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "openingWindowsRes[wid][idx].timestamp;\n" + " completeWindowsRes[resultIndex].timestamp = openingWindowsRes[wid][idx].timestamp;\n" " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "openingWindowsRes[wid][idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = openingWindowsRes[wid][idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + - " = openingWindowsRes[wid][idx].key._" + std::to_string(i) + - ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = openingWindowsRes[wid][idx].key._" + std::to_string(i) + ";\n"); } } @@ -2282,191 +2436,141 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { s.append(" /* Put value(s) */\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "openingWindowsRes[wid][idx].value._" + - std::to_string((i + 1)) + - "/openingWindowsRes[wid][idx].counter;\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "openingWindowsRes[wid][idx].value._" + std::to_string((i + 1)) + + "/openingWindowsRes[wid][idx].counter;\n"); } else - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "openingWindowsRes[wid][idx].value._" + - std::to_string((i + 1)) + ";\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "openingWindowsRes[wid][idx].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 5) + - " // Do I need padding here ???\n" - " } else { // merge values based on the number of " - "aggregated values and their types! \n" - " /* Copy tuple based on output schema */\n" - " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "std::max(completeWindowsRes[resultIndex].timestamp, " - "openingWindowsRes[wid][idx].timestamp);\n" - " /* Put key */\n"); + " // Do I need padding here ???\n" + " } else { // merge values based on the number of aggregated values and their types! 
\n" + " /* Copy tuple based on output schema */\n" + " /* Put timestamp */\n" + " completeWindowsRes[resultIndex].timestamp = std::max(completeWindowsRes[resultIndex].timestamp, openingWindowsRes[wid][idx].timestamp);\n" + " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "openingWindowsRes[wid][idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = openingWindowsRes[wid][idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + - " = openingWindowsRes[wid][idx].key._" + std::to_string(i) + - ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = openingWindowsRes[wid][idx].key._" + std::to_string(i) + ";\n"); } } s.append(" /* Put value(s) */\n"); completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "(openingWindowsRes[wid][idx].value._" + - std::to_string((i + 1)) + "+" + - "partialRes[wid2][posInB2].value._" + std::to_string((i + 1)) + - ")/" + - "(openingWindowsRes[wid][idx].counter+partialRes[wid2][" - "posInB2].counter);\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "(openingWindowsRes[wid][idx].value._" + std::to_string((i + 1)) + "+" + "partialRes[wid2][posInB2].value._" + + std::to_string((i + 1)) + ")/" + + "(openingWindowsRes[wid][idx].counter+partialRes[wid2][posInB2].counter);\n"); } else - s.append( - "\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "openingWindowsRes[wid][idx].value._" + std::to_string((i + 1)), - "partialRes[wid2][posInB2].value._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], + "openingWindowsRes[wid][idx].value._" + std::to_string((i + 1)), + "partialRes[wid2][posInB2].value._" + std::to_string((i + 1))) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 5) + - " // Do I need padding here ???\n" - " \n" - " // Unmark occupancy in second buffer\n" - " partialRes[wid2][posInB2].state = 0;\n" - " }\n" - " }\n" - "\n" - " /* Iterate over the remaining tuples in the second table. " - "*/\n" - " for (int idx = 0; idx < mapSize; idx++) {\n" - " if (partialRes[wid2][idx].state != 1) /* Skip empty " - "slot */\n" - " continue; \n" - " /* Copy tuple based on output schema */\n" - " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "partialRes[wid2][idx].timestamp;\n" - " /* Put key */\n"); + " // Do I need padding here ???\n" + " \n" + " // Unmark occupancy in second buffer\n" + " partialRes[wid2][posInB2].state = 0;\n" + " }\n" + " }\n" + "\n" + " /* Iterate over the remaining tuples in the second table. 
*/\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (partialRes[wid2][idx].state != 1) /* Skip empty slot */\n" + " continue; \n" + " /* Copy tuple based on output schema */\n" + " /* Put timestamp */\n" + " completeWindowsRes[resultIndex].timestamp = partialRes[wid2][idx].timestamp;\n" + " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "partialRes[wid2][idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = partialRes[wid2][idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + " = partialRes[wid2][idx].key._" + - std::to_string(i) + ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = partialRes[wid2][idx].key._" + std::to_string(i) + ";\n"); } } s.append(" /* Put value(s) */\n"); completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + - "/partialRes[wid2][idx].counter;\n"); + s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + "/partialRes[wid2][idx].counter;\n"); } else - s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + - ";\n"); + s.append("\t\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 4) + - " // Do I need padding here ??? \n" - " }\n" - " } else {\n" - " /* Iterate over the second table. */\n" - " for (int idx = 0; idx < mapSize; idx++) {\n" - " if (partialRes[wid2][idx].state != 1) /* Skip " - "empty slot */\n" - " continue;\n" - "\n" - " /* Create a new hash table entry */\n" - " isFound = " - "map1.get_index(partialRes[wid2][idx].key, posInRes); //isFound = " - "map2.get_index(&openingWindowsResults[resultIndex], " - "&buffer2[idx].key, posInRes);\n" - //" if (posInRes < 0 || isFound) {\n" - //" printf(\"error: failed to insert new key in - // intermediate hash table \\n\");\n" " exit(1);\n" " }\n" - " if (!isFound) {\n" - " /* Mark occupancy */\n" - " openingWindowsRes[wid][posInRes].state = 1;\n" - " /* Put timestamp */\n" - " openingWindowsRes[wid][posInRes].timestamp = " - "partialRes[wid2][idx].timestamp;\n" - " /* Put key and value(s) */\n" - " openingWindowsRes[wid][posInRes].key = " - "partialRes[wid2][idx].key;\n" - " openingWindowsRes[wid][posInRes].value = " - "partialRes[wid2][idx].value;\n" - " openingWindowsRes[wid][posInRes].counter = " - "partialRes[wid2][idx].counter;\n" - " } else {\n" - " /* Mark occupancy */\n" - " openingWindowsRes[wid][posInRes].state = 1;\n" - " /* Put timestamp */\n" - " openingWindowsRes[wid][posInRes].timestamp = " - "std::max(openingWindowsRes[wid][posInRes].timestamp, " - "partialRes[wid2][idx].timestamp);\n" - " /* Put key and value(s) */\n" - " openingWindowsRes[wid][posInRes].key = " - "openingWindowsRes[wid][posInRes].key;\n"); + " // Do I need padding here ??? \n" + " }\n" + " } else {\n" + " /* Iterate over the second table. 
*/\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (partialRes[wid2][idx].state != 1) /* Skip empty slot */\n" + " continue;\n" + "\n" + " /* Create a new hash table entry */\n" + " isFound = map1.get_index(partialRes[wid2][idx].key, posInRes); //isFound = map2.get_index(&openingWindowsResults[resultIndex], &buffer2[idx].key, posInRes);\n" + //" if (posInRes < 0 || isFound) {\n" + //" printf(\"error: failed to insert new key in intermediate hash table \\n\");\n" + //" exit(1);\n" + //" }\n" + " if (!isFound) {\n" + " /* Mark occupancy */\n" + " openingWindowsRes[wid][posInRes].state = 1;\n" + " /* Put timestamp */\n" + " openingWindowsRes[wid][posInRes].timestamp = partialRes[wid2][idx].timestamp;\n" + " /* Put key and value(s) */\n" + " openingWindowsRes[wid][posInRes].key = partialRes[wid2][idx].key;\n" + " openingWindowsRes[wid][posInRes].value = partialRes[wid2][idx].value;\n" + " openingWindowsRes[wid][posInRes].counter = partialRes[wid2][idx].counter;\n" + " } else {\n" + " /* Mark occupancy */\n" + " openingWindowsRes[wid][posInRes].state = 1;\n" + " /* Put timestamp */\n" + " openingWindowsRes[wid][posInRes].timestamp = std::max(openingWindowsRes[wid][posInRes].timestamp, partialRes[wid2][idx].timestamp);\n" + " /* Put key and value(s) */\n" + " openingWindowsRes[wid][posInRes].key = openingWindowsRes[wid][posInRes].key;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\t\topeningWindowsRes[wid][posInRes].value._" + - std::to_string((i + 1)) + " = " + - "(openingWindowsRes[wid][posInRes].value._" + - std::to_string((i + 1)) + "+" + - "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + - "); // /" + - "(openingWindowsRes[wid][posInRes].counter+partialRes[wid2][" - "idx].counter);\n"); + s.append("\t\t\t\t\topeningWindowsRes[wid][posInRes].value._" + std::to_string((i + 1)) + " = " + + "(openingWindowsRes[wid][posInRes].value._" + std::to_string((i + 1)) + "+" + + "partialRes[wid2][idx].value._" + std::to_string((i + 1)) + "); // /" + + "(openingWindowsRes[wid][posInRes].counter+partialRes[wid2][idx].counter);\n"); } else - s.append("\t\t\t\t\topeningWindowsRes[wid][posInRes].value._" + - std::to_string((i + 1)) + " = " + - getCombineFunction((*m_aggregationTypes)[i], - "openingWindowsRes[wid][posInRes].value._" + - std::to_string((i + 1)), - "partialRes[wid2][idx].value._" + - std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\t\t\topeningWindowsRes[wid][posInRes].value._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], + "openingWindowsRes[wid][posInRes].value._" + std::to_string((i + 1)), + "partialRes[wid2][idx].value._" + std::to_string((i + 1))) + ";\n"); } s.append( " /* Put count */\n" - " openingWindowsRes[wid][posInRes].counter = " - "openingWindowsRes[wid][posInRes].counter + " - "partialRes[wid2][idx].counter;\n" + " openingWindowsRes[wid][posInRes].counter = openingWindowsRes[wid][posInRes].counter + partialRes[wid2][idx].counter;\n" " }\n" " \n" " }\n" " resultIndex += mapSize; \n" " }\n" " }\n" - " // return the pointer required for appending or prepending the " - "results\n" - " return (pack) ? resultIndex*sizeof(output_tuple_t) : " - "(numOfWindows*mapSize)*sizeof(Bucket);\n" - "}\n"); + " // return the pointer required for appending or prepending the results\n" + " return (pack) ? 
resultIndex*sizeof(output_tuple_t) : (numOfWindows*mapSize)*sizeof(Bucket);\n" + "}\n" + ); s.append("\n"); return s; } @@ -2479,25 +2583,19 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else completeResPredicate = "resultIndex += " + getHavingExpr(false) + ";\n"; s.append( - "long aggregatePartials (char *completeBuffer, int completeWindow, int " - "completeStartPos,\n" + "long aggregatePartials (char *completeBuffer, int completeWindow, int completeStartPos,\n" " char **partialBuffer, int partialWindow,\n" " int &startPos, int &endPos,\n" " int &tupleSize, bool pack) {\n" " tupleSize = sizeof(Bucket);\n" " int mapSize = MAP_SIZE;\n" " int hashTableSpacing = mapSize * sizeof(Bucket);\n" - " int currentWindowSpacing = completeStartPos + completeWindow * " - "hashTableSpacing;\n" + " int currentWindowSpacing = completeStartPos + completeWindow * hashTableSpacing;\n" "\n" " // Input and Output Buffers\n" - " Bucket **partialRes= (partialBuffer!=nullptr) ? (Bucket **) " - "partialBuffer : nullptr;\n" - " Bucket *tempCompleteWindowsRes = (Bucket *) " - "(completeBuffer+currentWindowSpacing);\n" - " output_tuple_t *completeWindowsRes = (output_tuple_t *) " - "(completeBuffer+currentWindowSpacing); // the results here are " - "packed\n" + " Bucket **partialRes= (partialBuffer!=nullptr) ? (Bucket **) partialBuffer : nullptr;\n" + " Bucket *tempCompleteWindowsRes = (Bucket *) (completeBuffer+currentWindowSpacing);\n" + " output_tuple_t *completeWindowsRes = (output_tuple_t *) (completeBuffer+currentWindowSpacing); // the results here are packed\n" "\n" " // Temp variables for the merging\n" " bool isFound;\n" @@ -2507,16 +2605,12 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" " // check boundaries\n" " if (currentWindowSpacing+hashTableSpacing >= UNBOUNDED_SIZE) {\n" - " throw std::runtime_error (\"error: resize unbounded buffer: " - "\" + std::to_string(currentWindowSpacing+hashTableSpacing) + \" - \" " - "+ std::to_string(UNBOUNDED_SIZE) + \"\\n\");\n" + " throw std::runtime_error (\"error: resize unbounded buffer: \" + std::to_string(currentWindowSpacing+hashTableSpacing) + \" - \" + std::to_string(UNBOUNDED_SIZE) + \"\\n\");\n" " }\n" "\n" " if (startPos == -1) {\n" - " // memcpy the first opening buffer to the correct result " - "slot\n" - " memcpy(completeBuffer+currentWindowSpacing, " - "partialRes[partialWindow], mapSize * tupleSize * sizeof(char));\n" + " // memcpy the first opening buffer to the correct result slot\n" + " memcpy(completeBuffer+currentWindowSpacing, partialRes[partialWindow], mapSize * tupleSize * sizeof(char));\n" " startPos = 0;\n" " endPos = 0;\n" " return 0;\n" @@ -2527,14 +2621,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " HashTable rightMap (partialRes[partialWindow]);\n" " /* Iterate over the second table. 
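> After the merge, the `pack` path above flattens occupied buckets into dense `output_tuple_t` rows and finalises AVG as sum/counter. A sketch under the same simplifying assumptions (single key, one AVG aggregate, and a placeholder predicate standing in for the generated `completeResPredicate`):

```cpp
struct Bucket { char state; long timestamp; long key; float sum; int counter; };
struct OutputTuple { long timestamp; long key; float avg; };

int packWindow(const Bucket *table, int mapSize, OutputTuple *out, int resultIndex) {
  for (int idx = 0; idx < mapSize; ++idx) {
    if (table[idx].state != 1) continue;  // skip empty slot
    out[resultIndex].timestamp = table[idx].timestamp;
    out[resultIndex].key = table[idx].key;
    out[resultIndex].avg = table[idx].sum / static_cast<float>(table[idx].counter);
    // With a HAVING clause the generator emits "resultIndex += (predicate);"
    // instead of an unconditional resultIndex++, filtering without a branch:
    resultIndex += (out[resultIndex].avg > 0.0f) ? 1 : 0;  // placeholder predicate
  }
  return resultIndex;  // the caller turns this into a byte offset, as above
}
```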
*/\n" " for (int idx = 0; idx < mapSize; idx++) {\n" - " if (partialRes[partialWindow][idx].state != 1) /* Skip " - "empty slot */\n" + " if (partialRes[partialWindow][idx].state != 1) /* Skip empty slot */\n" " continue;\n" - " isFound = " - "leftMap.get_index(partialRes[partialWindow][idx].key, posInLeft);\n" + " isFound = leftMap.get_index(partialRes[partialWindow][idx].key, posInLeft);\n" " if (posInLeft < 0) {\n" - " printf(\"error: open-adress hash table is full " - "\\n\");\n" + " printf(\"error: open-adress hash table is full \\n\");\n" " exit(1);\n" " }\n" " /* Create a new hash table entry */\n" @@ -2542,103 +2633,77 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " /* Mark occupancy */\n" " tempCompleteWindowsRes[posInLeft].state = 1;\n" " /* Put timestamp */\n" - " tempCompleteWindowsRes[posInLeft].timestamp = " - "partialRes[partialWindow][idx].timestamp;\n" + " tempCompleteWindowsRes[posInLeft].timestamp = partialRes[partialWindow][idx].timestamp;\n" " /* Put key and value(s) */\n" - " tempCompleteWindowsRes[posInLeft].key = " - "partialRes[partialWindow][idx].key;\n" - " tempCompleteWindowsRes[posInLeft].value = " - "partialRes[partialWindow][idx].value;\n" - " tempCompleteWindowsRes[posInLeft].counter = " - "partialRes[partialWindow][idx].counter;\n" + " tempCompleteWindowsRes[posInLeft].key = partialRes[partialWindow][idx].key;\n" + " tempCompleteWindowsRes[posInLeft].value = partialRes[partialWindow][idx].value;\n" + " tempCompleteWindowsRes[posInLeft].counter = partialRes[partialWindow][idx].counter;\n" " } else {\n" " /* Mark occupancy */\n" " tempCompleteWindowsRes[posInLeft].state = 1;\n" " /* Put timestamp */\n" - " tempCompleteWindowsRes[posInLeft].timestamp = " - "std::max(tempCompleteWindowsRes[posInLeft].timestamp, " - "partialRes[partialWindow][idx].timestamp);\n" + " tempCompleteWindowsRes[posInLeft].timestamp = std::max(tempCompleteWindowsRes[posInLeft].timestamp, partialRes[partialWindow][idx].timestamp);\n" " /* Put key and value(s) */\n" - " tempCompleteWindowsRes[posInLeft].key = " - "tempCompleteWindowsRes[posInLeft].key;\n"); + " tempCompleteWindowsRes[posInLeft].key = tempCompleteWindowsRes[posInLeft].key;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\t\ttempCompleteWindowsRes[posInLeft].value._" + - std::to_string((i + 1)) + " = " + - "(tempCompleteWindowsRes[posInLeft].value._" + - std::to_string((i + 1)) + "+" + - "partialRes[partialWindow][idx].value._" + - std::to_string((i + 1)) + "); // /" + - "(tempCompleteWindowsRes[posInLeft].counter+partialRes[" - "partialWindow][idx].counter);\n"); + s.append("\t\t\t\ttempCompleteWindowsRes[posInLeft].value._" + std::to_string((i + 1)) + " = " + + "(tempCompleteWindowsRes[posInLeft].value._" + std::to_string((i + 1)) + "+" + + "partialRes[partialWindow][idx].value._" + std::to_string((i + 1)) + "); // /" + + "(tempCompleteWindowsRes[posInLeft].counter+partialRes[partialWindow][idx].counter);\n"); } else - s.append( - "\t\t\t\ttempCompleteWindowsRes[posInLeft].value._" + - std::to_string((i + 1)) + " = " + + s.append("\t\t\t\ttempCompleteWindowsRes[posInLeft].value._" + std::to_string((i + 1)) + " = " + getCombineFunction((*m_aggregationTypes)[i], - "tempCompleteWindowsRes[posInLeft].value._" + - std::to_string((i + 1)), - "partialRes[partialWindow][idx].value._" + - std::to_string((i + 1))) + - ";\n"); + "tempCompleteWindowsRes[posInLeft].value._" + std::to_string((i + 1)), + 
"partialRes[partialWindow][idx].value._" + std::to_string((i + 1))) + ";\n"); } s.append( " /* Put count */\n" - " tempCompleteWindowsRes[posInLeft].counter = " - "tempCompleteWindowsRes[posInLeft].counter + " - "partialRes[partialWindow][idx].counter;\n" + " tempCompleteWindowsRes[posInLeft].counter = tempCompleteWindowsRes[posInLeft].counter + partialRes[partialWindow][idx].counter;\n" " }\n" " \n" " } \n" " } else {\n" + + addPostMergeOp() + " for (int idx = 0; idx < mapSize; idx++) {\n" - " if (tempCompleteWindowsRes[idx].state != 1) /* Skip empty " - "slot */\n" + " if (tempCompleteWindowsRes[idx].state != 1) /* Skip empty slot */\n" " continue;\n" " /* Copy tuple based on output schema */\n" " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "tempCompleteWindowsRes[idx].timestamp;\n" + " completeWindowsRes[resultIndex].timestamp = tempCompleteWindowsRes[idx].timestamp;\n" " /* Put key */\n"); if (m_numberOfKeyAttributes == 1) { - s.append( - " completeWindowsRes[resultIndex]._1 = " - "tempCompleteWindowsRes[idx].key;\n"); + s.append(" completeWindowsRes[resultIndex]._1 = tempCompleteWindowsRes[idx].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append(" completeWindowsRes[resultIndex]._" + - std::to_string(i + 1) + - " = tempCompleteWindowsRes[idx].key._" + std::to_string(i) + - ";\n"); + s.append(" completeWindowsRes[resultIndex]._" + std::to_string(i + 1) + + " = tempCompleteWindowsRes[idx].key._" + std::to_string(i) + ";\n"); } } s.append(" /* Put value(s) */\n"); auto completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "tempCompleteWindowsRes[idx].value._" + - std::to_string((i + 1)) + - "/tempCompleteWindowsRes[idx].counter;\n"); + s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "tempCompleteWindowsRes[idx].value._" + std::to_string((i + 1)) + + "/tempCompleteWindowsRes[idx].counter;\n"); } else - s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((completeResIndex + 1)) + " = " + - "tempCompleteWindowsRes[idx].value._" + - std::to_string((i + 1)) + ";\n"); + s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((completeResIndex + 1)) + " = " + + "tempCompleteWindowsRes[idx].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } - s.append(addTabs(completeResPredicate, 3) + - " // Do I need padding here ??? \n" - " }\n" - "\n" - " // set result positions\n" - " startPos = currentWindowSpacing;\n" - " endPos = currentWindowSpacing + " - "sizeof(output_tuple_t)*resultIndex;\n" - " }\n" - " return 0;\n" - "}\n"); + s.append( + addTabs(completeResPredicate, 3) + + " // Do I need padding here ??? 
\n" + " }\n" + "\n" + " // set result positions\n" + " startPos = currentWindowSpacing;\n" + " endPos = currentWindowSpacing + sizeof(output_tuple_t)*resultIndex;\n" + " }\n" + " return 0;\n" + "}\n" + ); s.append("\n"); return s; } @@ -2656,22 +2721,17 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { for (; i < m_outputSchema->numberOfAttributes(); i++) { auto type = m_outputSchema->getAttributeType(i); switch (type) { - case BasicType::Integer: - s.append("\tint _" + std::to_string(i) + ";\n"); + case BasicType::Integer : s.append("\tint _" + std::to_string(i) + ";\n"); break; - case BasicType::Float: - s.append("\tfloat _" + std::to_string(i) + ";\n"); + case BasicType::Float : s.append("\tfloat _" + std::to_string(i) + ";\n"); break; - case BasicType::Long: - s.append("\tlong _" + std::to_string(i) + ";\n"); + case BasicType::Long : s.append("\tlong _" + std::to_string(i) + ";\n"); break; - case BasicType::LongLong: - s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); + case BasicType::LongLong : s.append("\t__uint128_t _" + std::to_string(i) + ";\n"); break; - default: + default : throw std::runtime_error( - "error: failed to generate tuple struct (attribute " + - std::to_string(i) + " is undefined)"); + "error: failed to generate tuple struct (attribute " + std::to_string(i) + " is undefined)"); } } s.append("};\n"); @@ -2681,9 +2741,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getSingleStaticDeclaration() { std::string s; - s.append("static Aggregator aggrStructures[" + - std::to_string(SystemConf::getInstance().WORKER_THREADS) + - "];\n\n"); + s.append("static Aggregator aggrStructures[" + std::to_string(SystemConf::getInstance().WORKER_THREADS) + "];\n\n"); return s; } @@ -2695,34 +2753,25 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else completeResPredicate = "resultIndex += " + getHavingExpr(false) + ";\n"; s.append( - "long aggregatePartials (char *openingBuffer, int " - "*openingStartPointers, int openingEndPointer,\n" - " char *closingOrPendingBuffer, int " - "*copStartPointers, int copEndPointer,\n" - " int startingWindow, int numOfWindows, bool " - "pack,\n" - " char* completeBuffer, long completeBufferPtr, " - "int &tupleSize) {\n" + "long aggregatePartials (char *openingBuffer, int *openingStartPointers, int openingEndPointer,\n" + " char *closingOrPendingBuffer, int *copStartPointers, int copEndPointer,\n" + " int startingWindow, int numOfWindows, bool pack,\n" + " char* completeBuffer, long completeBufferPtr, int &tupleSize) {\n" " tupleSize = sizeof(interm_tuple_t);\n" " // Input and Output Buffers\n" - " interm_tuple_t *openingWindowsRes= (interm_tuple_t *) " - "openingBuffer;\n" - " interm_tuple_t *partialRes= (interm_tuple_t *) " - "closingOrPendingBuffer;\n" - " output_tuple_t *completeWindowsRes = (output_tuple_t *) " - "completeBuffer; // the results here are packed\n" + " interm_tuple_t *openingWindowsRes= (interm_tuple_t *) openingBuffer;\n" + " interm_tuple_t *partialRes= (interm_tuple_t *) closingOrPendingBuffer;\n" + " output_tuple_t *completeWindowsRes = (output_tuple_t *) completeBuffer; // the results here are packed\n" "\n" " // Temp variables for the merging\n" - " int resultIndex = (pack) ? " - "completeBufferPtr/sizeof(output_tuple_t) : startingWindow;\n" + " int resultIndex = (pack) ? 
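> Each reflowed `getCombineFunction` call above emits one combine expression per aggregate column. A hedged sketch of the semantics those expressions implement; the enum mirrors the aggregation types the generator switches over (SUM, AVG, CNT, MIN, MAX), but the function itself is illustrative rather than generated:

```cpp
enum class AggregationType { SUM, AVG, CNT, MIN, MAX };

// Combine two partial values of the same aggregate. For AVG only the
// numerator is combined here; the counters are summed separately and the
// division happens when a complete window is packed.
float combine(AggregationType type, float a, float b) {
  switch (type) {
    case AggregationType::SUM:
    case AggregationType::AVG:
    case AggregationType::CNT: return a + b;
    case AggregationType::MIN: return a < b ? a : b;
    case AggregationType::MAX: return a > b ? a : b;
  }
  return a;  // unreachable
}
```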
completeBufferPtr/sizeof(output_tuple_t) : startingWindow;\n" "\n" " int start1, end1, start2, end2;\n" " for (int wid = startingWindow; wid < numOfWindows; ++wid) {\n" " start1 = openingStartPointers[wid];\n" " start2 = (pack) ? copStartPointers[wid] : 0;\n" " end1 = openingStartPointers[wid+1];\n" - " end2 = (pack) ? copStartPointers[wid+1] : " - "copEndPointer/tupleSize;\n" + " end2 = (pack) ? copStartPointers[wid+1] : copEndPointer/tupleSize;\n" " if (end1 < 0)\n" " end1 = openingEndPointer/tupleSize;\n" " if (end2 < 0)\n" @@ -2732,114 +2781,87 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " exit(1);\n" " }\n" " if (start2 == end2) {\n" - " printf (\"error: empty closing/pending window partial " - "result\");\n" + " printf (\"error: empty closing/pending window partial result\");\n" " exit(1);\n" " }\n" - " // merge values based on the number of aggregated values and " - "their types!\n" + " // merge values based on the number of aggregated values and their types!\n" " if (pack) {\n" " /* Copy tuple based on output schema */\n" " /* Put timestamp */\n" - " completeWindowsRes[resultIndex].timestamp = " - "std::max(openingWindowsRes[start1].timestamp, " - "partialRes[start2].timestamp);\n" + " completeWindowsRes[resultIndex].timestamp = std::max(openingWindowsRes[start1].timestamp, partialRes[start2].timestamp);\n" " /* Put value(s) */\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((i + 1)) + " = " + - "(openingWindowsRes[start1]._" + std::to_string((i + 1)) + - "+" + "partialRes[start2]._" + std::to_string((i + 1)) + ")/" + - "(openingWindowsRes[start1]._" + - std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - "+" + "partialRes[start2]._" + - std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - ");\n"); + s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((i + 1)) + " = " + + "(openingWindowsRes[start1]._" + std::to_string((i + 1)) + "+" + "partialRes[start2]._" + + std::to_string((i + 1)) + ")/" + + "(openingWindowsRes[start1]._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + "+" + + "partialRes[start2]._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + ");\n"); } else - s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + - std::to_string((i + 1)) + " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "openingWindowsRes[start1]._" + std::to_string((i + 1)), - "partialRes[start2]._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\tcompleteWindowsRes[resultIndex]._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], "openingWindowsRes[start1]._" + std::to_string((i + 1)), + "partialRes[start2]._" + std::to_string((i + 1))) + ";\n"); } s.append( //" resultIndex++;\n" addTabs(completeResPredicate, 3) + - " } else {\n" - " openingWindowsRes[start1].timestamp = " - "partialRes[start2].timestamp;\n" - " /* Put value(s) */\n"); + " } else {\n" + " openingWindowsRes[start1].timestamp = partialRes[start2].timestamp;\n" + " /* Put value(s) */\n" + ); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("\t\t\topeningWindowsRes[start1]._" + std::to_string((i + 1)) + - " = " + "openingWindowsRes[start1]._" + - std::to_string((i + 1)) + "+" + "partialRes[start2]._" + - std::to_string((i + 1)) + ";\n"); - s.append("\t\t\topeningWindowsRes[start1]._" + - 
std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - " = " + "openingWindowsRes[start1]._" + - std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - "+" + "partialRes[start2]._" + - std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - ";\n"); + s.append("\t\t\topeningWindowsRes[start1]._" + std::to_string((i + 1)) + " = " + + "openingWindowsRes[start1]._" + std::to_string((i + 1)) + "+" + "partialRes[start2]._" + + std::to_string((i + 1)) + ";\n"); + s.append( + "\t\t\topeningWindowsRes[start1]._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + " = " + + "openingWindowsRes[start1]._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + "+" + + "partialRes[start2]._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + ";\n"); } else - s.append("\t\t\topeningWindowsRes[start1]._" + std::to_string((i + 1)) + - " = " + - getCombineFunction( - (*m_aggregationTypes)[i], - "openingWindowsRes[start1]._" + std::to_string((i + 1)), - "partialRes[start2]._" + std::to_string((i + 1))) + - ";\n"); + s.append("\t\t\topeningWindowsRes[start1]._" + std::to_string((i + 1)) + " = " + + getCombineFunction((*m_aggregationTypes)[i], "openingWindowsRes[start1]._" + std::to_string((i + 1)), + "partialRes[start2]._" + std::to_string((i + 1))) + ";\n"); } s.append( " }\n" " }\n" - " // return the pointer required for appending or prepending the " - "results\n" - " return (pack) ? resultIndex*sizeof(output_tuple_t) : " - "(numOfWindows)*sizeof(interm_tuple_t);\n" - "}\n"); + " // return the pointer required for appending or prepending the results\n" + " return (pack) ? resultIndex*sizeof(output_tuple_t) : (numOfWindows)*sizeof(interm_tuple_t);\n" + "}\n" + ); s.append("\n"); return s; } - std::string getSelectionExpr() { - std::string s; - if (hasSelection()) { - s.append("if ( "); - s.append(m_predicate->toSExprForCodeGen()); - s.append(" )\n"); - } - return s; - } - std::string getHavingExpr(bool isCompute) { std::string s; - if (hasHavingPredicate()) { + if (!hasHavingPredicate() && m_postWindowPredicate.empty()) { + // do nothing + } else if (hasHavingPredicate() && m_postWindowPredicate.empty()) { s.append("( "); s.append(m_havingPredicate->toSExprForCodeGen()); s.append(" )"); + std::string str = "data[bufferPtr]"; + if (isCompute) + s.replace(s.find(str), str.length(), "completeWindowsResults[completeWindowsPointer]"); + else + s.replace(s.find(str), str.length(), "completeWindowsRes[resultIndex]"); + } else { + s.append(m_postWindowPredicate); + std::string str = "completeWindowsResults[completeWindowsPointer]"; + if (!isCompute) + s.replace(s.find(str), str.length(), "completeWindowsRes[resultIndex]"); } - std::string str = "data[bufferPtr]"; - if (isCompute) - s.replace(s.find(str), str.length(), - "completeWindowsResults[completeWindowsPointer]"); - else - s.replace(s.find(str), str.length(), "completeWindowsRes[resultIndex]"); return s; } std::string getProjectionExpr() { std::string s; - if (hasProjection() && m_expressions != nullptr && - !m_expressions->empty()) { + if (hasProjection() && m_expressions != nullptr && !m_expressions->empty()) { s.append("\toutput[pos].timestamp = data[bufferPtr].timestamp;\n"); for (unsigned long i = 1; i < m_expressions->size(); ++i) { - s.append("\toutput[pos]._" + std::to_string(i) + " = " + - (*m_expressions)[i]->toSExprForCodeGen() + ";\n"); + s.append("\toutput[pos]._" + std::to_string(i) + " = " + (*m_expressions)[i]->toSExprForCodeGen() + ";\n"); } } return s; @@ -2848,24 +2870,21 @@ class 
OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getAggregationVarsOutputBuffers() { std::string s; s.append( - "interm_tuple_t *openingWindowsResults = (interm_tuple_t *) " - "openingWindowsBuffer; // the results here are in the\n" - "interm_tuple_t *closingWindowsResults = (interm_tuple_t *) " - "closingWindowsBuffer; // form of the hashtable\n" - "interm_tuple_t *pendingWindowsResults = (interm_tuple_t *) " - "pendingWindowsBuffer;\n"); + "interm_tuple_t *openingWindowsResults = (interm_tuple_t *) openingWindowsBuffer; // the results here are in the\n" + "interm_tuple_t *closingWindowsResults = (interm_tuple_t *) closingWindowsBuffer; // form of the hashtable\n" + "interm_tuple_t *pendingWindowsResults = (interm_tuple_t *) pendingWindowsBuffer;\n" + ); return s; } std::string getAggregationVarsInitialization() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); std::string s; - if (hasIncremental()) s.append("aggrStructures[pid].initialise();\n"); - s.append( - "node aggrs;\n" - "aggrs.reset();\n"); + if (hasIncremental()) + s.append("aggrStructures[pid].initialise();\n"); + s.append("node aggrs;\n" + "aggrs.reset();\n"); return s; } @@ -2884,36 +2903,27 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { else input.append("data[bufferPtr]._" + std::to_string(col)); switch ((*m_aggregationTypes)[i]) { - case SUM: - s.append("aggrs._" + std::to_string(i + 1) + " += " + input + - ";\n"); + case SUM:s.append("aggrs._" + std::to_string(i + 1) + " += " + input + ";\n"); break; - case AVG: - s.append("aggrs._" + std::to_string(i + 1) + " += " + input + - ";\n"); + case AVG:s.append("aggrs._" + std::to_string(i + 1) + " += " + input + ";\n"); s.append("aggrs._c" + std::to_string(i + 1) + "++;\n"); break; - case CNT: - s.append("aggrs._c" + std::to_string(i + 1) + "++;\n"); + case CNT:s.append("aggrs._c" + std::to_string(i + 1) + "++;\n"); break; case MIN: - s.append("aggrs._" + std::to_string(i + 1) + " = " + - getCombineFunction(MIN, "aggrs._" + std::to_string(i + 1), - input) + - ";\n"); + s.append("aggrs._" + std::to_string(i + 1) + " = " + + getCombineFunction(MIN, "aggrs._" + std::to_string(i + 1), input) + ";\n"); break; case MAX: - s.append("aggrs._" + std::to_string(i + 1) + " = " + - getCombineFunction(MAX, "aggrs._" + std::to_string(i + 1), - input) + - ";\n"); + s.append("aggrs._" + std::to_string(i + 1) + " = " + + getCombineFunction(MAX, "aggrs._" + std::to_string(i + 1), input) + ";\n"); break; - default: - throw std::runtime_error("error: invalid aggregation type"); + default:throw std::runtime_error("error: invalid aggregation type"); } } } if (hasProjection()) { + } if (!isRangeBased && hasSelection()) { s.append("}\n"); @@ -2923,24 +2933,21 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getAggregationVarsReset() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); std::string s; s.append("aggrs.reset();\n"); - // if (hasIncremental()) {} + //if (hasIncremental()) {} return s; } std::string getWriteIntermediateResults(int bufferType) { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); if 
(m_outputSchema == nullptr) throw std::runtime_error("error: outputSchema hasn't been set up"); - if ((int)m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) + if ((int) m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) throw std::runtime_error( - "error: the number of aggregation types should be <= to the " - "attributes of the output schema"); + "error: the number of aggregation types should be <= to the attributes of the output schema"); std::string s; std::string buffer; if (bufferType == 0) @@ -2952,50 +2959,39 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { s.append("" + buffer + ".timestamp = data[bufferPtr-1].timestamp;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("" + buffer + "._" + std::to_string((i + 1)) + " = aggrs._" + - std::to_string((i + 1)) + ";\n"); - s.append("" + buffer + "._" + - std::to_string((m_outputSchema->numberOfAttributes() - 1)) + - " = aggrs._c" + std::to_string((i + 1)) + ";\n"); + s.append("" + buffer + "._" + std::to_string((i + 1)) + " = aggrs._" + std::to_string((i + 1)) + ";\n"); + s.append("" + buffer + "._" + std::to_string((m_outputSchema->numberOfAttributes() - 1)) + " = aggrs._c" + + std::to_string((i + 1)) + ";\n"); } else - s.append("" + buffer + "._" + std::to_string((i + 1)) + " = aggrs._" + - std::to_string((i + 1)) + ";\n"); + s.append("" + buffer + "._" + std::to_string((i + 1)) + " = aggrs._" + std::to_string((i + 1)) + ";\n"); } return s; } std::string getWriteCompleteResults() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); if (m_outputSchema == nullptr) throw std::runtime_error("error: outputSchema hasn't been set up"); - if ((int)m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) + if ((int) m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) throw std::runtime_error( - "error: the number of aggregation types should be <= to the " - "attributes of the output schema"); + "error: the number of aggregation types should be <= to the attributes of the output schema"); std::string s; - s.append( - "completeWindowsResults[completeWindowsPointer].timestamp = " - "prevCompletePane * paneSize;\n"); // data[bufferPtr-1].timestamp;\n"); + s.append("completeWindowsResults[completeWindowsPointer].timestamp = prevCompletePane * paneSize;\n"); //data[bufferPtr-1].timestamp;\n"); for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((i + 1)) + " = aggrs._" + - std::to_string((i + 1)) + "/aggrs._c" + - std::to_string((i + 1)) + ";\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((i + 1)) + " = aggrs._" + + std::to_string((i + 1)) + "/aggrs._c" + std::to_string((i + 1)) + ";\n"); } else - s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((i + 1)) + " = aggrs._" + - std::to_string((i + 1)) + ";\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((i + 1)) + " = aggrs._" + + std::to_string((i + 1)) + ";\n"); } std::string completeResPredicate; if (!hasHavingPredicate()) completeResPredicate = "completeWindowsPointer++;\n"; else - completeResPredicate = - "completeWindowsPointer += " + getHavingExpr(true) + ";\n"; - 
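> The reworked `getHavingExpr` above renders the HAVING predicate against `data[bufferPtr]` and then textually rebinds it to whichever buffer holds the finished window. A sketch of that rebinding; note that the generated helper rewrites only the first occurrence via `s.find`, whereas this sketch rebinds every occurrence, and the target variable names are copied from the generated strings:

```cpp
#include <cstddef>
#include <string>

std::string rebindPredicate(std::string expr, bool isCompute) {
  const std::string from = "data[bufferPtr]";
  const std::string to = isCompute
      ? "completeWindowsResults[completeWindowsPointer]"  // during computation
      : "completeWindowsRes[resultIndex]";                // during merging
  for (std::size_t pos = expr.find(from); pos != std::string::npos;
       pos = expr.find(from, pos + to.size())) {
    expr.replace(pos, from.size(), to);
  }
  return expr;
}
```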
s.append(completeResPredicate); // s.append("completeWindowsPointer++;\n"); + completeResPredicate = "completeWindowsPointer += " + getHavingExpr(true) + ";\n"; + s.append(completeResPredicate); //s.append("completeWindowsPointer++;\n"); return s; } @@ -3006,8 +3002,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { auto e = (*m_groupByAttributes)[idx - 1]; if (e->getBasicType() == BasicType::Integer) { s.append("int"); - } else if (e->getBasicType() == BasicType::Float) { - ; + } else if (e->getBasicType() == BasicType::Float) { ; s.append("float"); } else if (e->getBasicType() == BasicType::Long) { s.append("long"); @@ -3023,8 +3018,7 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { auto e = (*m_groupByAttributes)[idx - 1]; if (e->getBasicType() == BasicType::Integer) { s.append("int"); - } else if (e->getBasicType() == BasicType::Float) { - ; + } else if (e->getBasicType() == BasicType::Float) { ; s.append("float"); } else if (e->getBasicType() == BasicType::Long) { s.append("long"); @@ -3042,14 +3036,11 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getAggregationVarsResultPointers() { std::string s; s.append( - "pointersAndCounts[0] = openingWindowsPointer * " - "sizeof(interm_tuple_t);\n" - "pointersAndCounts[1] = closingWindowsPointer * " - "sizeof(interm_tuple_t);\n" - "pointersAndCounts[2] = pendingWindowsPointer * " - "sizeof(interm_tuple_t);\n" - "pointersAndCounts[3] = completeWindowsPointer * " - "sizeof(output_tuple_t);\n"); + "pointersAndCounts[0] = openingWindowsPointer * sizeof(interm_tuple_t);\n" + "pointersAndCounts[1] = closingWindowsPointer * sizeof(interm_tuple_t);\n" + "pointersAndCounts[2] = pendingWindowsPointer * sizeof(interm_tuple_t);\n" + "pointersAndCounts[3] = completeWindowsPointer * sizeof(output_tuple_t);\n" + ); return s; } @@ -3057,19 +3048,16 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string s; if (!m_usePtrs) { s.append( - "Bucket *openingWindowsResults = (Bucket *) openingWindowsBuffer; // " - "the results here are in the\n" - "Bucket *closingWindowsResults = (Bucket *) closingWindowsBuffer; // " - "form of the hashtable\n" - "Bucket *pendingWindowsResults = (Bucket *) pendingWindowsBuffer;\n"); + "Bucket *openingWindowsResults = (Bucket *) openingWindowsBuffer; // the results here are in the\n" + "Bucket *closingWindowsResults = (Bucket *) closingWindowsBuffer; // form of the hashtable\n" + "Bucket *pendingWindowsResults = (Bucket *) pendingWindowsBuffer;\n" + ); } else { s.append( - "Bucket **openingWindowsResults = (Bucket **) openingWindowsBuffer; " - "// the results here are in the\n" - "Bucket **closingWindowsResults = (Bucket **) closingWindowsBuffer; " - "// form of the hashtable\n" - "Bucket **pendingWindowsResults = (Bucket **) " - "pendingWindowsBuffer;\n"); + "Bucket **openingWindowsResults = (Bucket **) openingWindowsBuffer; // the results here are in the\n" + "Bucket **closingWindowsResults = (Bucket **) closingWindowsBuffer; // form of the hashtable\n" + "Bucket **pendingWindowsResults = (Bucket **) pendingWindowsBuffer;\n" + ); } return s; @@ -3077,23 +3065,20 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getGroupByInitialization() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); std::string s; - s.append( - "if 
(isFirst[pid]) {\n" - " aggrStructures[pid].init();\n" - " isFirst[pid] = false;\n" - "}\n" - "aggrStructures[pid].clear();\n" - "Value curVal;\n"); + s.append("if (isFirst[pid]) {\n" + " aggrStructures[pid].init();\n" + " isFirst[pid] = false;\n" + "}\n" + "aggrStructures[pid].clear();\n" + "Value curVal;\n"); return s; } std::string getGroupByComputation(bool isRangeBased) { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); std::string s; if (!isRangeBased && hasSelection()) { s.append(getSelectionExpr()); @@ -3103,35 +3088,30 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { auto col = (*m_aggregationAttributes)[i]->getColumn(); if (col == 0) - s.append("curVal._" + std::to_string(i + 1) + - " = data[bufferPtr].timestamp;\n"); + s.append("curVal._" + std::to_string(i + 1) + " = data[bufferPtr].timestamp;\n"); else - s.append("curVal._" + std::to_string(i + 1) + " = data[bufferPtr]._" + - std::to_string(col) + ";\n"); + s.append("curVal._" + std::to_string(i + 1) + " = data[bufferPtr]._" + std::to_string(col) + ";\n"); } if (m_numberOfKeyAttributes == 1) { std::string col; - if (auto cRef = - dynamic_cast((*m_groupByAttributes)[0])) { + if (auto cRef = dynamic_cast((*m_groupByAttributes)[0])) { if (cRef->getColumn() == 0) col = "timestamp"; else col = "_" + std::to_string(cRef->getColumn()); - s.append("aggrStructures[pid].insert_or_modify(data[bufferPtr]." + - col + ", curVal, data[bufferPtr].timestamp);\n"); + s.append("aggrStructures[pid].insert_or_modify(data[bufferPtr]." + col + + ", curVal, data[bufferPtr].timestamp);\n"); } else { - s.append("aggrStructures[pid].insert_or_modify(" + - (*m_groupByAttributes)[0]->toSExprForCodeGen() + - ", curVal, data[bufferPtr].timestamp);\n"); + s.append("aggrStructures[pid].insert_or_modify(" + (*m_groupByAttributes)[0]->toSExprForCodeGen() + + ", curVal, data[bufferPtr].timestamp);\n"); } } else { s.append("Key tempKey = {"); std::string col; for (int i = 0; i < m_numberOfKeyAttributes; ++i) { - if (auto cRef = - dynamic_cast((*m_groupByAttributes)[i])) { + if (auto cRef = dynamic_cast((*m_groupByAttributes)[i])) { if (cRef->getColumn() == 0) col = "timestamp"; else @@ -3140,15 +3120,15 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { } else { s.append((*m_groupByAttributes)[i]->toSExprForCodeGen()); } - if (i != m_numberOfKeyAttributes - 1) s.append(", "); + if (i != m_numberOfKeyAttributes - 1) + s.append(", "); } s.append("};\n"); - s.append( - "aggrStructures[pid].insert_or_modify(tempKey, curVal, " - "data[bufferPtr].timestamp);\n"); + s.append("aggrStructures[pid].insert_or_modify(tempKey, curVal, data[bufferPtr].timestamp);\n"); } } if (hasProjection()) { + } if (!isRangeBased && hasSelection()) { s.append("}\n"); @@ -3158,53 +3138,44 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getGroupByReset() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); std::string s; s.append("aggrStructures[pid].reset();\n"); if (hasIncremental()) { + } return s; } std::string getWriteIntermediateResultsGroupBy(int bufferType) { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + 
throw std::runtime_error("error: aggregation operator hasn't been set up"); if (m_outputSchema == nullptr) throw std::runtime_error("error: outputSchema hasn't been set up"); - if ((int)m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) + if ((int) m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) throw std::runtime_error( - "error: the number of aggregation types should be <= to the " - "attributes of the output schema"); + "error: the number of aggregation types should be <= to the attributes of the output schema"); std::string s; std::string buffer; if (!m_usePtrs) { if (bufferType == 0) s.append( - "std::memcpy(closingWindowsResults + closingWindowsPointer, " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + "std::memcpy(closingWindowsResults + closingWindowsPointer, aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); else if (bufferType == 1) s.append( - "std::memcpy(openingWindowsResults + openingWindowsPointer, " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + "std::memcpy(openingWindowsResults + openingWindowsPointer, aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); else s.append( - "std::memcpy(pendingWindowsResults + pendingWindowsPointer, " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + "std::memcpy(pendingWindowsResults + pendingWindowsPointer, aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); } else { if (bufferType == 0) s.append( - "std::memcpy(closingWindowsResults[numberOfClosingWindows], " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + "std::memcpy(closingWindowsResults[numberOfClosingWindows], aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); else if (bufferType == 1) s.append( - "std::memcpy(openingWindowsResults[numberOfOpeningWindows], " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + "std::memcpy(openingWindowsResults[numberOfOpeningWindows], aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); else - s.append( - "std::memcpy(pendingWindowsResults[0], " - "aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); + s.append("std::memcpy(pendingWindowsResults[0], aggrStructures[pid].getBuckets(), mapSize * sizeof(Bucket));\n"); } return s; @@ -3212,30 +3183,24 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { std::string getWriteCompleteResultsGroupBy() { if (!hasAggregation()) - throw std::runtime_error( - "error: aggregation operator hasn't been set up"); + throw std::runtime_error("error: aggregation operator hasn't been set up"); if (m_outputSchema == nullptr) throw std::runtime_error("error: outputSchema hasn't been set up"); - if ((int)m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) + if ((int) m_aggregationTypes->size() > m_outputSchema->numberOfAttributes()) throw std::runtime_error( - "error: the number of aggregation types should be <= to the " - "attributes of the output schema"); + "error: the number of aggregation types should be <= to the attributes of the output schema"); std::string s; s.append( - "completeWindowsResults[completeWindowsPointer].timestamp = " - "prevCompletePane * paneSize;\n"); // aggrStructures[pid].getBuckets()[i].timestamp;\n"); + "completeWindowsResults[completeWindowsPointer].timestamp = prevCompletePane * paneSize;\n"); //aggrStructures[pid].getBuckets()[i].timestamp;\n"); // Write Key if (m_numberOfKeyAttributes == 1) { - 
s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((1)) + - " = aggrStructures[pid].getBuckets()[i].key;\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((1)) + + " = aggrStructures[pid].getBuckets()[i].key;\n"); } else { for (auto i = 0; i < m_numberOfKeyAttributes; ++i) { - s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((i + 1)) + - " = aggrStructures[pid].getBuckets()[i].key._" + - std::to_string(i) + ";\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((i + 1)) + + " = aggrStructures[pid].getBuckets()[i].key._" + std::to_string(i) + ";\n"); } } @@ -3243,24 +3208,18 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { int completeResIndex = m_numberOfKeyAttributes; for (unsigned long i = 0; i < m_aggregationTypes->size(); ++i) { if ((*m_aggregationTypes)[i] == AVG) { - s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((m_numberOfKeyAttributes + 1)) + - " = aggrStructures[pid].getBuckets()[i].value._" + - std::to_string((i + 1)) + - "/aggrStructures[pid].getBuckets()[i].counter;\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((m_numberOfKeyAttributes + 1)) + + " = aggrStructures[pid].getBuckets()[i].value._" + std::to_string((i + 1)) + + "/aggrStructures[pid].getBuckets()[i].counter;\n"); } else - s.append("completeWindowsResults[completeWindowsPointer]._" + - std::to_string((m_numberOfKeyAttributes + 1)) + - " = aggrStructures[pid].getBuckets()[i].value._" + - std::to_string((i + 1)) + ";\n"); + s.append("completeWindowsResults[completeWindowsPointer]._" + std::to_string((m_numberOfKeyAttributes + 1)) + + " = aggrStructures[pid].getBuckets()[i].value._" + std::to_string((i + 1)) + ";\n"); completeResIndex++; } std::string completeResPredicate = ""; if (hasHavingPredicate()) completeResPredicate = " && " + getHavingExpr(true); - s.append( - "completeWindowsPointer += aggrStructures[pid].getBuckets()[i].state" + - completeResPredicate + ";\n"); + s.append("completeWindowsPointer += aggrStructures[pid].getBuckets()[i].state" + completeResPredicate + ";\n"); return s; } @@ -3270,670 +3229,601 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "pointersAndCounts[0] = openingWindowsPointer * sizeof(Bucket);\n" "pointersAndCounts[1] = closingWindowsPointer * sizeof(Bucket);\n" "pointersAndCounts[2] = pendingWindowsPointer * sizeof(Bucket);\n" - "pointersAndCounts[3] = completeWindowsPointer * " - "sizeof(output_tuple_t);\n"); - return s; - } - - std::string getTumblingWindowRows( - std::string outputBuffers, std::string initialiseAggrs, - std::string computeAggrs, std::string resetAggrs, - std::string openingWindows, std::string closingWindows, - std::string pendingWindows, std::string completeWindows, - std::string resultPointers) { - std::string s; - std::string ptr; - if (hasGroupBy() && m_usePtrs) { - ptr.append("*"); - } - s.append( - "void processData (int pid, char *inputBuffer, size_t inputBufferSize, " - "long startPointer, long endPointer, long timestampFromPrevBatch,\n" - " long *windowStartPointers, long *windowEndPointers, " - "char *" + - ptr + "openingWindowsBuffer, char *" + ptr + - "closingWindowsBuffer,\n" - " char *" + - ptr + - "pendingWindowsBuffer, char *completeWindowsBuffer,\n" - " int *openingStartPointers, int " - "*closingStartPointers, int *pendingStartPointers, int " - "*completeStartPointers,\n" - //" long 
*openingWindowIds, long *closingWindowIds, - // long *pendingWindowIds, long *completeWindowIds,\n" - " long streamStartPointer, int *pointersAndCounts, " - "char *staticBuffer) {" - " // Input Buffer\n" - " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" - "\n" - " // Output Buffers\n" + - addTabs(outputBuffers, 1) + - " output_tuple_t *completeWindowsResults = (output_tuple_t *) " - "completeWindowsBuffer; // the results here are packed\n" - "\n" - //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" - //" windowStartPointers[i] = -1;\n" - //" windowEndPointers[i] = -1;\n" - //" }\n" - //"\n" - " int tupleSize = sizeof(input_tuple_t);\n" - " int mapSize = MAP_SIZE;\n" - " long paneSize = PANE_SIZE;\n" - " long panesPerSlide = PANES_PER_SLIDE;\n" - " long panesPerWindow = PANES_PER_WINDOW;\n" - " long windowSlide = WINDOW_SLIDE;\n" - " long windowSize = WINDOW_SIZE;\n" - " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" - " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" - "\n" - " // Set the first pointer for all types of windows\n" - " openingStartPointers[0] = openingWindowsPointer;\n" - " closingStartPointers[0] = closingWindowsPointer;\n" - " pendingStartPointers[0] = pendingWindowsPointer;\n" - " completeStartPointers[0] = completeWindowsPointer;\n" - "\n" - " // initialize aggregation data structures\n" + - addTabs(initialiseAggrs, 1) + - "\n" - " // Slicing based on panes logic\n" - " // Previous, next, and current pane ids\n" - " long prevClosePane, currPane, prevCompletePane, prevOpenPane, " - "startPane;\n" - " int numberOfOpeningWindows = 0;\n" - " int numberOfClosingWindows = 0;\n" - " int numberOfPendingWindows = 0;\n" - " int numberOfCompleteWindows = 0;\n" - " int currentSlide = 0;\n" - " int currentWindow = 0;\n" - " long step = 1; //tupleSize;\n" - " long streamPtr = streamStartPointer / tupleSize;\n" - " long bufferPtr = startPointer / tupleSize;\n" - " startPointer = startPointer / tupleSize;\n" - " endPointer = endPointer / tupleSize;\n" - " long diff = streamPtr - bufferPtr;\n" - " long tempStartPos, tempEndPos;\n" - "\n" - " //windowStartPointers[currentSlide++] = bufferPtr;\n" - " bool completeStartsFromPane = startPane==prevCompletePane;\n" - " bool hasComplete = ((endPointer - startPointer) >= windowSize);\n" - " startPane = (streamPtr / paneSize);\n" - " prevClosePane = prevOpenPane = (streamPtr / paneSize) - " - "panesPerSlide;\n" - " prevCompletePane = streamPtr / paneSize;\n" - " if (streamStartPointer!=0) {\n" - " long tmpPtr = streamPtr;\n" - " tmpPtr = tmpPtr/windowSlide;\n" - " tmpPtr = tmpPtr * windowSlide;\n" - " if (streamPtr%windowSlide!=0) {\n" - " prevOpenPane = tmpPtr / paneSize;\n" - " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" - " }\n" - " if (streamPtr%windowSlide==0 && hasComplete) {\n" - " prevClosePane = tmpPtr / paneSize;\n" - " } else {\n" - " while (streamPtr-tmpPtr + windowSlide <= windowSize) {\n" - " tmpPtr -= windowSlide;\n" - " }\n" - " prevClosePane = tmpPtr / paneSize;\n" - " if (prevClosePane < 0)\n" - " prevClosePane = 0;\n" - " }\n" - " }\n" - "\n" - "\n" - " // The beginning of the stream. 
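> The pane bookkeeping that `processData` sets up here opens a window every `panesPerSlide` panes and completes it `panesPerWindow` panes later. A runnable distillation of that slicing loop, with the per-tuple filter/project/aggregate step elided and `std::vector` standing in for the preallocated `windowStartPointers`/`windowEndPointers` arrays:

```cpp
#include <vector>

void slice(long streamPtr, long endPointer, long paneSize,
           long panesPerSlide, long panesPerWindow,
           std::vector<long> &windowStarts, std::vector<long> &windowEnds) {
  long prevOpenPane = streamPtr / paneSize - panesPerSlide;
  long prevCompletePane = streamPtr / paneSize;
  for (long bufferPtr = streamPtr; bufferPtr < endPointer; ++bufferPtr) {
    long currPane = bufferPtr / paneSize;
    if (currPane - prevOpenPane == panesPerSlide) {       // new slide: a window opens
      prevOpenPane += panesPerSlide;
      windowStarts.push_back(bufferPtr);
    }
    if (currPane - prevCompletePane == panesPerWindow) {  // oldest window completes
      prevCompletePane += panesPerSlide;
      windowEnds.push_back(bufferPtr);
    }
    // filter, project + aggregate the tuple at bufferPtr here
  }
}
```

Counting panes rather than raw tuples is what lets the tumbling and sliding variants share this loop shape: only the pane parameters and the partial-result handling differ.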
Check if we have at least one " - "complete window.\n" - " if (streamPtr == 0) {\n" - " // check for opening windows until finding the first " - "complete\n" - " while (bufferPtr < endPointer) {\n" - " currPane = streamPtr / paneSize;\n" - " if (currPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " }\n" - " if (currPane - prevCompletePane == panesPerWindow) {\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " numberOfCompleteWindows++;\n" - " break;\n" - " }\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 3) + - " streamPtr += step;\n" - " bufferPtr += step;\n" - " }\n" - " }\n" - " // Check for closing and opening windows, until we have a " - "complete window.\n" - " else {\n" - " while (bufferPtr < endPointer) {\n" - " currPane = streamPtr / paneSize;\n" - " if (currPane - prevOpenPane == panesPerSlide) { // new " - "slide and possible opening windows\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " }\n" - " if (hasComplete && currPane - prevCompletePane == " - "panesPerWindow) { // complete window\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " numberOfCompleteWindows++;\n" - " break;\n" - " }\n" - " if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane " - "<= startPane && currPane - prevClosePane == panesPerWindow) { // " - "closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 5) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 3) + - " streamPtr += step;\n" - " bufferPtr += step;\n" - " }\n" - "\n" - " // check for pending windows\n" - " if ((numberOfClosingWindows == 0 || windowSize!=windowSlide) " - "&& numberOfCompleteWindows == 0) {\n" + - //" currPane = streamPtr / paneSize;\n" - //" if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane - //<= startPane && currPane - prevClosePane == panesPerWindow) { // - // closing window\n" " // write result to the closing - // windows\n" + addTabs(closingWindows, 5) + " closingWindowsPointer += - // mapSize;\n" " numberOfClosingWindows++;\n" " - // closingStartPointers[numberOfClosingWindows] = - // closingWindowsPointer;\n" " // reset values\n" + - // addTabs(resetAggrs, 4) + - //" }\n" - " // write result to pending windows\n" + - addTabs(pendingWindows, 3) + - " pendingWindowsPointer += mapSize;\n" - " numberOfPendingWindows++;\n" - " pendingStartPointers[numberOfPendingWindows] = " - "pendingWindowsPointer;\n" - " // reset values\n" + - addTabs(resetAggrs, 3) + - " }\n" - " }\n" - "\n" - " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || " - "currentSlide >= 1)) { // We only have one opening window so far...\n" - " // write results\n" + - addTabs(openingWindows, 2) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] = prevOpenPane;\n" - "\n" - " } else if (numberOfCompleteWindows > 0) {\n" - " // write results and pack them for the first complete window " - "in the batch\n" - " 
for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 3) + - //" completeWindowsPointer++;\n" - " }\n" - " // reset values\n" + - addTabs(resetAggrs, 2) + - " // write in the correct slot, as the value has already been " - "incremented!\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeStartWindowIds[numberOfCompleteWindows-1] = - // prevCompleteWindow;\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 2) + - "\n" - " bufferPtr = windowEndPointers[0];\n" - " prevOpenPane = (windowStartPointers[currentSlide-1] +diff) / " - "paneSize;\n" - " int idx = 1;\n" - " prevCompletePane = (windowStartPointers[idx++]+diff) / " - "paneSize;\n" - " int removalIndex = currentWindow; //(startingFromPane) ? " - "currentWindow : currentWindow + 1;\n" - " bool foundComplete = false;\n" - " while (bufferPtr < endPointer) {\n" - " // add elements from the next slide\n" - " bufferPtr = windowEndPointers[currentWindow - 1] + 1; // " - "take the next position, as we have already computed this value\n" - " foundComplete = false;\n" - " while (true) {\n" - " currPane = (bufferPtr+diff) / paneSize;\n" - " if (currPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane = currPane;\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " }\n" - " // complete windows\n" - " if (currPane - prevCompletePane == panesPerWindow) {\n" - " //prevPane = currPane;\n" - " prevCompletePane = " - "(windowStartPointers[idx++]+diff) / paneSize;\n" - "\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " // write and pack the complete window result\n" - " //map.setValues();\n" - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 6) + - //" completeWindowsPointer++;\n" - " }\n" - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " foundComplete = true;\n" - " // reset values\n" + - addTabs(resetAggrs, 5) + - " }\n" - " if (bufferPtr >= endPointer) {\n" - " break;\n" - " }\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 4) + - " bufferPtr += step;\n" - " if (foundComplete) {\n" - " break;\n" - " }\n" - " }\n" - " removalIndex++;\n" - " }\n" - "\n" - " if (!foundComplete) { // we have reached the first open " - "window after all the complete ones\n" - " // write the first open window if we have already " - "computed the result\n" + - addTabs(openingWindows, 3) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevCompletePane++;\n" - " }\n" - " }\n" - "\n" + - addTabs(resultPointers, 1) + - " pointersAndCounts[4] = numberOfOpeningWindows;\n" - " pointersAndCounts[5] = numberOfClosingWindows;\n" - " pointersAndCounts[6] = numberOfPendingWindows;\n" - " pointersAndCounts[7] = numberOfCompleteWindows;\n" - "}\n"); + "pointersAndCounts[3] = completeWindowsPointer * sizeof(output_tuple_t);\n" + ); return s; } - std::string getSlidingWindowRows( - std::string outputBuffers, std::string initialiseAggrs, - std::string computeAggrs, std::string insertAggrs, std::string evictAggrs, - std::string resetAggrs, std::string setValues, std::string openingWindows, - std::string closingWindows, std::string pendingWindows, - std::string completeWindows, std::string resultPointers) { + std::string 
getTumblingWindowRows(std::string outputBuffers, + std::string initialiseAggrs, + std::string computeAggrs, + std::string resetAggrs, + std::string openingWindows, + std::string closingWindows, + std::string pendingWindows, + std::string completeWindows, + std::string resultPointers) { std::string s; std::string ptr; if (hasGroupBy() && m_usePtrs) { ptr.append("*"); } s.append( - "void processData (int pid, char *inputBuffer, size_t inputBufferSize, " - "long startPointer, long endPointer, long timestampFromPrevBatch,\n" - " long *windowStartPointers, long *windowEndPointers, " - "char *" + - ptr + "openingWindowsBuffer, char *" + ptr + - "closingWindowsBuffer,\n" - " char *" + - ptr + - "pendingWindowsBuffer, char *completeWindowsBuffer,\n" - " int *openingStartPointers, int " - "*closingStartPointers, int *pendingStartPointers, int " - "*completeStartPointers,\n" - //" long *openingWindowIds, long *closingWindowIds, - // long *pendingWindowIds, long *completeWindowIds,\n" - " long streamStartPointer, int *pointersAndCounts, " - "char *staticBuffer) {\n" - "\n" - " // Input Buffer\n" - " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" - "\n" - " // Output Buffers\n" + - addTabs(outputBuffers, 1) + - " output_tuple_t *completeWindowsResults = (output_tuple_t *) " - "completeWindowsBuffer; // the results here are packed\n" - "\n" - //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" - //" windowStartPointers[i] = -1;\n" - //" windowEndPointers[i] = -1;\n" - //" }\n" - //"\n" - " int tupleSize = sizeof(input_tuple_t);\n" - " int mapSize = MAP_SIZE;\n" - " long paneSize = PANE_SIZE;\n" - " long panesPerSlide = PANES_PER_SLIDE;\n" - " long panesPerWindow = PANES_PER_WINDOW;\n" - " long windowSlide = WINDOW_SLIDE;\n" - " long windowSize = WINDOW_SIZE;\n" - " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" - " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" - "\n" - " // Set the first pointer for all types of windows\n" - " openingStartPointers[0] = openingWindowsPointer;\n" - " closingStartPointers[0] = closingWindowsPointer;\n" - " pendingStartPointers[0] = pendingWindowsPointer;\n" - " completeStartPointers[0] = completeWindowsPointer;\n" - "\n" - " // initialize aggregation data structures\n" + - addTabs(initialiseAggrs, 1) + - "\n" - " // Slicing based on panes logic\n" - " // Previous, next, and current pane ids\n" - " long prevClosePane, currPane, prevCompletePane, prevOpenPane, " - "startPane;\n" - " int numberOfOpeningWindows = 0;\n" - " int numberOfClosingWindows = 0;\n" - " int numberOfPendingWindows = 0;\n" - " int numberOfCompleteWindows = 0;\n" - " int currentSlide = 0;\n" - " int currentWindow = 0;\n" - " long step = 1; //tupleSize;\n" - " long streamPtr = streamStartPointer / tupleSize;\n" - " long bufferPtr = startPointer / tupleSize;\n" - " startPointer = startPointer / tupleSize;\n" - " endPointer = endPointer / tupleSize;\n" - " long diff = streamPtr - bufferPtr;\n" - " long tempStartPos, tempEndPos;\n" - "\n" - " //windowStartPointers[currentSlide++] = bufferPtr;\n" - " startPane = (streamPtr / paneSize);\n" - " prevClosePane = prevOpenPane = (streamPtr / paneSize) - " - "panesPerSlide;\n" - " prevCompletePane = streamPtr / paneSize;\n" - " if (streamStartPointer!=0) {\n" - " long tmpPtr = streamPtr;\n" - " tmpPtr = tmpPtr/windowSlide;\n" - " tmpPtr = tmpPtr * windowSlide;\n" - " if (streamPtr%windowSlide!=0) {\n" - " prevOpenPane = tmpPtr / paneSize;\n" - " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" - " }\n" - " while 
(streamPtr-tmpPtr + windowSlide < windowSize) {\n" - " tmpPtr -= windowSlide;\n" - " }\n" - " prevClosePane = tmpPtr / paneSize;\n" - " if (prevClosePane < 0)\n" - " prevClosePane = 0;\n" - " }\n" - "\n" - " bool completeStartsFromPane = startPane==prevCompletePane;\n" - " bool hasComplete = ((endPointer - startPointer) >= windowSize);\n" - "\n" - " // The beginning of the stream. Check if we have at least one " - "complete window.\n" - " if (streamPtr == 0) {\n" - " // check for opening windows until finding the first " - "complete\n" - " while (bufferPtr < endPointer) {\n" - " currPane = streamPtr / paneSize;\n" - " if (currPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = bufferPtr; // " - "TODO: maybe store bPtr*tupleSize\n" - " if (bufferPtr!=0) {\n" + - addTabs(insertAggrs, 5) + addTabs(resetAggrs, 5) + - //" aggrStructures[pid].insert(aggrs);\n" - //" aggrs.reset();\n" - " }\n" - " }\n" - " if (currPane - prevCompletePane == panesPerWindow) {\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " numberOfCompleteWindows++;\n" - " break;\n" - " }\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 3) + - " streamPtr += step;\n" - " bufferPtr += step;\n" - " }\n" - " }\n" - " // Check for closing and opening windows, until we have a " - "complete window.\n" - " else {\n" - " auto prevPane = streamPtr / paneSize;\n" - " int numOfPartials = 0;\n" - " while (bufferPtr < endPointer) {\n" - " currPane = streamPtr / paneSize;\n" - " if (currPane-prevPane==1) {\n" - " prevPane = currPane;\n" - " if (numOfPartials==BUCKET_SIZE) // remove the extra " - "values so that we have the first complete window\n" + - addTabs(evictAggrs, 5) + addTabs(insertAggrs, 4) + - addTabs(resetAggrs, 4) + - //" aggrStructures[pid].evict();\n" - //" aggrStructures[pid].insert(aggrs);\n" - //" aggrs.reset();\n" - " numOfPartials++;\n" - " }\n" - " if (currPane - prevOpenPane == panesPerSlide) { // new " - "slide and possible opening windows\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " }\n" - " if (hasComplete && currPane - prevCompletePane == " - "panesPerWindow) { // complete window\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " numberOfCompleteWindows++;\n" - " break;\n" - " }\n" - " if (prevClosePane <= startPane && currPane - " - "prevClosePane == panesPerWindow) { // closing window\n" - " // write result to the closing windows\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" + - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" + - addTabs(resetAggrs, 4) + - //" aggrs.reset();\n" - " }\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 3) + - " streamPtr += step;\n" - " bufferPtr += step;\n" - " }\n" - "\n" - " // check for pending windows\n" - " if (numberOfCompleteWindows == 0) {\n" + - addTabs(insertAggrs, 3) + - " currPane = streamPtr / paneSize;\n" - " if (prevClosePane <= startPane && currPane - " - "prevClosePane == panesPerWindow) { // closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 5) + - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " 
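Throughout the generated `processData` variants, window bookkeeping is driven by integer pane ids (`currPane = streamPtr / paneSize`): a new slide opens every `panesPerSlide` panes, and a window completes `panesPerWindow` panes after its start. A minimal standalone sketch of that arithmetic follows, with toy constants standing in for the `PANE_SIZE`, `PANES_PER_SLIDE` and `PANES_PER_WINDOW` macros (the values are assumptions, not taken from a real query):

```cpp
// Minimal sketch of the pane arithmetic emitted by the generated code.
// Toy values: panes of 2 tuples, slide of 2 panes, window of 4 panes.
#include <cstdio>

int main() {
  const long paneSize = 2;
  const long panesPerSlide = 2;   // slide = 4 tuples
  const long panesPerWindow = 4;  // window = 8 tuples
  long prevOpenPane = -panesPerSlide;  // as initialised when streamPtr == 0
  long prevCompletePane = 0;

  for (long streamPtr = 0; streamPtr < 16; ++streamPtr) {
    const long currPane = streamPtr / paneSize;
    if (currPane - prevOpenPane == panesPerSlide) {  // new slide opens a window
      prevOpenPane += panesPerSlide;
      std::printf("tuple %2ld: opening window\n", streamPtr);
    }
    if (currPane - prevCompletePane == panesPerWindow) {  // a window completes
      prevCompletePane += panesPerSlide;
      std::printf("tuple %2ld: complete window\n", streamPtr);
    }
  }
  return 0;
}
```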
closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " // write result to pending windows\n" + - //" aggrs = aggrStructures[pid].queryIntermediate(-1);\n" + - addTabs(pendingWindows, 3) + - " pendingWindowsPointer += mapSize;\n" - " numberOfPendingWindows++;\n" - " pendingStartPointers[numberOfPendingWindows] = " - "pendingWindowsPointer;\n" + - addTabs(resetAggrs, 3) + - //" aggrs.reset();\n" - " }\n" - " }\n" - "\n" - " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || " - "currentSlide >= 1)) { // We only have one opening window so far...\n" - " if (streamPtr%windowSlide!=0 && streamStartPointer!=0) {\n" + - addTabs(evictAggrs, 3) + - //" aggrStructures[pid].evict();\n" - " }\n" - " // write results\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + - addTabs(openingWindows, 2) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" + - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevOpenPane++;\n" + - addTabs(resetAggrs, 2) + - //" aggrs.reset();\n" - " } else if (numberOfCompleteWindows > 0) {\n" - " // write results and pack them for the first complete window " - "in the batch\n" + - addTabs(setValues, 2) + - //" aggrs = aggrStructures[pid].query();\n" + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 3) + - //" completeWindowsPointer++;\n" - " }\n" + addTabs(resetAggrs, 2) + - //" aggrs.reset();\n" - " // write in the correct slot, as the value has already been " - "incremented!\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 2) + - " bufferPtr = windowEndPointers[0];\n" - " prevOpenPane = (windowStartPointers[currentSlide-1] +diff) / " - "paneSize;\n" - " int idx = 1;\n" - " prevCompletePane = (windowStartPointers[idx++]+diff) / " - "paneSize;\n" - " int removalIndex = currentWindow; //(startingFromPane) ? 
" - "currentWindow : currentWindow + 1;\n" - " bool foundComplete = false;\n" - " while (bufferPtr < endPointer) {\n" - " // remove previous slide \n" + - addTabs(evictAggrs, 3) + - //" aggrStructures[pid].evict();\n" - "\n" - " // add elements from the next slide\n" - " bufferPtr = windowEndPointers[currentWindow - 1] + 1; // " - "take the next position, as we have already computed this value\n" - " foundComplete = false;\n" - " auto prevPane = (bufferPtr+diff) / paneSize;\n" - " while (true) {\n" - " currPane = (bufferPtr+diff) / paneSize;\n" - " if (currPane-prevPane==1) {\n" - " prevPane = currPane;\n" + - addTabs(insertAggrs, 4) + addTabs(resetAggrs, 4) + - //" aggrStructures[pid].insert(aggrs);\n" - //" aggrs.reset();\n" - " }\n" - " if (currPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane = currPane;\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " }\n" - " // complete windows\n" - " if (currPane - prevCompletePane == panesPerWindow) {\n" - " //prevPane = currPane;\n" - " prevCompletePane = " - "(windowStartPointers[idx++]+diff) / paneSize;\n" - "\n" - " windowEndPointers[currentWindow++] = bufferPtr;\n" - " // write and pack the complete window result\n" + - addTabs(setValues, 5) + - //" aggrs = aggrStructures[pid].query();\n" - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 6) + - //" completeWindowsPointer++;\n" - " }\n" + addTabs(resetAggrs, 5) + - //" aggrs.reset();\n" - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " foundComplete = true;\n" - " }\n" - " if (bufferPtr >= endPointer) {\n" - " break;\n" - " }\n" - "\n" - " // filter, project + aggregate here\n" + - addTabs(computeAggrs, 4) + - " bufferPtr += step;\n" - " if (foundComplete) {\n" - " break;\n" - " }\n" - " }\n" - " removalIndex++;\n" - " }\n" - "\n" - " if (!foundComplete) { // we have reached the first open " - "window after all the complete ones\n" - " // write the first open window if we have already " - "computed the result\n" + - // addTabs(insertAggrs, 3) + - //" aggrStructures[pid].insert(aggrs);\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + - addTabs(openingWindows, 3) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevCompletePane++;\n" - " } else { // otherwise remove the respective tuples for the " - "first opening window\n" - " currentWindow--;\n" - " }\n" - " }\n" - "\n" - " // compute the rest opening windows\n" - " while (currentWindow < currentSlide - 1) {\n" - //" while (currentWindow < currentSlide - 1) {\n" - " // remove previous slide\n" - " tempStartPos = windowStartPointers[currentWindow];\n" - " tempEndPos = windowStartPointers[currentWindow + 1];\n" - " currentWindow++;\n" - " if (tempStartPos == tempEndPos || tempEndPos==endPointer) " - "continue;\n" + - addTabs(evictAggrs, 2) + - //" aggrStructures[pid].evict(); \n" - " // write result to the opening windows\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + - addTabs(openingWindows, 2) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] 
= - // prevCompletePane++;\n" - " }\n" - "\n" + - addTabs(resultPointers, 1) + - " pointersAndCounts[4] = numberOfOpeningWindows;\n" - " pointersAndCounts[5] = numberOfClosingWindows;\n" - " pointersAndCounts[6] = numberOfPendingWindows;\n" - " pointersAndCounts[7] = numberOfCompleteWindows;\n" - "}\n"); + "void processData (int pid, char *inputBuffer, size_t inputBufferSize, long startPointer, long endPointer, long timestampFromPrevBatch,\n" + " long *windowStartPointers, long *windowEndPointers, char *" + ptr + + "openingWindowsBuffer, char *" + ptr + "closingWindowsBuffer,\n" + " char *" + ptr + + "pendingWindowsBuffer, char *completeWindowsBuffer,\n" + " int *openingStartPointers, int *closingStartPointers, int *pendingStartPointers, int *completeStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" + " long streamStartPointer, int *pointersAndCounts, char *staticBuffer) {" + " // Input Buffer\n" + " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" + "\n" + " // Output Buffers\n" + + addTabs(outputBuffers, 1) + + " output_tuple_t *completeWindowsResults = (output_tuple_t *) completeWindowsBuffer; // the results here are packed\n" + "\n" + //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" + //" windowStartPointers[i] = -1;\n" + //" windowEndPointers[i] = -1;\n" + //" }\n" + //"\n" + " int tupleSize = sizeof(input_tuple_t);\n" + " int mapSize = MAP_SIZE;\n" + " long paneSize = PANE_SIZE;\n" + " long panesPerSlide = PANES_PER_SLIDE;\n" + " long panesPerWindow = PANES_PER_WINDOW;\n" + " long windowSlide = WINDOW_SLIDE;\n" + " long windowSize = WINDOW_SIZE;\n" + " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" + " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" + "\n" + " // Set the first pointer for all types of windows\n" + " openingStartPointers[0] = openingWindowsPointer;\n" + " closingStartPointers[0] = closingWindowsPointer;\n" + " pendingStartPointers[0] = pendingWindowsPointer;\n" + " completeStartPointers[0] = completeWindowsPointer;\n" + "\n" + " // initialize aggregation data structures\n" + + addTabs(initialiseAggrs, 1) + + "\n" + " // Slicing based on panes logic\n" + " // Previous, next, and current pane ids\n" + " long prevClosePane, currPane, prevCompletePane, prevOpenPane, startPane;\n" + " int numberOfOpeningWindows = 0;\n" + " int numberOfClosingWindows = 0;\n" + " int numberOfPendingWindows = 0;\n" + " int numberOfCompleteWindows = 0;\n" + " int currentSlide = 0;\n" + " int currentWindow = 0;\n" + " long step = 1; //tupleSize;\n" + " long streamPtr = streamStartPointer / tupleSize;\n" + " long bufferPtr = startPointer / tupleSize;\n" + " startPointer = startPointer / tupleSize;\n" + " endPointer = endPointer / tupleSize;\n" + " long diff = streamPtr - bufferPtr;\n" + " long tempStartPos, tempEndPos;\n" + "\n" + " //windowStartPointers[currentSlide++] = bufferPtr;\n" + " bool completeStartsFromPane = startPane==prevCompletePane;\n" + " bool hasComplete = ((endPointer - startPointer) >= windowSize);\n" + " startPane = (streamPtr / paneSize);\n" + " prevClosePane = prevOpenPane = (streamPtr / paneSize) - panesPerSlide;\n" + " prevCompletePane = streamPtr / paneSize;\n" + " if (streamStartPointer!=0) {\n" + " long tmpPtr = streamPtr;\n" + " tmpPtr = tmpPtr/windowSlide;\n" + " tmpPtr = tmpPtr * windowSlide;\n" + " if (streamPtr%windowSlide!=0) {\n" + " prevOpenPane = tmpPtr / paneSize;\n" + " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" + " }\n" + " if 
(streamPtr%windowSlide==0 && hasComplete) {\n" + " prevClosePane = tmpPtr / paneSize;\n" + " } else {\n" + " while (streamPtr-tmpPtr + windowSlide <= windowSize) {\n" + " tmpPtr -= windowSlide;\n" + " }\n" + " prevClosePane = tmpPtr / paneSize;\n" + " if (prevClosePane < 0)\n" + " prevClosePane = 0;\n" + " }\n" + " }\n" + "\n" + "\n" + " // The beginning of the stream. Check if we have at least one complete window.\n" + " if (streamPtr == 0) {\n" + " // check for opening windows until finding the first complete\n" + " while (bufferPtr < endPointer) {\n" + " currPane = streamPtr / paneSize;\n" + " if (currPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = bufferPtr;\n" + " }\n" + " if (currPane - prevCompletePane == panesPerWindow) {\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " numberOfCompleteWindows++;\n" + " break;\n" + " }\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 3) + + " streamPtr += step;\n" + " bufferPtr += step;\n" + " }\n" + " }\n" + " // Check for closing and opening windows, until we have a complete window.\n" + " else {\n" + " while (bufferPtr < endPointer) {\n" + " currPane = streamPtr / paneSize;\n" + " if (currPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = bufferPtr;\n" + " }\n" + " if (hasComplete && currPane - prevCompletePane == panesPerWindow) { // complete window\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " numberOfCompleteWindows++;\n" + " break;\n" + " }\n" + " if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" + " // write result to the closing windows\n" + + addTabs(closingWindows, 5) + + " prevClosePane += panesPerSlide;\n" + " closingWindowsPointer += mapSize;\n" + " numberOfClosingWindows++;\n" + " closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n" + " // reset values\n" + + addTabs(resetAggrs, 4) + + " }\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 3) + + " streamPtr += step;\n" + " bufferPtr += step;\n" + " }\n" + "\n" + " // check for pending windows\n" + " if ((numberOfClosingWindows == 0 || windowSize!=windowSlide) && numberOfCompleteWindows == 0) {\n" + + + //" currPane = streamPtr / paneSize;\n" + //" if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" + //" // write result to the closing windows\n" + + //addTabs(closingWindows, 5) + + //" closingWindowsPointer += mapSize;\n" + //" numberOfClosingWindows++;\n" + //" closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" // reset values\n" + + //addTabs(resetAggrs, 4) + + //" }\n" + " // write result to pending windows\n" + + addTabs(pendingWindows, 3) + + " pendingWindowsPointer += mapSize;\n" + " numberOfPendingWindows++;\n" + " pendingStartPointers[numberOfPendingWindows] = pendingWindowsPointer;\n" + " // reset values\n" + + addTabs(resetAggrs, 3) + + " }\n" + " }\n" + "\n" + " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || currentSlide >= 1)) { // We only have one opening window so far...\n" + " // write results\n" + + addTabs(openingWindows, 2) + + " openingWindowsPointer += mapSize;\n" + " 
numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevOpenPane;\n" + "\n" + " } else if (numberOfCompleteWindows > 0) {\n" + " // write results and pack them for the first complete window in the batch\n" + + addPostWindowOperation(2) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 3) + + //" completeWindowsPointer++;\n" + " }\n" + " // reset values\n" + + addTabs(resetAggrs, 2) + + " // write in the correct slot, as the value has already been incremented!\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeStartWindowIds[numberOfCompleteWindows-1] = prevCompleteWindow;\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 2) + + "\n" + " bufferPtr = windowEndPointers[0];\n" + " prevOpenPane = (windowStartPointers[currentSlide-1] +diff) / paneSize;\n" + " int idx = 1;\n" + " prevCompletePane = (windowStartPointers[idx++]+diff) / paneSize;\n" + " int removalIndex = currentWindow; //(startingFromPane) ? currentWindow : currentWindow + 1;\n" + " bool foundComplete = false;\n" + " while (bufferPtr < endPointer) {\n" + " // add elements from the next slide\n" + " bufferPtr = windowEndPointers[currentWindow - 1] + 1; // take the next position, as we have already computed this value\n" + " foundComplete = false;\n" + " while (true) {\n" + " currPane = (bufferPtr+diff) / paneSize;\n" + " if (currPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane = currPane;\n" + " windowStartPointers[currentSlide++] = bufferPtr;\n" + " }\n" + " // complete windows\n" + " if (currPane - prevCompletePane == panesPerWindow) {\n" + " //prevPane = currPane;\n" + " prevCompletePane = (windowStartPointers[idx++]+diff) / paneSize;\n" + "\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " // write and pack the complete window result\n" + " //map.setValues();\n" + + addPostWindowOperation(5) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 6) + + //" completeWindowsPointer++;\n" + " }\n" + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " foundComplete = true;\n" + " // reset values\n" + + addTabs(resetAggrs, 5) + + " }\n" + " if (bufferPtr >= endPointer) {\n" + " break;\n" + " }\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 4) + + " bufferPtr += step;\n" + " if (foundComplete) {\n" + " break;\n" + " }\n" + " }\n" + " removalIndex++;\n" + " }\n" + "\n" + " if (!foundComplete) { // we have reached the first open window after all the complete ones\n" + " // write the first open window if we have already computed the result\n" + + addTabs(openingWindows, 3) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevCompletePane++;\n" + " }\n" + " }\n" + "\n" + + addTabs(resultPointers, 1) + + " pointersAndCounts[4] = numberOfOpeningWindows;\n" + " pointersAndCounts[5] = numberOfClosingWindows;\n" + " pointersAndCounts[6] = numberOfPendingWindows;\n" + " pointersAndCounts[7] = numberOfCompleteWindows;\n" + "}\n"); + return s; + } + + std::string getSlidingWindowRows(std::string outputBuffers, + std::string initialiseAggrs, + std::string computeAggrs, + std::string insertAggrs, + 
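For orientation, every generated variant ends by reporting how the batch's windows were classified through `pointersAndCounts[4..7]`. A hypothetical struct view of that contract (the wrapper type is illustrative only; the indices are the ones the generated code writes):

```cpp
// Illustrative view of the pointersAndCounts slots written at the end of
// each generated processData. pointersAndCounts[3] additionally carries the
// complete-results byte size (completeWindowsPointer * sizeof(output_tuple_t)).
struct WindowFragmentCounts {
  int numberOfOpeningWindows;   // [4]: windows that start in this batch, finish later
  int numberOfClosingWindows;   // [5]: windows that started earlier and end here
  int numberOfPendingWindows;   // [6]: windows spanning the whole batch
  int numberOfCompleteWindows;  // [7]: windows fully contained, results packed
};
```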
std::string evictAggrs, + std::string resetAggrs, + std::string setValues, + std::string openingWindows, + std::string closingWindows, + std::string pendingWindows, + std::string completeWindows, + std::string resultPointers) { + std::string s; + std::string ptr; + if (hasGroupBy() && m_usePtrs) { + ptr.append("*"); + } + s.append( + "void processData (int pid, char *inputBuffer, size_t inputBufferSize, long startPointer, long endPointer, long timestampFromPrevBatch,\n" + " long *windowStartPointers, long *windowEndPointers, char *" + ptr + + "openingWindowsBuffer, char *" + ptr + "closingWindowsBuffer,\n" + " char *" + ptr + + "pendingWindowsBuffer, char *completeWindowsBuffer,\n" + " int *openingStartPointers, int *closingStartPointers, int *pendingStartPointers, int *completeStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" + " long streamStartPointer, int *pointersAndCounts, char *staticBuffer) {\n" + "\n" + " // Input Buffer\n" + " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" + "\n" + " // Output Buffers\n" + + addTabs(outputBuffers, 1) + + " output_tuple_t *completeWindowsResults = (output_tuple_t *) completeWindowsBuffer; // the results here are packed\n" + "\n" + //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" + //" windowStartPointers[i] = -1;\n" + //" windowEndPointers[i] = -1;\n" + //" }\n" + //"\n" + " int tupleSize = sizeof(input_tuple_t);\n" + " int mapSize = MAP_SIZE;\n" + " long paneSize = PANE_SIZE;\n" + " long panesPerSlide = PANES_PER_SLIDE;\n" + " long panesPerWindow = PANES_PER_WINDOW;\n" + " long windowSlide = WINDOW_SLIDE;\n" + " long windowSize = WINDOW_SIZE;\n" + " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" + " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" + "\n" + " // Set the first pointer for all types of windows\n" + " openingStartPointers[0] = openingWindowsPointer;\n" + " closingStartPointers[0] = closingWindowsPointer;\n" + " pendingStartPointers[0] = pendingWindowsPointer;\n" + " completeStartPointers[0] = completeWindowsPointer;\n" + "\n" + " // initialize aggregation data structures\n" + + addTabs(initialiseAggrs, 1) + + "\n" + " // Slicing based on panes logic\n" + " // Previous, next, and current pane ids\n" + " long prevClosePane, currPane, prevCompletePane, prevOpenPane, startPane;\n" + " int numberOfOpeningWindows = 0;\n" + " int numberOfClosingWindows = 0;\n" + " int numberOfPendingWindows = 0;\n" + " int numberOfCompleteWindows = 0;\n" + " int currentSlide = 0;\n" + " int currentWindow = 0;\n" + " long step = 1; //tupleSize;\n" + " long streamPtr = streamStartPointer / tupleSize;\n" + " long bufferPtr = startPointer / tupleSize;\n" + " startPointer = startPointer / tupleSize;\n" + " endPointer = endPointer / tupleSize;\n" + " long diff = streamPtr - bufferPtr;\n" + " long tempStartPos, tempEndPos;\n" + "\n" + " //windowStartPointers[currentSlide++] = bufferPtr;\n" + " startPane = (streamPtr / paneSize);\n" + " prevClosePane = prevOpenPane = (streamPtr / paneSize) - panesPerSlide;\n" + " prevCompletePane = streamPtr / paneSize;\n" + " if (streamStartPointer!=0) {\n" + " long tmpPtr = streamPtr;\n" + " tmpPtr = tmpPtr/windowSlide;\n" + " tmpPtr = tmpPtr * windowSlide;\n" + " if (streamPtr%windowSlide!=0) {\n" + " prevOpenPane = tmpPtr / paneSize;\n" + " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" + " }\n" + " while (streamPtr-tmpPtr + windowSlide < windowSize) {\n" + " tmpPtr -= windowSlide;\n" + " }\n" + " prevClosePane 
= tmpPtr / paneSize;\n" + " if (prevClosePane < 0)\n" + " prevClosePane = 0;\n" + " }\n" + "\n" + " bool completeStartsFromPane = startPane==prevCompletePane;\n" + " bool hasComplete = ((endPointer - startPointer) >= windowSize);\n" + "\n" + " // The beginning of the stream. Check if we have at least one complete window.\n" + " if (streamPtr == 0) {\n" + " // check for opening windows until finding the first complete\n" + " while (bufferPtr < endPointer) {\n" + " currPane = streamPtr / paneSize;\n" + " if (currPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = bufferPtr; // TODO: maybe store bPtr*tupleSize\n" + " if (bufferPtr!=0) {\n" + + addTabs(insertAggrs, 5) + + addTabs(resetAggrs, 5) + + //" aggrStructures[pid].insert(aggrs);\n" + //" aggrs.reset();\n" + " }\n" + " }\n" + " if (currPane - prevCompletePane == panesPerWindow) {\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " numberOfCompleteWindows++;\n" + " break;\n" + " }\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 3) + + " streamPtr += step;\n" + " bufferPtr += step;\n" + " }\n" + " }\n" + " // Check for closing and opening windows, until we have a complete window.\n" + " else {\n" + " auto prevPane = streamPtr / paneSize;\n" + " int numOfPartials = 0;\n" + " while (bufferPtr < endPointer) {\n" + " currPane = streamPtr / paneSize;\n" + " if (currPane-prevPane==1) {\n" + " prevPane = currPane;\n" + " if (numOfPartials==BUCKET_SIZE) // remove the extra values so that we have the first complete window\n" + + + addTabs(evictAggrs, 5) + + addTabs(insertAggrs, 4) + + addTabs(resetAggrs, 4) + + //" aggrStructures[pid].evict();\n" + //" aggrStructures[pid].insert(aggrs);\n" + //" aggrs.reset();\n" + " numOfPartials++;\n" + " }\n" + " if (currPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = bufferPtr;\n" + " }\n" + " if (hasComplete && currPane - prevCompletePane == panesPerWindow) { // complete window\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " numberOfCompleteWindows++;\n" + " break;\n" + " }\n" + " if (prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" + " // write result to the closing windows\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n" + + addTabs(closingWindows, 4) + + " prevClosePane += panesPerSlide;\n" + " closingWindowsPointer += mapSize;\n" + " numberOfClosingWindows++;\n" + " closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + + //" closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n" + + addTabs(resetAggrs, 4) + + //" aggrs.reset();\n" + " }\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 3) + + " streamPtr += step;\n" + " bufferPtr += step;\n" + " }\n" + "\n" + " // check for pending windows\n" + " if (numberOfCompleteWindows == 0) {\n" + + addTabs(insertAggrs, 3) + + " currPane = streamPtr / paneSize;\n" + " if (prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" + " // write result to the closing windows\n" + + addTabs(closingWindows, 5) + + " closingWindowsPointer += mapSize;\n" + " numberOfClosingWindows++;\n" + " closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" closingWindowIds[numberOfClosingWindows-1] = prevClosePane;\n" + " // reset values\n" + + 
addTabs(resetAggrs, 4) + + " }\n" + " // write result to pending windows\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(-1);\n" + + addTabs(pendingWindows, 3) + + " pendingWindowsPointer += mapSize;\n" + " numberOfPendingWindows++;\n" + " pendingStartPointers[numberOfPendingWindows] = pendingWindowsPointer;\n" + + addTabs(resetAggrs, 3) + + //" aggrs.reset();\n" + " }\n" + " }\n" + "\n" + " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || currentSlide >= 1)) { // We only have one opening window so far...\n" + " if (streamPtr%windowSlide!=0 && streamStartPointer!=0) {\n" + + addTabs(evictAggrs, 3) + + //" aggrStructures[pid].evict();\n" + " }\n" + " // write results\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + + addTabs(openingWindows, 2) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + + //" openingWindowIds[numberOfOpeningWindows-1] = prevOpenPane++;\n" + + addTabs(resetAggrs, 2) + + //" aggrs.reset();\n" + " } else if (numberOfCompleteWindows > 0) {\n" + " // write results and pack them for the first complete window in the batch\n" + + addTabs(setValues, 2) + //" aggrs = aggrStructures[pid].query();\n" + + + addPostWindowOperation(2) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 3) + + //" completeWindowsPointer++;\n" + " }\n" + + addTabs(resetAggrs, 2) + + //" aggrs.reset();\n" + " // write in the correct slot, as the value has already been incremented!\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 2) + + " bufferPtr = windowEndPointers[0];\n" + " prevOpenPane = (windowStartPointers[currentSlide-1] +diff) / paneSize;\n" + " int idx = 1;\n" + " prevCompletePane = (windowStartPointers[idx++]+diff) / paneSize;\n" + " int removalIndex = currentWindow; //(startingFromPane) ? 
currentWindow : currentWindow + 1;\n" + " bool foundComplete = false;\n" + " while (bufferPtr < endPointer) {\n" + " // remove previous slide \n" + + addTabs(evictAggrs, 3) + + //" aggrStructures[pid].evict();\n" + "\n" + " // add elements from the next slide\n" + " bufferPtr = windowEndPointers[currentWindow - 1] + 1; // take the next position, as we have already computed this value\n" + " foundComplete = false;\n" + " auto prevPane = (bufferPtr+diff) / paneSize;\n" + " while (true) {\n" + " currPane = (bufferPtr+diff) / paneSize;\n" + " if (currPane-prevPane==1) {\n" + " prevPane = currPane;\n" + + addTabs(insertAggrs, 4) + + addTabs(resetAggrs, 4) + + //" aggrStructures[pid].insert(aggrs);\n" + //" aggrs.reset();\n" + " }\n" + " if (currPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane = currPane;\n" + " windowStartPointers[currentSlide++] = bufferPtr;\n" + " }\n" + " // complete windows\n" + " if (currPane - prevCompletePane == panesPerWindow) {\n" + " //prevPane = currPane;\n" + " prevCompletePane = (windowStartPointers[idx++]+diff) / paneSize;\n" + "\n" + " windowEndPointers[currentWindow++] = bufferPtr;\n" + " // write and pack the complete window result\n" + + addTabs(setValues, 5) + //" aggrs = aggrStructures[pid].query();\n" + + addPostWindowOperation(5) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 6) + + //" completeWindowsPointer++;\n" + " }\n" + + addTabs(resetAggrs, 5) + + //" aggrs.reset();\n" + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " foundComplete = true;\n" + " }\n" + " if (bufferPtr >= endPointer) {\n" + " break;\n" + " }\n" + "\n" + " // filter, project + aggregate here\n" + + addTabs(computeAggrs, 4) + + " bufferPtr += step;\n" + " if (foundComplete) {\n" + " break;\n" + " }\n" + " }\n" + " removalIndex++;\n" + " }\n" + "\n" + " if (!foundComplete) { // we have reached the first open window after all the complete ones\n" + " // write the first open window if we have already computed the result\n" + + //addTabs(insertAggrs, 3) + + //" aggrStructures[pid].insert(aggrs);\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + + addTabs(openingWindows, 3) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevCompletePane++;\n" + " } else { // otherwise remove the respective tuples for the first opening window\n" + " currentWindow--;\n" + " }\n" + " }\n" + "\n" + " // compute the rest opening windows\n" + " while (currentWindow < currentSlide - 1) {\n" + //" while (currentWindow < currentSlide - 1) {\n" + " // remove previous slide\n" + " tempStartPos = windowStartPointers[currentWindow];\n" + " tempEndPos = windowStartPointers[currentWindow + 1];\n" + " currentWindow++;\n" + " if (tempStartPos == tempEndPos || tempEndPos==endPointer) continue;\n" + + addTabs(evictAggrs, 2) + + //" aggrStructures[pid].evict(); \n" + " // write result to the opening windows\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + + addTabs(openingWindows, 2) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevCompletePane++;\n" + " }\n" + 
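The sliding-row variant additionally threads `insertAggrs`/`evictAggrs` through the loop, so each pane's partial aggregate enters a per-worker aggregation structure and is dropped once its pane leaves the window (the commented-out `aggrStructures[pid].insert()`/`evict()` calls). A self-contained sketch of that pattern, using SUM and a plain deque in place of the real structure (which also supports non-invertible aggregates); all names here are illustrative:

```cpp
// Per-pane insert/evict over a sliding window, sketched with SUM so that
// merging partials is a simple addition.
#include <deque>
#include <iostream>
#include <numeric>

struct SlidingSum {
  std::deque<long> panePartials;  // one partial aggregate per pane in the window
  long panesPerWindow;

  void insert(long partial) {
    panePartials.push_back(partial);
    if ((long)panePartials.size() > panesPerWindow)
      panePartials.pop_front();  // evict the pane that left the window
  }
  long query() const {           // complete-window result
    return std::accumulate(panePartials.begin(), panePartials.end(), 0L);
  }
};

int main() {
  SlidingSum agg{{}, 4};                 // window = 4 panes (assumed)
  for (long pane = 0; pane < 8; ++pane) {
    agg.insert(pane);                    // this pane's partial aggregate
    if (pane >= 3)                       // first full window after 4 panes
      std::cout << "window ending at pane " << pane
                << ": sum = " << agg.query() << '\n';
  }
  return 0;
}
```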
"\n" + + addTabs(resultPointers, 1) + + " pointersAndCounts[4] = numberOfOpeningWindows;\n" + " pointersAndCounts[5] = numberOfClosingWindows;\n" + " pointersAndCounts[6] = numberOfPendingWindows;\n" + " pointersAndCounts[7] = numberOfCompleteWindows;\n" + "}\n" + ); return s; } @@ -3952,298 +3842,267 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { } else { closingDef = "interm_tuple_t"; std::string str = "data[bufferPtr-1].timestamp"; - closingWindows.replace(closingWindows.find(str), str.length(), - "prevPane * paneSize;"); - completeWindows.replace(completeWindows.find(str), str.length(), - "prevPane * paneSize;"); + closingWindows.replace(closingWindows.find(str), str.length(), "prevPane * paneSize;"); + completeWindows.replace(completeWindows.find(str), str.length(), "prevPane * paneSize;"); singleKey = "node aggrs;\n"; } s.append( - "void fillEmptyWindows (int pid, int phase, int numOfSlices, int " - "numOfOpening, int numOfClosing,\n" - " long *windowStartPointers, long " - "*windowEndPointers,\n" - " int &numberOfOpeningWindows, int " - "&numberOfClosingWindows, int &numberOfCompleteWindows,\n" + "void fillEmptyWindows (int pid, int phase, int numOfSlices, int numOfOpening, int numOfClosing,\n" + " long *windowStartPointers, long *windowEndPointers,\n" + " int &numberOfOpeningWindows, int &numberOfClosingWindows, int &numberOfCompleteWindows,\n" " int ¤tSlide, int ¤tWindow,\n" - " int &completeWindowsPointer, output_tuple_t " - "*completeWindowsResults, int *completeStartPointers,\n" - " int &closingWindowsPointer, " + - closingDef + " *" + ptr + - "closingWindowsResults, int *closingStartPointers,\n" - //" long *openingWindowIds, long - //*closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" - " long &prevCompletePane, long " - "&prevClosePane, long &prevOpenPane, long &currPane) {\n" - "\n" - " auto mapSize = MAP_SIZE;\n" - " auto paneSize = PANE_SIZE;\n" - " auto panesPerWindow = PANES_PER_WINDOW;\n" - " auto panesPerSlide = PANES_PER_SLIDE;\n" + - addTabs(singleKey, 1) + - " if (phase == 1) {\n" - " // case 1 -- opening at the beginning\n" - " auto prevPane = prevOpenPane + 1;\n" - " for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" - " // fillBuckets with empty;\n" - " //aggrStructs[pid].insertSlices();\n" - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " prevPane++;\n" - " }\n" - " // add more complete\n" - " } else if (phase == 2) {\n" - " // case 2 - opening and complete at the beginning of " - "tumbling\n" - " auto prevPane = prevOpenPane + 1;\n" - " for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" - " //aggrStructs[pid].evictSlices();\n" - " // add elements from the next slide\n" - " //if (currPane-prevPane==1) {\n" - " // prevPane = currPane;\n" - " //aggrStructs[pid].insertSlices();\n" - " //}\n" - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " // complete windows\n" - " if (prevPane - prevCompletePane == panesPerWindow) {\n" - " prevCompletePane += panesPerSlide; " - "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n" - " windowEndPointers[currentWindow++] = -1;\n" - " // write and pack the complete window result\n" - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 5) + - " }\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " numberOfCompleteWindows++;\n" - " 
completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " }\n" - " prevPane++;\n" - " }\n" - " } else if (phase == 3) {\n" - " // case 3 - closing/opening and complete for tumbling\n" - " int ptr = 0;\n" - " auto prevPane = prevOpenPane + 1;\n" - " for (ptr = 0; ptr < numOfClosing; ++ptr) {\n" - " if (prevPane - prevClosePane == panesPerWindow) { // " - "closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " prevPane++;\n" - " }\n" - "\n" - " while (ptr < numOfSlices) {\n" - " // fillBuckets with empty;\n" - " if (prevPane - prevOpenPane == panesPerSlide) { // new " - "slide and possible opening windows\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " if (prevPane - prevCompletePane == panesPerWindow) { // " - "complete window\n" - " windowEndPointers[currentWindow++] = -1;\n" - " numberOfCompleteWindows++;\n" - " break;\n" - " }\n" - " if (prevPane - prevClosePane == panesPerWindow) { // " - "closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " ptr++;\n" - " prevPane++;\n" - " }\n" - "\n" - "\n" - " for (; ptr < numOfSlices; ++ptr) {\n" + - " // add elements from the next slide\n" - " //if (currPane-prevPane==1) {\n" - " // prevPane = currPane;\n" - " // aggrStructs[pid].insertSlices();\n" - " //}\n" - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane = currPane;\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " // complete windows\n" - " if (prevPane - prevCompletePane == panesPerWindow) {\n" - " prevCompletePane += panesPerWindow; " - "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n" - " windowEndPointers[currentWindow++] = -1;\n" - " // write and pack the complete window result\n" + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 5) + - " }\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " }\n" - " prevPane++;\n" - " }\n" - " }\n" - "}\n" - "\n"); + " int &completeWindowsPointer, output_tuple_t *completeWindowsResults, int *completeStartPointers,\n" + " int &closingWindowsPointer, " + closingDef + " *" + ptr + + "closingWindowsResults, int *closingStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" + " long &prevCompletePane, long &prevClosePane, long &prevOpenPane, long &currPane) {\n" + "\n" + " auto mapSize = MAP_SIZE;\n" + " auto paneSize = PANE_SIZE;\n" + " auto panesPerWindow = PANES_PER_WINDOW;\n" + 
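The `replace()` calls introduced where `fillEmptyWindows` is defined swap the result-timestamp expression `data[bufferPtr-1].timestamp` for `prevPane * paneSize`, apparently because a synthesized empty window has no tuple to read a timestamp from, so the window boundary time is reconstructed from the pane id. A one-line sketch of that reconstruction (the helper name is invented):

```cpp
// Reconstruct an empty window's boundary timestamp from its pane id.
long emptyWindowTimestamp(long prevPane, long paneSize) {
  return prevPane * paneSize;  // pane id -> timestamp of the pane boundary
}
```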
" auto panesPerSlide = PANES_PER_SLIDE;\n" + + addTabs(singleKey, 1) + + " if (phase == 1) {\n" + " // case 1 -- opening at the beginning\n" + " auto prevPane = prevOpenPane + 1;\n" + " for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" + " // fillBuckets with empty;\n" + " //aggrStructs[pid].insertSlices();\n" + " if (prevPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = -1;\n" + " }\n" + " prevPane++;\n" + " }\n" + " // add more complete\n" + " } else if (phase == 2) {\n" + " // case 2 - opening and complete at the beginning of tumbling\n" + " auto prevPane = prevOpenPane + 1;\n" + " for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" + " //aggrStructs[pid].evictSlices();\n" + " // add elements from the next slide\n" + " //if (currPane-prevPane==1) {\n" + " // prevPane = currPane;\n" + " //aggrStructs[pid].insertSlices();\n" + " //}\n" + " if (prevPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = -1;\n" + " }\n" + " // complete windows\n" + " if (prevPane - prevCompletePane == panesPerWindow) {\n" + " prevCompletePane += panesPerSlide; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n" + " windowEndPointers[currentWindow++] = -1;\n" + " // write and pack the complete window result\n" + + addPostWindowOperation(4) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 5) + + " }\n" + " // reset values\n" + + addTabs(resetAggrs, 4) + + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " }\n" + " prevPane++;\n" + " }\n" + " } else if (phase == 3) {\n" + " // case 3 - closing/opening and complete for tumbling\n" + " int ptr = 0;\n" + " auto prevPane = prevOpenPane + 1;\n" + " for (ptr = 0; ptr < numOfClosing; ++ptr) {\n" + " if (prevPane - prevClosePane == panesPerWindow) { // closing window\n" + " // write result to the closing windows\n" + + addTabs(closingWindows, 4) + + " prevClosePane += panesPerSlide;\n" + " closingWindowsPointer += mapSize;\n" + " numberOfClosingWindows++;\n" + " closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n" + " // reset values\n" + + addTabs(resetAggrs, 4) + + " }\n" + " prevPane++;\n" + " }\n" + "\n" + " while (ptr < numOfSlices) {\n" + " // fillBuckets with empty;\n" + " if (prevPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n" + " prevOpenPane += panesPerSlide;\n" + " windowStartPointers[currentSlide++] = -1;\n" + " }\n" + " if (prevPane - prevCompletePane == panesPerWindow) { // complete window\n" + " windowEndPointers[currentWindow++] = -1;\n" + " numberOfCompleteWindows++;\n" + " break;\n" + " }\n" + " if (prevPane - prevClosePane == panesPerWindow) { // closing window\n" + " // write result to the closing windows\n" + + addTabs(closingWindows, 4) + + " prevClosePane += panesPerSlide;\n" + " closingWindowsPointer += mapSize;\n" + " numberOfClosingWindows++;\n" + " closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n" + " // reset values\n" + + addTabs(resetAggrs, 4) + + " }\n" + " ptr++;\n" + " prevPane++;\n" + " }\n" + "\n" + "\n" + " for (; ptr < numOfSlices; ++ptr) {\n" + + " // add elements from the next 
slide\n" + " //if (currPane-prevPane==1) {\n" + " // prevPane = currPane;\n" + " // aggrStructs[pid].insertSlices();\n" + " //}\n" + " if (prevPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane = currPane;\n" + " windowStartPointers[currentSlide++] = -1;\n" + " }\n" + " // complete windows\n" + " if (prevPane - prevCompletePane == panesPerWindow) {\n" + " prevCompletePane += panesPerWindow; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n" + " windowEndPointers[currentWindow++] = -1;\n" + + addPostWindowOperation(4) + + " // write and pack the complete window result\n" + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 5) + + " }\n" + " // reset values\n" + + addTabs(resetAggrs, 4) + + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " }\n" + " prevPane++;\n" + " }\n" + " }\n" + "}\n" + "\n" + ); return s; } - std::string getTumblingWindowRange( - std::string outputBuffers, std::string initialiseAggrs, - std::string computeAggrs, std::string resetAggrs, - std::string openingWindows, std::string closingWindows, - std::string pendingWindows, std::string completeWindows, - std::string resultPointers, std::string filter) { + std::string getTumblingWindowRange(std::string outputBuffers, + std::string initialiseAggrs, + std::string computeAggrs, + std::string resetAggrs, + std::string openingWindows, + std::string closingWindows, + std::string pendingWindows, + std::string completeWindows, + std::string resultPointers, + std::string filter) { std::string s; std::string ptr; if (hasGroupBy() && m_usePtrs) { ptr.append("*"); } s.append( - "void processData (int pid, char *inputBuffer, size_t inputBufferSize, " - "long startPointer, long endPointer, long timestampFromPrevBatch,\n" - " long *windowStartPointers, long *windowEndPointers, " - "char *" + - ptr + "openingWindowsBuffer, char *" + ptr + - "closingWindowsBuffer,\n" - " char *" + - ptr + - "pendingWindowsBuffer, char *completeWindowsBuffer,\n" - " int *openingStartPointers, int " - "*closingStartPointers, int *pendingStartPointers, int " - "*completeStartPointers,\n" - //" long *openingWindowIds, long *closingWindowIds, - // long *pendingWindowIds, long *completeWindowIds,\n" - " long streamStartPointer, int *pointersAndCounts, " - "char *staticBuffer) {" - " // Input Buffer\n" - " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" - "\n" - " // Output Buffers\n" + - addTabs(outputBuffers, 1) + - " output_tuple_t *completeWindowsResults = (output_tuple_t *) " - "completeWindowsBuffer; // the results here are packed\n" - "\n" - //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" - //" windowStartPointers[i] = -1;\n" - //" windowEndPointers[i] = -1;\n" - //" }\n" - //"\n" - " int tupleSize = sizeof(input_tuple_t);\n" - " int mapSize = MAP_SIZE;\n" - " long paneSize = PANE_SIZE;\n" - " long panesPerSlide = PANES_PER_SLIDE;\n" - " long panesPerWindow = PANES_PER_WINDOW;\n" - " long windowSlide = WINDOW_SLIDE;\n" - " long windowSize = WINDOW_SIZE;\n" - " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" - " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" - "\n" - " // Set the first pointer for all types of windows\n" - " openingStartPointers[0] = openingWindowsPointer;\n" - " closingStartPointers[0] = closingWindowsPointer;\n" - " pendingStartPointers[0] = pendingWindowsPointer;\n" - " completeStartPointers[0] = 
completeWindowsPointer;\n" - "\n" - " // initialize aggregation data structures\n" + - addTabs(initialiseAggrs, 1) + - "\n" - " // Slicing based on panes logic\n" - " // Previous, next, and current pane ids\n" - " long prevClosePane, currPane, prevCompletePane, prevOpenPane, " - "startPane;\n" - " int numberOfOpeningWindows = 0;\n" - " int numberOfClosingWindows = 0;\n" - " int numberOfPendingWindows = 0;\n" - " int numberOfCompleteWindows = 0;\n" - " int currentSlide = 0;\n" - " int currentWindow = 0;\n" - " long step = 1; //tupleSize;\n" - " long streamPtr = streamStartPointer / tupleSize;\n" - " long bufferPtr = startPointer / tupleSize;\n" - " startPointer = startPointer / tupleSize;\n" - " endPointer = endPointer / tupleSize;\n" - " long diff = streamPtr - bufferPtr;\n" - " long tempStartPos, tempEndPos;\n" - "\n" - " //windowStartPointers[currentSlide++] = bufferPtr;\n" - " bool hasAddedComplete = false;\n" -#if defined(HAVE_NUMA) - " auto bufferSize = (long) inputBufferSize;\n" -#else - " auto bufferSize = (long) BUFFER_SIZE;\n" -// " timestampFromPrevBatch = (bufferPtr != 0) ? -// data[bufferPtr - 1].timestamp / paneSize :\n" " -// data[bufferSize / sizeof(input_tuple_t) - 1].timestamp / -// paneSize;" -#endif - " long prevPane = timestampFromPrevBatch / paneSize;\n" - "\n" - " startPane = (data[bufferPtr].timestamp / paneSize);\n" - " prevClosePane = prevOpenPane = (data[bufferPtr].timestamp / " - "paneSize) - panesPerSlide;\n" - " prevCompletePane = data[bufferPtr].timestamp / paneSize;\n" - " if (streamStartPointer!=0) {\n" - " prevOpenPane = timestampFromPrevBatch / paneSize;\n" - " long tmpPtr = data[bufferPtr].timestamp;\n" - " tmpPtr = tmpPtr/windowSlide;\n" - " tmpPtr = tmpPtr * windowSlide;\n" - " if (data[bufferPtr].timestamp%windowSlide!=0) {\n" - " prevOpenPane = tmpPtr / paneSize;\n" - " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" - " }\n" - " prevClosePane = timestampFromPrevBatch / paneSize;\n" - " if (prevOpenPane == prevCompletePane)\n" - " prevCompletePane += panesPerSlide;\n" - " //while (data[bufferPtr].timestamp-tmpPtr + windowSlide <= " - "windowSize) {\n" - " // tmpPtr -= windowSlide;\n" - " //}\n" - " //prevClosePane = tmpPtr / paneSize;\n" - " if (prevClosePane < 0)\n" - " prevClosePane = 0;\n" - " }\n" - "\n" - " bool completeStartsFromPane = startPane==prevCompletePane;\n" - " bool hasComplete = ((data[endPointer-1].timestamp - " - "data[startPointer].timestamp) >= windowSize);\n" - "\n" - " // The beginning of the stream. 
Check if we have at least one " - "complete window.\n" - " if (streamPtr == 0) {\n" - " // check for opening windows until finding the first " - "complete\n" - " while (bufferPtr < endPointer) {\n"); - if (!filter.empty()) s.append(addTabs(filter, 3) + "\t\t\t{\n"); + "void processData (int pid, char *inputBuffer, size_t inputBufferSize, long startPointer, long endPointer, long timestampFromPrevBatch,\n" + " long *windowStartPointers, long *windowEndPointers, char *" + ptr + + "openingWindowsBuffer, char *" + ptr + "closingWindowsBuffer,\n" + " char *" + ptr + + "pendingWindowsBuffer, char *completeWindowsBuffer,\n" + " int *openingStartPointers, int *closingStartPointers, int *pendingStartPointers, int *completeStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" + " long streamStartPointer, int *pointersAndCounts, char *staticBuffer) {" + " // Input Buffer\n" + " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" + "\n" + " // Output Buffers\n" + + addTabs(outputBuffers, 1) + + " output_tuple_t *completeWindowsResults = (output_tuple_t *) completeWindowsBuffer; // the results here are packed\n" + "\n" + //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" + //" windowStartPointers[i] = -1;\n" + //" windowEndPointers[i] = -1;\n" + //" }\n" + //"\n" + " int tupleSize = sizeof(input_tuple_t);\n" + " int mapSize = MAP_SIZE;\n" + " long paneSize = PANE_SIZE;\n" + " long panesPerSlide = PANES_PER_SLIDE;\n" + " long panesPerWindow = PANES_PER_WINDOW;\n" + " long windowSlide = WINDOW_SLIDE;\n" + " long windowSize = WINDOW_SIZE;\n" + " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" + " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" + "\n" + " // Set the first pointer for all types of windows\n" + " openingStartPointers[0] = openingWindowsPointer;\n" + " closingStartPointers[0] = closingWindowsPointer;\n" + " pendingStartPointers[0] = pendingWindowsPointer;\n" + " completeStartPointers[0] = completeWindowsPointer;\n" + "\n" + " // initialize aggregation data structures\n" + + addTabs(initialiseAggrs, 1) + + "\n" + " // Slicing based on panes logic\n" + " // Previous, next, and current pane ids\n" + " long prevClosePane, currPane, prevCompletePane, prevOpenPane, startPane;\n" + " int numberOfOpeningWindows = 0;\n" + " int numberOfClosingWindows = 0;\n" + " int numberOfPendingWindows = 0;\n" + " int numberOfCompleteWindows = 0;\n" + " int currentSlide = 0;\n" + " int currentWindow = 0;\n" + " long step = 1; //tupleSize;\n" + " long streamPtr = streamStartPointer / tupleSize;\n" + " long bufferPtr = startPointer / tupleSize;\n" + " startPointer = startPointer / tupleSize;\n" + " endPointer = endPointer / tupleSize;\n" + " long diff = streamPtr - bufferPtr;\n" + " long tempStartPos, tempEndPos;\n" + "\n" + " //windowStartPointers[currentSlide++] = bufferPtr;\n" + " bool hasAddedComplete = false;\n" + #if defined(HAVE_NUMA) + " auto bufferSize = (long) inputBufferSize;\n" + #else + " auto bufferSize = (long) BUFFER_SIZE;\n" + // " timestampFromPrevBatch = (bufferPtr != 0) ? 
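In the range-based variants the optional `filter` string is spliced around the per-tuple body: when non-empty, the predicate is appended first and the pane bookkeeping plus aggregation is wrapped in a braced block that it guards. A small sketch of the splicing pattern as the generator applies it (the predicate text is an assumption):

```cpp
// How a non-empty filter is woven into the generated loop body.
#include <iostream>
#include <string>

int main() {
  std::string filter = "if (data[bufferPtr]._3 < 100)\n";  // assumed predicate
  std::string s = "while (bufferPtr < endPointer) {\n";
  if (!filter.empty()) s.append(filter + "{\n");   // open the guarded block
  s.append("  /* pane checks + aggregation */\n");
  if (!filter.empty()) s.append("}\n");            // close it after the body
  s.append("  bufferPtr += step;\n}\n");
  std::cout << s;
  return 0;
}
```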
data[bufferPtr - 1].timestamp / paneSize :\n" + // " data[bufferSize / sizeof(input_tuple_t) - 1].timestamp / paneSize;" + #endif + " long prevPane = timestampFromPrevBatch / paneSize;\n" + "\n" + " startPane = (data[bufferPtr].timestamp / paneSize);\n" + " prevClosePane = prevOpenPane = (data[bufferPtr].timestamp / paneSize) - panesPerSlide;\n" + " prevCompletePane = data[bufferPtr].timestamp / paneSize;\n" + " if (streamStartPointer!=0) {\n" + " prevOpenPane = timestampFromPrevBatch / paneSize;\n" + " long tmpPtr = data[bufferPtr].timestamp;\n" + " tmpPtr = tmpPtr/windowSlide;\n" + " tmpPtr = tmpPtr * windowSlide;\n" + " if (data[bufferPtr].timestamp%windowSlide!=0) {\n" + " prevOpenPane = tmpPtr / paneSize;\n" + " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" + " }\n" + " prevClosePane = timestampFromPrevBatch / paneSize;\n" + " if (prevOpenPane == prevCompletePane)\n" + " prevCompletePane += panesPerSlide;\n" + " //while (data[bufferPtr].timestamp-tmpPtr + windowSlide <= windowSize) {\n" + " // tmpPtr -= windowSlide;\n" + " //}\n" + " //prevClosePane = tmpPtr / paneSize;\n" + " if (prevClosePane < 0)\n" + " prevClosePane = 0;\n" + " }\n" + "\n" + " bool completeStartsFromPane = startPane==prevCompletePane;\n" + " bool hasComplete = ((data[endPointer-1].timestamp - data[startPointer].timestamp) >= windowSize);\n" + "\n" + " // The beginning of the stream. Check if we have at least one complete window.\n" + " if (streamPtr == 0) {\n" + " // check for opening windows until finding the first complete\n" + " while (bufferPtr < endPointer) {\n"); + if (!filter.empty()) + s.append( + addTabs(filter, 3) + + "\t\t\t{\n"); s.append( " currPane = data[bufferPtr].timestamp / paneSize;\n" " if (currPane - prevOpenPane == panesPerSlide) {\n" @@ -4255,209 +4114,175 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " numberOfCompleteWindows++;\n" " break;\n" " }\n" - " if (currPane - prevOpenPane > panesPerSlide || currPane - " - "prevCompletePane > panesPerWindow) {\n" + " if (currPane - prevOpenPane > panesPerSlide || currPane - prevCompletePane > panesPerWindow) {\n" " // fill bubbles\n" " int numOfSlices, numOfComplete, numOfOpening, phase;\n" " phase = 1;\n" " //if (currPane - prevOpenPane > panesPerSlide) {\n" - " numOfSlices = currPane - prevOpenPane; // - " - "panesPerSlide -1;\n" - " numOfComplete = " - "currPane+numOfSlices-panesPerWindow;\n" + " numOfSlices = currPane - prevOpenPane; // - panesPerSlide -1;\n" + " numOfComplete = currPane+numOfSlices-panesPerWindow;\n" " //} else {\n" - " // numOfSlices = currPane - prevCompletePane - " - "panesPerWindow -1;\n" - " // numOfComplete = " - "currPane+numOfSlices-panesPerWindow;\n" + " // numOfSlices = currPane - prevCompletePane - panesPerWindow -1;\n" + " // numOfComplete = currPane+numOfSlices-panesPerWindow;\n" " //}\n" " if (numOfComplete > 0) {\n" - " numOfOpening = prevCompletePane + panesPerWindow " - "- prevOpenPane;\n" + " numOfOpening = prevCompletePane + panesPerWindow - prevOpenPane;\n" " //phase = 2;\n" " hasAddedComplete = true;\n" " }\n" - " fillEmptyWindows(pid, phase, numOfSlices, " - "numOfOpening, 0, windowStartPointers,\n" - " windowEndPointers, numberOfOpeningWindows, " - "numberOfClosingWindows, numberOfCompleteWindows,\n" + " fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, 0, windowStartPointers,\n" + " windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n" " currentSlide, currentWindow,\n" - " completeWindowsPointer, " - 
"completeWindowsResults, completeStartPointers,\n" - " closingWindowsPointer, " - "closingWindowsResults, closingStartPointers,\n" - //" openingWindowIds, - // closingWindowIds, pendingWindowIds, completeWindowIds,\n" - " prevCompletePane, prevClosePane, " - "prevOpenPane, currPane);\n" + " completeWindowsPointer, completeWindowsResults, completeStartPointers,\n" + " closingWindowsPointer, closingWindowsResults, closingStartPointers,\n" + //" openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n" + " prevCompletePane, prevClosePane, prevOpenPane, currPane);\n" " }\n" " // project + aggregate here\n" + - addTabs(computeAggrs, 3)); - if (!filter.empty()) s.append("\t\t\t};\n"); + addTabs(computeAggrs, 3)); + if (!filter.empty()) + s.append("\t\t\t};\n"); s.append( " streamPtr += step;\n" " bufferPtr += step;\n" " }\n" " }\n" - " // Check for closing and opening windows, until we have a " - "complete window.\n" + " // Check for closing and opening windows, until we have a complete window.\n" " else {\n" " while (bufferPtr < endPointer) {\n"); - if (!filter.empty()) s.append(addTabs(filter, 3) + "\t\t\t{\n"); + if (!filter.empty()) + s.append( + addTabs(filter, 3) + + "\t\t\t{\n"); s.append( " currPane = data[bufferPtr].timestamp / paneSize;\n" - " if (currPane - prevOpenPane == panesPerSlide) { // new " - "slide and possible opening windows\n" + " if (currPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n" " prevOpenPane += panesPerSlide;\n" " windowStartPointers[currentSlide++] = bufferPtr;\n" " }\n" - " if (hasComplete && currPane - prevCompletePane == " - "panesPerWindow) { // complete window\n" + " if (hasComplete && currPane - prevCompletePane == panesPerWindow) { // complete window\n" " windowEndPointers[currentWindow++] = bufferPtr;\n" " numberOfCompleteWindows++;\n" " break;\n" " }\n" - " if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane " - "<= startPane && currPane - prevClosePane == panesPerWindow) { // " - "closing window\n" + " if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" " // write result to the closing windows\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " if (currPane - prevOpenPane > panesPerSlide || currPane - " - "prevClosePane > panesPerWindow || currPane - prevCompletePane > " - "panesPerWindow) {\n" - " // fill bubbles\n" - " int numOfSlices, numOfComplete, numOfOpening, " - "numOfClosing, phase;\n" - " numOfClosing = 0;\n" - " phase = 3;\n" - " if (currPane - prevOpenPane > panesPerSlide) {\n" - " numOfSlices = currPane - prevPane; //prevOpenPane " - "- panesPerSlide -1;\n" - " numOfComplete = " - "prevPane+numOfSlices-(panesPerWindow+prevCompletePane);\n" - " } else if (currPane - prevClosePane > panesPerWindow) " - "{\n" - " numOfSlices = currPane - prevClosePane - " - "panesPerWindow -1;\n" - " numOfClosing = currPane - prevPane;\n" - " } else {\n" - " numOfSlices = currPane - prevCompletePane - " - "panesPerWindow -1;\n" - " numOfComplete = " - "currPane+numOfSlices-panesPerWindow;\n" - " }\n" - " if (numOfComplete > 0) {\n" - " hasAddedComplete = true;\n" - " }\n" - " 
- "        fillEmptyWindows(pid, phase, numOfSlices, "
- "numOfOpening, numOfClosing, windowStartPointers,\n"
- "                 windowEndPointers, "
- "numberOfOpeningWindows, numberOfClosingWindows, "
- "numberOfCompleteWindows,\n"
- "                 currentSlide, currentWindow,\n"
- "                 completeWindowsPointer, "
- "completeWindowsResults, completeStartPointers,\n"
- "                 closingWindowsPointer, "
- "closingWindowsResults, closingStartPointers,\n"
- //"                 openingWindowIds,
- // closingWindowIds, pendingWindowIds, completeWindowIds,\n"
- "                 prevCompletePane, prevClosePane, "
- "prevOpenPane, currPane);\n"
- "        if (hasAddedComplete)\n"
- "          break;\n"
- "      }\n"
- "      // project + aggregate here\n" +
- addTabs(computeAggrs, 3));
- if (!filter.empty()) s.append("\t\t\t}\n");
+ addTabs(closingWindows, 4) +
+ "        prevClosePane += panesPerSlide;\n"
+ "        closingWindowsPointer += mapSize;\n"
+ "        numberOfClosingWindows++;\n"
+ "        closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n"
+ //"        closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n"
+ "        // reset values\n" +
+ addTabs(resetAggrs, 4) +
+ "      }\n"
+ "      if (currPane - prevOpenPane > panesPerSlide || currPane - prevClosePane > panesPerWindow || currPane - prevCompletePane > panesPerWindow) {\n"
+ "        // fill bubbles\n"
+ "        int numOfSlices, numOfComplete, numOfOpening, numOfClosing, phase;\n"
+ "        numOfClosing = 0;\n"
+ "        phase = 3;\n"
+ "        if (currPane - prevOpenPane > panesPerSlide) {\n"
+ "          numOfSlices = currPane - prevPane; //prevOpenPane - panesPerSlide -1;\n"
+ "          numOfComplete = prevPane+numOfSlices-(panesPerWindow+prevCompletePane);\n"
+ "        } else if (currPane - prevClosePane > panesPerWindow) {\n"
+ "          numOfSlices = currPane - prevClosePane - panesPerWindow -1;\n"
+ "          numOfClosing = currPane - prevPane;\n"
+ "        } else {\n"
+ "          numOfSlices = currPane - prevCompletePane - panesPerWindow -1;\n"
+ "          numOfComplete = currPane+numOfSlices-panesPerWindow;\n"
+ "        }\n"
+ "        if (numOfComplete > 0) {\n"
+ "          hasAddedComplete = true;\n"
+ "        }\n"
+ "        fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, numOfClosing, windowStartPointers,\n"
+ "                 windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n"
+ "                 currentSlide, currentWindow,\n"
+ "                 completeWindowsPointer, completeWindowsResults, completeStartPointers,\n"
+ "                 closingWindowsPointer, closingWindowsResults, closingStartPointers,\n"
+ //"                 openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n"
+ "                 prevCompletePane, prevClosePane, prevOpenPane, currPane);\n"
+ "        if (hasAddedComplete)\n"
+ "          break;\n"
+ "      }\n"
+ "      // project + aggregate here\n" +
+ addTabs(computeAggrs, 3));
+ if (!filter.empty())
+   s.append("\t\t\t}\n");
  s.append(
      "      streamPtr += step;\n"
      "      bufferPtr += step;\n"
      "    }\n"
      "\n"
      "    // check for pending windows\n"
- "    if ((numberOfClosingWindows == 0 || windowSize!=windowSlide) "
- "&& numberOfCompleteWindows == 0) {\n"
+ "    if ((numberOfClosingWindows == 0 || windowSize!=windowSlide) && numberOfCompleteWindows == 0) {\n"
      //"      currPane = data[bufferPtr].timestamp / paneSize;\n"
- //"      if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane
- //<= startPane && currPane - prevClosePane == panesPerWindow) { //
- // closing window\n" "        // write result to the closing
- // windows\n" + addTabs(closingWindows, 5) + "        closingWindowsPointer +=
- // mapSize;\n" "        numberOfClosingWindows++;\n" "
- // closingStartPointers[numberOfClosingWindows] =
- // closingWindowsPointer;\n" "        // reset values\n" +
- // addTabs(resetAggrs, 4) +
+ //"      if (/*prevClosePane >= panesPerWindow &&*/ prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n"
+ //"        // write result to the closing windows\n" +
+ //addTabs(closingWindows, 5) +
+ //"        closingWindowsPointer += mapSize;\n"
+ //"        numberOfClosingWindows++;\n"
+ //"        closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n"
+ //"        // reset values\n" +
+ //addTabs(resetAggrs, 4) +
      //"      }\n"
      "      // write result to pending windows\n" +
- addTabs(pendingWindows, 3) +
- "      pendingWindowsPointer += mapSize;\n"
- "      numberOfPendingWindows++;\n"
- "      pendingStartPointers[numberOfPendingWindows] = "
- "pendingWindowsPointer;\n"
- "      // reset values\n" +
- addTabs(resetAggrs, 3) +
- "    }\n"
- "  }\n"
- "\n"
- "  if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || "
- "currentSlide >= 1)) { // We only have one opening window so far...\n"
- "    // write results\n" +
- addTabs(openingWindows, 2) +
- "    openingWindowsPointer += mapSize;\n"
- "    numberOfOpeningWindows++;\n"
- "    openingStartPointers[numberOfOpeningWindows] = "
- "openingWindowsPointer;\n"
- //"    openingWindowIds[numberOfOpeningWindows-1] =
- // prevOpenPane++;\n"
- "\n"
- "  } else if (numberOfCompleteWindows > 0) {\n"
- "    // write results and pack them for the first complete window "
- "in the batch\n"
- "    if (!hasAddedComplete) {\n"
- "      for (int i = 0; i < mapSize; i++) {\n" +
- addTabs(completeWindows, 3) +
- //"      completeWindowsPointer++;\n"
- "      }\n"
- "      // reset values\n" +
- addTabs(resetAggrs, 2) +
- "      // write in the correct slot, as the value has already been "
- "incremented!\n"
- "      completeStartPointers[numberOfCompleteWindows] = "
- "completeWindowsPointer;\n"
- //"      completeWindowIds[numberOfCompleteWindows-1] =
- // prevCompletePane - 1;\n"
- "    }\n"
- "    // project + aggregate here\n");
- if (!filter.empty()) s.append(addTabs(filter, 2) + "\t\t{\n");
- s.append(addTabs(computeAggrs, 2));
- if (!filter.empty()) s.append("\t\t}\n");
+ addTabs(pendingWindows, 3) +
+ "      pendingWindowsPointer += mapSize;\n"
+ "      numberOfPendingWindows++;\n"
+ "      pendingStartPointers[numberOfPendingWindows] = pendingWindowsPointer;\n"
+ "      // reset values\n" +
+ addTabs(resetAggrs, 3) +
+ "    }\n"
+ "  }\n"
+ "\n"
+ "  if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || currentSlide >= 1)) { // We only have one opening window so far...\n"
+ "    // write results\n" +
+ addTabs(openingWindows, 2) +
+ "    openingWindowsPointer += mapSize;\n"
+ "    numberOfOpeningWindows++;\n"
+ "    openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n"
+ //"    openingWindowIds[numberOfOpeningWindows-1] = prevOpenPane++;\n"
+ "\n"
+ "  } else if (numberOfCompleteWindows > 0) {\n"
+ "    // write results and pack them for the first complete window in the batch\n"
+ "    if (!hasAddedComplete) {\n" +
+ addPostWindowOperation(2) +
+ "      for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 3) +
+ //"      completeWindowsPointer++;\n"
+ "      }\n"
+ "      // reset values\n" +
+ addTabs(resetAggrs, 2) +
+ "      // write in the correct slot, as the value has already been incremented!\n"
+ "      completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ //"      completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "    }\n"
+ "    // project + aggregate here\n");
+ if (!filter.empty())
+   s.append(
+       addTabs(filter, 2) +
+       "\t\t{\n");
+ s.append(
+     addTabs(computeAggrs, 2));
+ if (!filter.empty())
+   s.append("\t\t}\n");
  s.append(
      "\n"
      "  bufferPtr = windowEndPointers[0];\n"
- "  //prevOpenPane = "
- "(data[windowStartPointers[currentSlide-1]].timestamp) / paneSize;\n"
+ "  //prevOpenPane = (data[windowStartPointers[currentSlide-1]].timestamp) / paneSize;\n"
      "  int idx = 1;\n"
- "  prevCompletePane += panesPerSlide; "
- "//(data[windowStartPointers[idx++]].timestamp) / paneSize;\n"
- "  int removalIndex = currentWindow; //(startingFromPane) ? "
- "currentWindow : currentWindow + 1;\n"
+ "  prevCompletePane += panesPerSlide; //(data[windowStartPointers[idx++]].timestamp) / paneSize;\n"
+ "  int removalIndex = currentWindow; //(startingFromPane) ? currentWindow : currentWindow + 1;\n"
      "  bool foundComplete = false;\n"
      "  while (bufferPtr < endPointer) {\n"
      "    // add elements from the next slide\n"
- "    //bufferPtr = windowEndPointers[currentWindow - 1] + 1; "
- "// take the next position, as we have already computed this value\n"
+ "    //bufferPtr = windowEndPointers[currentWindow - 1] + 1; // take the next position, as we have already computed this value\n"
      "    foundComplete = false;\n"
      "    while (true) {\n");
- if (!filter.empty()) s.append(addTabs(filter, 4) + "\t\t\t{\n");
+ if (!filter.empty())
+   s.append(
+       addTabs(filter, 4) +
+       "\t\t\t{\n");
  s.append(
      "      auto dataPtr = bufferPtr;\n"
      "      if (bufferPtr*tupleSize >= bufferSize)\n"
@@ -4470,69 +4295,54 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode {
      "      // complete windows\n"
      "      if (currPane - prevCompletePane == panesPerWindow) {\n"
      "        //prevPane = currPane;\n"
- "        prevCompletePane += panesPerSlide; "
- "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
+ "        prevCompletePane += panesPerSlide; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
      "\n"
      "        windowEndPointers[currentWindow++] = dataPtr;\n"
      "        // write and pack the complete window result\n"
      "        //map.setValues();\n" +
+ addPostWindowOperation(5) +
      "        for (int i = 0; i < mapSize; i++) {\n" +
- addTabs(completeWindows, 6) +
- //"        completeWindowsPointer++;\n"
- "        }\n"
- "        numberOfCompleteWindows++;\n"
- "        completeStartPointers[numberOfCompleteWindows] = "
- "completeWindowsPointer;\n"
- //"        completeWindowIds[numberOfCompleteWindows-1] =
- // prevCompletePane - 1;\n"
- "        foundComplete = true;\n"
- "        // reset values\n" +
- addTabs(resetAggrs, 5) +
- "      }\n"
- "      if (currPane - prevOpenPane > panesPerSlide || "
- "currPane - prevCompletePane > panesPerWindow) {\n"
- "        // fill bubbles\n"
- "        int numOfSlices, numOfComplete, numOfOpening, "
- "phase;\n"
- "        phase = 2;\n"
- "        //if (currPane - prevOpenPane > panesPerSlide) {\n"
- "          numOfSlices = currPane - prevPane; "
- "//prevOpenPane - panesPerSlide -1;\n"
- "          numOfComplete = "
- "prevPane+numOfSlices-(panesPerWindow+prevCompletePane);\n"
- "        //} else {\n"
- "        //  numOfSlices = currPane - prevCompletePane - "
- "panesPerWindow -1;\n"
- "        //  numOfComplete = "
- "currPane+numOfSlices-panesPerWindow;\n"
- "        //}\n"
- "        if (numOfComplete > 0) {\n"
- "\n"
- "          numOfOpening = prevCompletePane + "
- "panesPerWindow - prevOpenPane;\n"
- "          foundComplete = true;\n"
- "        }\n"
- "        fillEmptyWindows(pid, phase, numOfSlices, "
- "numOfOpening, 0, windowStartPointers,\n"
- "                 windowEndPointers, "
- "numberOfOpeningWindows, numberOfClosingWindows, "
- "numberOfCompleteWindows,\n"
- "                 currentSlide, currentWindow,\n"
- "                 completeWindowsPointer, "
- "completeWindowsResults, completeStartPointers,\n"
- "                 closingWindowsPointer, "
- "closingWindowsResults, closingStartPointers,\n"
- //"                 openingWindowIds,
- // closingWindowIds, pendingWindowIds, completeWindowIds,\n"
- "                 prevCompletePane, "
- "prevClosePane, prevOpenPane, currPane);\n"
}\n" - " if (bufferPtr >= endPointer) {\n" - " break;\n" - " }\n" - " // project + aggregate here\n" + - addTabs(computeAggrs, 4)); - if (!filter.empty()) s.append("\t\t\t}\n"); + addTabs(completeWindows, 6) + + //" completeWindowsPointer++;\n" + " }\n" + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " foundComplete = true;\n" + " // reset values\n" + + addTabs(resetAggrs, 5) + + " }\n" + " if (currPane - prevOpenPane > panesPerSlide || currPane - prevCompletePane > panesPerWindow) {\n" + " // fill bubbles\n" + " int numOfSlices, numOfComplete, numOfOpening, phase;\n" + " phase = 2;\n" + " //if (currPane - prevOpenPane > panesPerSlide) {\n" + " numOfSlices = currPane - prevPane; //prevOpenPane - panesPerSlide -1;\n" + " numOfComplete = prevPane+numOfSlices-(panesPerWindow+prevCompletePane);\n" + " //} else {\n" + " // numOfSlices = currPane - prevCompletePane - panesPerWindow -1;\n" + " // numOfComplete = currPane+numOfSlices-panesPerWindow;\n" + " //}\n" + " if (numOfComplete > 0) {\n" + "\n" + " numOfOpening = prevCompletePane + panesPerWindow - prevOpenPane;\n" + " foundComplete = true;\n" + " }\n" + " fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, 0, windowStartPointers,\n" + " windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n" + " currentSlide, currentWindow,\n" + " completeWindowsPointer, completeWindowsResults, completeStartPointers,\n" + " closingWindowsPointer, closingWindowsResults, closingStartPointers,\n" + //" openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n" + " prevCompletePane, prevClosePane, prevOpenPane, currPane);\n" + " }\n" + " if (bufferPtr >= endPointer) {\n" + " break;\n" + " }\n" + " // project + aggregate here\n" + + addTabs(computeAggrs, 4)); + if (!filter.empty()) + s.append("\t\t\t}\n"); s.append( " bufferPtr += step;\n" " if (foundComplete) {\n" @@ -4542,26 +4352,22 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " removalIndex++;\n" " }\n" "\n" - " if (!foundComplete) { // we have reached the first open " - "window after all the complete ones\n" - " // write the first open window if we have already " - "computed the result\n" + - addTabs(openingWindows, 3) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevCompletePane++;\n" - " }\n" - " }\n" - "\n" + - addTabs(resultPointers, 1) + - " pointersAndCounts[4] = numberOfOpeningWindows;\n" - " pointersAndCounts[5] = numberOfClosingWindows;\n" - " pointersAndCounts[6] = numberOfPendingWindows;\n" - " pointersAndCounts[7] = numberOfCompleteWindows;\n" - "}\n"); + " if (!foundComplete) { // we have reached the first open window after all the complete ones\n" + " // write the first open window if we have already computed the result\n" + + addTabs(openingWindows, 3) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevCompletePane++;\n" + " }\n" + " }\n" + "\n" + + addTabs(resultPointers, 1) + + " pointersAndCounts[4] = numberOfOpeningWindows;\n" + " pointersAndCounts[5] = numberOfClosingWindows;\n" + " pointersAndCounts[6] = numberOfPendingWindows;\n" 
+ " pointersAndCounts[7] = numberOfCompleteWindows;\n" + "}\n"); return s; } @@ -4584,536 +4390,470 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { closingDef = "interm_tuple_t"; std::string str1 = "data[bufferPtr-1].timestamp"; std::string str2 = "prevCompletePane"; - closingWindows.replace(closingWindows.find(str1), str1.length(), - "prevPane * paneSize;"); - completeWindows.replace(completeWindows.find(str2), str2.length(), - "prevPane"); + closingWindows.replace(closingWindows.find(str1), str1.length(), "prevPane * paneSize;"); + completeWindows.replace(completeWindows.find(str2), str2.length(), "prevPane"); singleKey = "node aggrs;\n"; } s.append( - "void fillEmptyWindows (int pid, int phase, int numOfSlices, int " - "numOfOpening, int numOfClosing,\n" - " long *windowStartPointers, long " - "*windowEndPointers,\n" - " int &numberOfOpeningWindows, int " - "&numberOfClosingWindows, int &numberOfCompleteWindows,\n" - " long bufferPtr, int ¤tSlide, int " - "¤tWindow,\n" - " int &completeWindowsPointer, output_tuple_t " - "*completeWindowsResults, int *completeStartPointers,\n" - " int &closingWindowsPointer, " + - closingDef + " *" + ptr + - "closingWindowsResults, int *closingStartPointers,\n" - //" long *openingWindowIds, long - //*closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" - " long &prevCompletePane, long " - "&prevClosePane, long &prevOpenPane, long &currPane) {\n" - "\n" - " auto mapSize = MAP_SIZE;\n" - " auto paneSize = PANE_SIZE;\n" - " auto panesPerWindow = PANES_PER_WINDOW;\n" - " auto panesPerSlide = PANES_PER_SLIDE;\n" + - addTabs(singleKey, 1) + " // store previous results\n" + - addTabs(insertAggrs, 1) + addTabs(resetAggrs, 1) + - " if (phase == 1) {\n" - " // case 1 -- opening at the beginning\n" - " auto prevPane = prevOpenPane + 1;\n" - " int ptr;\n" - " for (ptr = 0; ptr < numOfOpening; ++ptr) {\n" - " // fillBuckets with empty;\n" - " //aggrStructs[pid].insertSlices();\n" - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " if (ptr == 0) {\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " } else {\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " }\n" - " prevPane++;\n" - " }\n" - " if (numOfOpening!=numOfSlices) {\n" - " // write results and pack them for the first complete " - "window in the batch\n" + - addTabs(setValues, 3) + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 4) + - " }\n" - " // reset values\n" + - addTabs(resetAggrs, 3) + - " // write in the correct slot, as the value has already " - "been incremented!\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - " prevCompletePane += panesPerSlide;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " numberOfCompleteWindows++;\n " - "\n" - " for (; ptr < numOfSlices; ++ptr) {\n" + - addTabs(evictAggrs, 4) + - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " // complete windows\n" - " if (prevPane - prevCompletePane == panesPerWindow) {\n" - " prevCompletePane += panesPerSlide; " - "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n" - " windowEndPointers[currentWindow++] = -1;\n" - " // write and pack the complete window result\n" + - addTabs(setValues, 5) + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 6) + - " }\n" - " // reset values\n" + - 
addTabs(resetAggrs, 5) + - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " }\n" - " prevPane++;\n" - " }\n" - "\n" - " }\n" - " // add more complete\n" - " } else if (phase == 2) {\n" - " // case 2 - opening and complete at the beginning of " - "tumbling\n" - " auto prevPane = prevOpenPane + 1;\n" - " for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" + - addTabs(evictAggrs, 3) + - " // add elements from the next slide\n" - " //if (currPane-prevPane==1) {\n" - " // prevPane = currPane;\n" - " //aggrStructs[pid].insertSlices();\n" - " //}\n" - " if (prevPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane += panesPerSlide;\n" - " if (ptr == 0) {\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " } else {\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " }\n" - " // complete windows\n" - " if (prevPane - prevCompletePane == panesPerWindow) {\n" - " prevCompletePane += panesPerSlide; " - "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n" - " windowEndPointers[currentWindow++] = -1;\n" - " // write and pack the complete window result\n" + - addTabs(setValues, 4) + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 5) + - " }\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " }\n" - " prevPane++;\n" - " }\n" - " } else if (phase == 3) {\n" - " // case 3 - closing/opening and complete for tumbling\n" - " int ptr = 0;\n" - " auto prevPane = prevOpenPane + 1;\n" - " for (ptr = 0; ptr < numOfClosing; ++ptr) {\n" - " if (prevPane - prevClosePane == panesPerWindow) { // " - "closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " prevPane++;\n" - " }\n" - "\n" - " while (ptr < numOfSlices) {\n" - " // fillBuckets with empty;\n" - " if (prevPane - prevOpenPane == panesPerSlide) { // new " - "slide and possible opening windows\n" - " prevOpenPane += panesPerSlide;\n" - " if (ptr == 0) {\n" - " windowStartPointers[currentSlide++] = bufferPtr;\n" - " } else {\n" - " windowStartPointers[currentSlide++] = -1;\n" - " }\n" - " }\n" - " if (prevPane - prevCompletePane == panesPerWindow) { // " - "complete window\n" - " windowEndPointers[currentWindow++] = -1;\n" - " numberOfCompleteWindows++;\n" - " ptr++;\n" - " prevPane++;\n" - " break;\n" - " }\n" - " if (prevPane - prevClosePane == panesPerWindow) { // " - "closing window\n" - " // write result to the closing windows\n" + - addTabs(closingWindows, 4) + - " prevClosePane += panesPerSlide;\n" - " closingWindowsPointer += mapSize;\n" - " numberOfClosingWindows++;\n" - " closingStartPointers[numberOfClosingWindows] = " - "closingWindowsPointer;\n" - //" closingWindowIds[numberOfClosingWindows-1] = - // prevClosePane - panesPerSlide;\n" - " // reset values\n" + - addTabs(resetAggrs, 4) + - " }\n" - " ptr++;\n" - " prevPane++;\n" - " }\n" - "\n" - " if 
- "    if (ptr!=numOfSlices) {\n"
- "      // write results and pack them for the first complete "
- "window in the batch\n" +
- addTabs(setValues, 3) +
- "      for (int i = 0; i < mapSize; i++) {\n" +
- addTabs(completeWindows, 4) +
- "      }\n"
- "      // reset values\n" +
- addTabs(resetAggrs, 3) +
- "      // write in the correct slot, as the value has already "
- "been incremented!\n"
- "      completeStartPointers[numberOfCompleteWindows] = "
- "completeWindowsPointer;\n"
- "      prevCompletePane += panesPerSlide;\n"
- //"      completeWindowIds[numberOfCompleteWindows-1] =
- // prevCompletePane - 1;\n"
- "      numberOfCompleteWindows++;\n"
- "    }\n"
- "\n"
- "    for (; ptr < numOfSlices; ++ptr) {\n" +
- addTabs(evictAggrs, 3) +
- "      if (prevPane - prevOpenPane == panesPerSlide) {\n"
- "        prevOpenPane = currPane;\n"
- "        windowStartPointers[currentSlide++] = -1;\n"
- "      }\n"
- "      // complete windows\n"
- "      if (prevPane - prevCompletePane == panesPerWindow) {\n"
- "        prevCompletePane += panesPerSlide; "
- "//data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
- "        windowEndPointers[currentWindow++] = -1;\n"
- "        // write and pack the complete window result\n" +
- addTabs(setValues, 4) +
- "        for (int i = 0; i < mapSize; i++) {\n" +
- addTabs(completeWindows, 5) +
- "        }\n"
- "        // reset values\n" +
- addTabs(resetAggrs, 4) +
- "        numberOfCompleteWindows++;\n"
- "        completeStartPointers[numberOfCompleteWindows] = "
- "completeWindowsPointer;\n"
- //"        completeWindowIds[numberOfCompleteWindows-1] =
- // prevCompletePane - 1;\n"
- "      }\n"
- "      prevPane++;\n"
- "    }\n"
- "  }\n"
- "}\n"
- "\n");
+ "void fillEmptyWindows (int pid, int phase, int numOfSlices, int numOfOpening, int numOfClosing,\n"
+ "            long *windowStartPointers, long *windowEndPointers,\n"
+ "            int &numberOfOpeningWindows, int &numberOfClosingWindows, int &numberOfCompleteWindows,\n"
+ "            long bufferPtr, int &currentSlide, int &currentWindow,\n"
+ "            int &completeWindowsPointer, output_tuple_t *completeWindowsResults, int *completeStartPointers,\n"
+ "            int &closingWindowsPointer, " + closingDef + " *" + ptr +
+ "closingWindowsResults, int *closingStartPointers,\n"
+ //"            long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n"
+ "            long &prevCompletePane, long &prevClosePane, long &prevOpenPane, long &currPane) {\n"
+ "\n"
+ "  auto mapSize = MAP_SIZE;\n"
+ "  auto paneSize = PANE_SIZE;\n"
+ "  auto panesPerWindow = PANES_PER_WINDOW;\n"
+ "  auto panesPerSlide = PANES_PER_SLIDE;\n" +
+ addTabs(singleKey, 1) +
+ "  // store previous results\n" +
+ addTabs(insertAggrs, 1) +
+ addTabs(resetAggrs, 1) +
+ "  if (phase == 1) {\n"
+ "    // case 1 -- opening at the beginning\n"
+ "    auto prevPane = prevOpenPane + 1;\n"
+ "    int ptr;\n"
+ "    for (ptr = 0; ptr < numOfOpening; ++ptr) {\n"
+ "      // fillBuckets with empty;\n"
+ "      //aggrStructs[pid].insertSlices();\n"
+ "      if (prevPane - prevOpenPane == panesPerSlide) {\n"
+ "        prevOpenPane += panesPerSlide;\n"
+ "        if (ptr == 0) {\n"
+ "          windowStartPointers[currentSlide++] = bufferPtr;\n"
+ "        } else {\n"
+ "          windowStartPointers[currentSlide++] = -1;\n"
+ "        }\n"
+ "      }\n"
+ "      prevPane++;\n"
+ "    }\n"
+ "    if (numOfOpening!=numOfSlices) {\n"
+ "      // write results and pack them for the first complete window in the batch\n" +
+ addTabs(setValues, 3) +
+ addPostWindowOperation(3) +
+ "      for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 4) +
+ "      }\n"
+ "      // reset values\n" +
+ addTabs(resetAggrs, 3) +
+ "      // write in the correct slot, as the value has already been incremented!\n"
+ "      completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ "      prevCompletePane += panesPerSlide;\n"
+ //"      completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "      numberOfCompleteWindows++;\n "
+ "\n"
+ "      for (; ptr < numOfSlices; ++ptr) {\n" +
+ addTabs(evictAggrs, 4) +
+ "        if (prevPane - prevOpenPane == panesPerSlide) {\n"
+ "          prevOpenPane += panesPerSlide;\n"
+ "          windowStartPointers[currentSlide++] = -1;\n"
+ "        }\n"
+ "        // complete windows\n"
+ "        if (prevPane - prevCompletePane == panesPerWindow) {\n"
+ "          prevCompletePane += panesPerSlide; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
+ "          windowEndPointers[currentWindow++] = -1;\n"
+ "          // write and pack the complete window result\n" +
+ addTabs(setValues, 5) +
+ addPostWindowOperation(4) +
+ "          for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 6) +
+ "          }\n"
+ "          // reset values\n" +
+ addTabs(resetAggrs, 5) +
+ "          numberOfCompleteWindows++;\n"
+ "          completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ //"          completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "        }\n"
+ "        prevPane++;\n"
+ "      }\n"
+ "\n"
+ "    }\n"
+ "    // add more complete\n"
+ "  } else if (phase == 2) {\n"
+ "    // case 2 - opening and complete at the beginning of tumbling\n"
+ "    auto prevPane = prevOpenPane + 1;\n"
+ "    for (int ptr = 0; ptr < numOfSlices; ++ptr) {\n" +
+ addTabs(evictAggrs, 3) +
+ "      // add elements from the next slide\n"
+ "      //if (currPane-prevPane==1) {\n"
+ "      //  prevPane = currPane;\n"
+ "      //aggrStructs[pid].insertSlices();\n"
+ "      //}\n"
+ "      if (prevPane - prevOpenPane == panesPerSlide) {\n"
+ "        prevOpenPane += panesPerSlide;\n"
+ "        if (ptr == 0) {\n"
+ "          windowStartPointers[currentSlide++] = bufferPtr;\n"
+ "        } else {\n"
+ "          windowStartPointers[currentSlide++] = -1;\n"
+ "        }\n"
+ "      }\n"
+ "      // complete windows\n"
+ "      if (prevPane - prevCompletePane == panesPerWindow) {\n"
+ "        prevCompletePane += panesPerSlide; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
+ "        windowEndPointers[currentWindow++] = -1;\n"
+ "        // write and pack the complete window result\n" +
+ addTabs(setValues, 4) +
+ addPostWindowOperation(4) +
+ "        for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 5) +
+ "        }\n"
+ "        // reset values\n" +
+ addTabs(resetAggrs, 4) +
+ "        numberOfCompleteWindows++;\n"
+ "        completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ //"        completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "      }\n"
+ "      prevPane++;\n"
+ "    }\n"
+ "  } else if (phase == 3) {\n"
+ "    // case 3 - closing/opening and complete for tumbling\n"
+ "    int ptr = 0;\n"
+ "    auto prevPane = prevOpenPane + 1;\n"
+ "    for (ptr = 0; ptr < numOfClosing; ++ptr) {\n"
+ "      if (prevPane - prevClosePane == panesPerWindow) { // closing window\n"
+ "        // write result to the closing windows\n" +
+ addTabs(closingWindows, 4) +
+ "        prevClosePane += panesPerSlide;\n"
+ "        closingWindowsPointer += mapSize;\n"
+ "        numberOfClosingWindows++;\n"
+ "        closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n"
+ //"        closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n"
+ "        // reset values\n" +
+ addTabs(resetAggrs, 4) +
+ "      }\n"
+ "      prevPane++;\n"
+ "    }\n"
+ "\n"
+ "    while (ptr < numOfSlices) {\n"
+ "      // fillBuckets with empty;\n"
+ "      if (prevPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n"
+ "        prevOpenPane += panesPerSlide;\n"
+ "        if (ptr == 0) {\n"
+ "          windowStartPointers[currentSlide++] = bufferPtr;\n"
+ "        } else {\n"
+ "          windowStartPointers[currentSlide++] = -1;\n"
+ "        }\n"
+ "      }\n"
+ "      if (prevPane - prevCompletePane == panesPerWindow) { // complete window\n"
+ "        windowEndPointers[currentWindow++] = -1;\n"
+ "        numberOfCompleteWindows++;\n"
+ "        ptr++;\n"
+ "        prevPane++;\n"
+ "        break;\n"
+ "      }\n"
+ "      if (prevPane - prevClosePane == panesPerWindow) { // closing window\n"
+ "        // write result to the closing windows\n" +
+ addTabs(closingWindows, 4) +
+ "        prevClosePane += panesPerSlide;\n"
+ "        closingWindowsPointer += mapSize;\n"
+ "        numberOfClosingWindows++;\n"
+ "        closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n"
+ //"        closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n"
+ "        // reset values\n" +
+ addTabs(resetAggrs, 4) +
+ "      }\n"
+ "      ptr++;\n"
+ "      prevPane++;\n"
+ "    }\n"
+ "\n"
+ "    if (ptr!=numOfSlices) {\n"
+ "      // write results and pack them for the first complete window in the batch\n" +
+ addTabs(setValues, 3) +
+ addPostWindowOperation(3) +
+ "      for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 4) +
+ "      }\n"
+ "      // reset values\n" +
+ addTabs(resetAggrs, 3) +
+ "      // write in the correct slot, as the value has already been incremented!\n"
+ "      completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ "      prevCompletePane += panesPerSlide;\n"
+ //"      completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "      numberOfCompleteWindows++;\n"
+ "    }\n"
+ "\n"
+ "    for (; ptr < numOfSlices; ++ptr) {\n" +
+ addTabs(evictAggrs, 3) +
+ "      if (prevPane - prevOpenPane == panesPerSlide) {\n"
+ "        prevOpenPane = currPane;\n"
+ "        windowStartPointers[currentSlide++] = -1;\n"
+ "      }\n"
+ "      // complete windows\n"
+ "      if (prevPane - prevCompletePane == panesPerWindow) {\n"
+ "        prevCompletePane += panesPerSlide; //data[(windowStartPointers[idx++])].timestamp / paneSize;\n"
+ "        windowEndPointers[currentWindow++] = -1;\n"
+ "        // write and pack the complete window result\n" +
+ addTabs(setValues, 4) +
+ addPostWindowOperation(4) +
+ "        for (int i = 0; i < mapSize; i++) {\n" +
+ addTabs(completeWindows, 5) +
+ "        }\n"
+ "        // reset values\n" +
+ addTabs(resetAggrs, 4) +
+ "        numberOfCompleteWindows++;\n"
+ "        completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n"
+ //"        completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n"
+ "      }\n"
+ "      prevPane++;\n"
+ "    }\n"
+ "  }\n"
+ "}\n"
+ "\n"
+ );
   return s;
 }
- std::string getSlidingWindowRange(
- std::string outputBuffers, std::string initialiseAggrs,
- std::string computeAggrs, std::string insertAggrs, std::string evictAggrs,
- std::string resetAggrs, std::string setValues, std::string openingWindows,
- std::string closingWindows, std::string pendingWindows,
- std::string completeWindows, std::string resultPointers,
- std::string filter) {
+ std::string getSlidingWindowRange(std::string outputBuffers,
+                                   std::string initialiseAggrs,
+                                   std::string computeAggrs,
+                                   std::string insertAggrs,
+                                   std::string evictAggrs,
+                                   std::string resetAggrs,
+                                   std::string setValues,
+                                   std::string openingWindows,
+                                   std::string closingWindows,
+                                   std::string pendingWindows,
+                                   std::string completeWindows,
+                                   std::string resultPointers,
+                                   std::string filter) {
   std::string s;
   std::string ptr;
   if (hasGroupBy() && m_usePtrs) { ptr.append("*"); }
   s.append(
- "void processData (int pid, char *inputBuffer, size_t inputBufferSize, "
- "long startPointer, long endPointer, long timestampFromPrevBatch,\n"
- "                  long *windowStartPointers, long *windowEndPointers, "
- "char *" +
"openingWindowsBuffer, char *" + ptr + - "closingWindowsBuffer,\n" - " char *" + - ptr + - "pendingWindowsBuffer, char *completeWindowsBuffer,\n" - " int *openingStartPointers, int " - "*closingStartPointers, int *pendingStartPointers, int " - "*completeStartPointers,\n" - //" long *openingWindowIds, long *closingWindowIds, - // long *pendingWindowIds, long *completeWindowIds,\n" - " long streamStartPointer, int *pointersAndCounts, " - "char *staticBuffer) {" - "\n" - " // Input Buffer\n" - " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" - "\n" - " // Output Buffers\n" + - addTabs(outputBuffers, 1) + - " output_tuple_t *completeWindowsResults = (output_tuple_t *) " - "completeWindowsBuffer; // the results here are packed\n" - "\n" - //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" - //" windowStartPointers[i] = -1;\n" - //" windowEndPointers[i] = -1;\n" - //" }\n" - //"\n" - " int tupleSize = sizeof(input_tuple_t);\n" - " int mapSize = MAP_SIZE;\n" - " long paneSize = PANE_SIZE;\n" - " long panesPerSlide = PANES_PER_SLIDE;\n" - " long panesPerWindow = PANES_PER_WINDOW;\n" - " long windowSlide = WINDOW_SLIDE;\n" - " long windowSize = WINDOW_SIZE;\n" - " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" - " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" - "\n" - " // Set the first pointer for all types of windows\n" - " openingStartPointers[0] = openingWindowsPointer;\n" - " closingStartPointers[0] = closingWindowsPointer;\n" - " pendingStartPointers[0] = pendingWindowsPointer;\n" - " completeStartPointers[0] = completeWindowsPointer;\n" - "\n" - " // initialize aggregation data structures\n" + - addTabs(initialiseAggrs, 1) + - "\n" - " // Slicing based on panes logic\n" - " // Previous, next, and current pane ids\n" - " long prevClosePane, currPane, prevCompletePane, prevOpenPane, " - "startPane;\n" - " int numberOfOpeningWindows = 0;\n" - " int numberOfClosingWindows = 0;\n" - " int numberOfPendingWindows = 0;\n" - " int numberOfCompleteWindows = 0;\n" - " int currentSlide = 0;\n" - " int currentWindow = 0;\n" - " long step = 1; //tupleSize;\n" - " long streamPtr = streamStartPointer / tupleSize;\n" - " long bufferPtr = startPointer / tupleSize;\n" - " startPointer = startPointer / tupleSize;\n" - " endPointer = endPointer / tupleSize;\n" - " long diff = streamPtr - bufferPtr;\n" - " long tempStartPos, tempEndPos;\n" - "\n" - " //windowStartPointers[currentSlide++] = bufferPtr;\n" - " bool hasAddedComplete = false;\n" -#if defined(HAVE_NUMA) - " auto bufferSize = (long) inputBufferSize;\n" -#else - " auto bufferSize = (long) BUFFER_SIZE;\n" -// " timestampFromPrevBatch = (bufferPtr != 0) ? 
-// data[bufferPtr - 1].timestamp / paneSize :\n" " -// data[bufferSize / sizeof(input_tuple_t) - 1].timestamp / -// paneSize;" -#endif - " long prevBatchPane = timestampFromPrevBatch / paneSize;\n" - " if (streamStartPointer==0)\n" - " prevBatchPane = (data[bufferPtr].timestamp / paneSize) - " - "panesPerSlide;\n" - " startPane = (data[bufferPtr].timestamp / paneSize);\n" - " prevClosePane = prevOpenPane = (data[bufferPtr].timestamp / " - "paneSize) - panesPerSlide;\n" - " prevCompletePane = data[bufferPtr].timestamp / paneSize;\n" - " if (streamStartPointer!=0) {\n" - " prevOpenPane = timestampFromPrevBatch / paneSize;\n" - " long tmpPtr = data[bufferPtr].timestamp;\n" - " tmpPtr = tmpPtr/windowSlide;\n" - " tmpPtr = tmpPtr * windowSlide;\n" - " if (data[bufferPtr].timestamp%windowSlide!=0) {\n" - " prevOpenPane = tmpPtr / paneSize;\n" - " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" - " }\n" - " long tempTimestamp = timestampFromPrevBatch; // / paneSize;\n" - " while (tempTimestamp-tmpPtr + windowSlide < windowSize) {\n" - " tmpPtr -= windowSlide;\n" - " }\n" - " prevClosePane = tmpPtr / paneSize;\n" - " if (prevClosePane < 0)\n" - " prevClosePane = 0;\n" - " }\n" - "\n" - " bool completeStartsFromPane = startPane==prevCompletePane;\n" - " bool hasComplete = ((data[endPointer-1].timestamp - " - "data[startPointer].timestamp) >= windowSize);" - "\n" - " // The beginning of the stream. Check if we have at least one " - "complete window.\n" - " if (streamPtr == 0) {\n" - " // check for opening windows until finding the first " - "complete\n" - " while (bufferPtr < endPointer) {\n"); - if (!filter.empty()) s.append(addTabs(filter, 3) + "\t\t\t{\n"); + "void processData (int pid, char *inputBuffer, size_t inputBufferSize, long startPointer, long endPointer, long timestampFromPrevBatch,\n" + " long *windowStartPointers, long *windowEndPointers, char *" + ptr + + "openingWindowsBuffer, char *" + ptr + "closingWindowsBuffer,\n" + " char *" + ptr + + "pendingWindowsBuffer, char *completeWindowsBuffer,\n" + " int *openingStartPointers, int *closingStartPointers, int *pendingStartPointers, int *completeStartPointers,\n" + //" long *openingWindowIds, long *closingWindowIds, long *pendingWindowIds, long *completeWindowIds,\n" + " long streamStartPointer, int *pointersAndCounts, char *staticBuffer) {" + "\n" + " // Input Buffer\n" + " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" + "\n" + " // Output Buffers\n" + + addTabs(outputBuffers, 1) + + " output_tuple_t *completeWindowsResults = (output_tuple_t *) completeWindowsBuffer; // the results here are packed\n" + "\n" + //" for (int i = 0; i < PARTIAL_WINDOWS; i++) {\n" + //" windowStartPointers[i] = -1;\n" + //" windowEndPointers[i] = -1;\n" + //" }\n" + //"\n" + " int tupleSize = sizeof(input_tuple_t);\n" + " int mapSize = MAP_SIZE;\n" + " long paneSize = PANE_SIZE;\n" + " long panesPerSlide = PANES_PER_SLIDE;\n" + " long panesPerWindow = PANES_PER_WINDOW;\n" + " long windowSlide = WINDOW_SLIDE;\n" + " long windowSize = WINDOW_SIZE;\n" + " int openingWindowsPointer = 0, closingWindowsPointer = 0;\n" + " int pendingWindowsPointer = 0, completeWindowsPointer = 0;\n" + "\n" + " // Set the first pointer for all types of windows\n" + " openingStartPointers[0] = openingWindowsPointer;\n" + " closingStartPointers[0] = closingWindowsPointer;\n" + " pendingStartPointers[0] = pendingWindowsPointer;\n" + " completeStartPointers[0] = completeWindowsPointer;\n" + "\n" + " // initialize aggregation data structures\n" + + 
addTabs(initialiseAggrs, 1) + + "\n" + " // Slicing based on panes logic\n" + " // Previous, next, and current pane ids\n" + " long prevClosePane, currPane, prevCompletePane, prevOpenPane, startPane;\n" + " int numberOfOpeningWindows = 0;\n" + " int numberOfClosingWindows = 0;\n" + " int numberOfPendingWindows = 0;\n" + " int numberOfCompleteWindows = 0;\n" + " int currentSlide = 0;\n" + " int currentWindow = 0;\n" + " long step = 1; //tupleSize;\n" + " long streamPtr = streamStartPointer / tupleSize;\n" + " long bufferPtr = startPointer / tupleSize;\n" + " startPointer = startPointer / tupleSize;\n" + " endPointer = endPointer / tupleSize;\n" + " long diff = streamPtr - bufferPtr;\n" + " long tempStartPos, tempEndPos;\n" + "\n" + " //windowStartPointers[currentSlide++] = bufferPtr;\n" + " bool hasAddedComplete = false;\n" + #if defined(HAVE_NUMA) + " auto bufferSize = (long) inputBufferSize;\n" + #else + " auto bufferSize = (long) BUFFER_SIZE;\n" + // " timestampFromPrevBatch = (bufferPtr != 0) ? data[bufferPtr - 1].timestamp / paneSize :\n" + // " data[bufferSize / sizeof(input_tuple_t) - 1].timestamp / paneSize;" + #endif + " long prevBatchPane = timestampFromPrevBatch / paneSize;\n" + " if (streamStartPointer==0)\n" + " prevBatchPane = (data[bufferPtr].timestamp / paneSize) - panesPerSlide;\n" + " startPane = (data[bufferPtr].timestamp / paneSize);\n" + " prevClosePane = prevOpenPane = (data[bufferPtr].timestamp / paneSize) - panesPerSlide;\n" + " prevCompletePane = data[bufferPtr].timestamp / paneSize;\n" + " if (streamStartPointer!=0) {\n" + " prevOpenPane = timestampFromPrevBatch / paneSize;\n" + " long tmpPtr = data[bufferPtr].timestamp;\n" + " tmpPtr = tmpPtr/windowSlide;\n" + " tmpPtr = tmpPtr * windowSlide;\n" + " if (data[bufferPtr].timestamp%windowSlide!=0) {\n" + " prevOpenPane = tmpPtr / paneSize;\n" + " prevCompletePane = (tmpPtr+windowSlide) / paneSize;\n" + " }\n" + " long tempTimestamp = timestampFromPrevBatch; // / paneSize;\n" + " while (tempTimestamp-tmpPtr + windowSlide < windowSize) {\n" + " tmpPtr -= windowSlide;\n" + " }\n" + " prevClosePane = tmpPtr / paneSize;\n" + " if (prevClosePane < 0)\n" + " prevClosePane = 0;\n" + " }\n" + "\n" + " bool completeStartsFromPane = startPane==prevCompletePane;\n" + " bool hasComplete = ((data[endPointer-1].timestamp - data[startPointer].timestamp) >= windowSize);" + "\n" + " // The beginning of the stream. 
+ "  // The beginning of the stream. Check if we have at least one complete window.\n"
+ "  if (streamPtr == 0) {\n"
+ "    // check for opening windows until finding the first complete\n"
+ "    while (bufferPtr < endPointer) {\n");
+ if (!filter.empty())
+   s.append(
+       addTabs(filter, 3) +
+       "\t\t\t{\n");
  s.append(
      "      currPane = data[bufferPtr].timestamp / paneSize;\n"
      "      if (currPane - prevOpenPane == panesPerSlide) {\n"
      "        prevOpenPane += panesPerSlide;\n"
- "        windowStartPointers[currentSlide++] = bufferPtr; // "
- "TODO: maybe store bPtr*tupleSize\n"
+ "        windowStartPointers[currentSlide++] = bufferPtr; // TODO: maybe store bPtr*tupleSize\n"
      "        if (bufferPtr!=0) {\n" +
- addTabs(insertAggrs, 5) + addTabs(resetAggrs, 5) +
- //"          aggrStructures[pid].insert(aggrs);\n"
- //"          aggrs.reset();\n"
- "        }\n"
- "      }\n"
- "      if (currPane - prevCompletePane == panesPerWindow) {\n"
- "        windowEndPointers[currentWindow++] = bufferPtr;\n"
- "        numberOfCompleteWindows++;\n"
- "        break;\n"
- "      }\n"
- "      if (currPane - prevOpenPane > panesPerSlide || currPane - "
- "prevCompletePane > panesPerWindow) {\n"
- "        // fill bubbles\n"
- "        int numOfSlices, numOfComplete, numOfOpening, phase;\n"
- "        phase = 1;\n"
- "        //if (currPane - prevOpenPane > panesPerSlide) {\n"
- "          numOfSlices = currPane - prevOpenPane;// - "
- "panesPerSlide -1;\n"
- "          numOfOpening = numOfSlices;\n"
- "          numOfComplete = prevOpenPane + numOfSlices - "
- "panesPerWindow;\n"
- "          if (numOfComplete > 0) {\n"
- "            numOfOpening = numOfSlices - numOfComplete;\n"
- "            // phase = 2;\n"
- "            hasAddedComplete = true;\n"
- "          }\n"
- "          fillEmptyWindows(pid, phase, numOfSlices, "
- "numOfOpening, 0, windowStartPointers,\n"
- "                 windowEndPointers, "
- "numberOfOpeningWindows, numberOfClosingWindows, "
- "numberOfCompleteWindows,\n"
- "                 bufferPtr, currentSlide, "
- "currentWindow,\n"
- "                 completeWindowsPointer, "
- "completeWindowsResults, completeStartPointers,\n"
- "                 closingWindowsPointer, "
- "closingWindowsResults, closingStartPointers,\n"
- //"                 openingWindowIds,
- // closingWindowIds, pendingWindowIds, completeWindowIds,\n"
- "                 prevCompletePane, prevClosePane, "
- "prevOpenPane, currPane);\n"
- "          if (numOfComplete > 0)\n"
- "            break;\n"
- "      }\n"
- "      // project + aggregate here\n" +
- addTabs(computeAggrs, 3));
- if (!filter.empty()) s.append("\t\t\t};\n");
+ addTabs(insertAggrs, 5) +
+ addTabs(resetAggrs, 5) +
+ //"          aggrStructures[pid].insert(aggrs);\n"
+ //"          aggrs.reset();\n"
+ "        }\n"
+ "      }\n"
+ "      if (currPane - prevCompletePane == panesPerWindow) {\n"
+ "        windowEndPointers[currentWindow++] = bufferPtr;\n"
+ "        numberOfCompleteWindows++;\n"
+ "        break;\n"
+ "      }\n"
+ "      if (currPane - prevOpenPane > panesPerSlide || currPane - prevCompletePane > panesPerWindow) {\n"
+ "        // fill bubbles\n"
+ "        int numOfSlices, numOfComplete, numOfOpening, phase;\n"
+ "        phase = 1;\n"
+ "        //if (currPane - prevOpenPane > panesPerSlide) {\n"
+ "          numOfSlices = currPane - prevOpenPane;// - panesPerSlide -1;\n"
+ "          numOfOpening = numOfSlices;\n"
+ "          numOfComplete = prevOpenPane + numOfSlices - panesPerWindow;\n"
+ "          if (numOfComplete > 0) {\n"
+ "            numOfOpening = numOfSlices - numOfComplete;\n"
+ "            // phase = 2;\n"
+ "            hasAddedComplete = true;\n"
+ "          }\n"
+ "          fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, 0, windowStartPointers,\n"
+ "                 windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n"
+ "                 bufferPtr, currentSlide, currentWindow,\n"
+ "                 completeWindowsPointer, completeWindowsResults, completeStartPointers,\n"
+ "                 closingWindowsPointer, closingWindowsResults, closingStartPointers,\n"
+ //"                 openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n"
+ "                 prevCompletePane, prevClosePane, prevOpenPane, currPane);\n"
+ "          if (numOfComplete > 0)\n"
+ "            break;\n"
+ "      }\n"
+ "      // project + aggregate here\n" +
+ addTabs(computeAggrs, 3));
+ if (!filter.empty())
+   s.append("\t\t\t};\n");
  s.append(
      "      streamPtr += step;\n"
      "      bufferPtr += step;\n"
      "    }\n"
      "  }\n"
- "  // Check for closing and opening windows, until we have a "
- "complete window.\n"
+ "  // Check for closing and opening windows, until we have a complete window.\n"
      "  else {\n"
      "    auto prevPane = data[bufferPtr].timestamp / paneSize;\n"
      "    int numOfPartials = 0;\n"
      "    while (bufferPtr < endPointer) {\n");
- if (!filter.empty()) s.append(addTabs(filter, 3) + "\t\t\t{\n");
+ if (!filter.empty())
+   s.append(
+       addTabs(filter, 3) +
+       "\t\t\t{\n");
  s.append(
      "      currPane = data[bufferPtr].timestamp / paneSize;\n"
      "      if (currPane-prevPane==1) {\n"
      "        prevPane = currPane;\n"
- "        if (numOfPartials==BUCKET_SIZE) // remove the extra "
- "values so that we have the first complete window\n" +
- addTabs(evictAggrs, 5) + addTabs(insertAggrs, 4) +
- addTabs(resetAggrs, 4) +
- //"        aggrStructures[pid].evict();\n"
- //"        aggrStructures[pid].insert(aggrs);\n"
- //"        aggrs.reset();\n"
- "        numOfPartials++;\n"
- "      }\n"
- "      if (currPane - prevOpenPane == panesPerSlide) { // new "
- "slide and possible opening windows\n"
- "        prevOpenPane += panesPerSlide;\n"
- "        windowStartPointers[currentSlide++] = bufferPtr;\n"
- "      }\n"
- "      if (hasComplete && currPane - prevCompletePane == "
- "panesPerWindow) { // complete window\n"
- "        windowEndPointers[currentWindow++] = bufferPtr;\n"
- "        numberOfCompleteWindows++;\n"
- "        break;\n"
- "      }\n"
- "      if (prevClosePane <= startPane && currPane - "
- "prevClosePane == panesPerWindow) { // closing window\n"
- "        // write result to the closing windows\n" +
- //"        aggrs =
- // aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n" +
- addTabs(closingWindows, 4) +
- "        prevClosePane += panesPerSlide;\n"
- "        closingWindowsPointer += mapSize;\n"
- "        numberOfClosingWindows++;\n"
- "        closingStartPointers[numberOfClosingWindows] = "
- "closingWindowsPointer;\n" +
- //"        closingWindowIds[numberOfClosingWindows-1] =
- // prevClosePane - panesPerSlide;\n" +
- addTabs(resetAggrs, 4) +
- //"        aggrs.reset();\n"
- "      }\n"
- "      if (currPane - prevOpenPane > panesPerSlide || currPane - "
- "prevClosePane > panesPerWindow || currPane - prevCompletePane > "
- "panesPerWindow) {\n"
- "        // fill bubbles\n"
- "        int numOfSlices, numOfComplete, numOfOpening, "
- "numOfClosing, phase;\n"
- "        numOfClosing = 0;\n"
- "        phase = 3;\n"
- "        if (currPane - prevOpenPane >= panesPerSlide) {\n"
- "          numOfSlices = currPane - prevOpenPane; // - "
- "panesPerSlide -1;\n"
- "          numOfComplete = prevPane + numOfSlices - "
- "(panesPerWindow + prevCompletePane);//numOfComplete = "
- "currPane+numOfSlices-panesPerWindow;\n"
- "          if (numOfSlices == 0)\n"
- "            prevOpenPane = currPane;\n"
- "        } else if (currPane - prevClosePane >= "
- "panesPerWindow) {\n"
- "          numOfSlices = currPane - prevClosePane; // - "
- "panesPerWindow -1;\n"
- "          numOfClosing = currPane - prevBatchPane;\n"
- "        } else {\n"
- "          numOfSlices = currPane - prevCompletePane - "
- "panesPerWindow -1;\n"
- "          numOfComplete = currPane + numOfSlices - "
- "panesPerWindow;\n"
- "        }\n"
- "        if (numOfComplete > 0) {\n"
- "          hasAddedComplete = true;\n"
- "        }\n"
- "        fillEmptyWindows(pid, phase, numOfSlices, "
- "numOfOpening, numOfClosing, windowStartPointers,\n"
- "                 windowEndPointers, "
- "numberOfOpeningWindows, numberOfClosingWindows, "
- "numberOfCompleteWindows,\n"
- "                 bufferPtr, currentSlide, "
- "currentWindow,\n"
- "                 completeWindowsPointer, "
- "completeWindowsResults, completeStartPointers,\n"
- "                 closingWindowsPointer, "
- "closingWindowsResults, closingStartPointers,\n"
- //"                 openingWindowIds,
- // closingWindowIds, pendingWindowIds, completeWindowIds,\n"
- "                 prevCompletePane, "
- "prevClosePane, prevOpenPane, currPane);\n"
- "        prevPane = currPane;\n"
- "        if (hasAddedComplete)\n"
- "          break;\n"
- "      }\n"
- "      // project + aggregate here\n" +
- addTabs(computeAggrs, 3));
- if (!filter.empty()) s.append("\t\t\t};\n");
+ "        if (numOfPartials==BUCKET_SIZE) // remove the extra values so that we have the first complete window\n" +
+ addTabs(evictAggrs, 5) +
+ addTabs(insertAggrs, 4) +
+ addTabs(resetAggrs, 4) +
+ //"        aggrStructures[pid].evict();\n"
+ //"        aggrStructures[pid].insert(aggrs);\n"
+ //"        aggrs.reset();\n"
+ "        numOfPartials++;\n"
+ "      }\n"
+ "      if (currPane - prevOpenPane == panesPerSlide) { // new slide and possible opening windows\n"
+ "        prevOpenPane += panesPerSlide;\n"
+ "        windowStartPointers[currentSlide++] = bufferPtr;\n"
+ "      }\n"
+ "      if (hasComplete && currPane - prevCompletePane == panesPerWindow) { // complete window\n"
+ "        windowEndPointers[currentWindow++] = bufferPtr;\n"
+ "        numberOfCompleteWindows++;\n"
+ "        break;\n"
+ "      }\n"
+ "      if (prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n"
+ "        // write result to the closing windows\n" +
+ //"        aggrs = aggrStructures[pid].queryIntermediate(PARENTS_SIZE-2);\n" +
+ addTabs(closingWindows, 4) +
+ "        prevClosePane += panesPerSlide;\n"
+ "        closingWindowsPointer += mapSize;\n"
+ "        numberOfClosingWindows++;\n"
+ "        closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" +
+ //"        closingWindowIds[numberOfClosingWindows-1] = prevClosePane - panesPerSlide;\n" +
+ addTabs(resetAggrs, 4) +
+ //"        aggrs.reset();\n"
+ "      }\n"
+ "      if (currPane - prevOpenPane > panesPerSlide || currPane - prevClosePane > panesPerWindow || currPane - prevCompletePane > panesPerWindow) {\n"
+ "        // fill bubbles\n"
+ "        int numOfSlices, numOfComplete, numOfOpening, numOfClosing, phase;\n"
+ "        numOfClosing = 0;\n"
+ "        phase = 3;\n"
+ "        if (currPane - prevOpenPane >= panesPerSlide) {\n"
+ "          numOfSlices = currPane - prevOpenPane; // - panesPerSlide -1;\n"
+ "          numOfComplete = prevPane + numOfSlices - (panesPerWindow + prevCompletePane);//numOfComplete = currPane+numOfSlices-panesPerWindow;\n"
+ "          if (numOfSlices == 0)\n"
+ "            prevOpenPane = currPane;\n"
+ "        } else if (currPane - prevClosePane >= panesPerWindow) {\n"
+ "          numOfSlices = currPane - prevClosePane; // - panesPerWindow -1;\n"
+ "          numOfClosing = currPane - prevBatchPane;\n"
+ "        } else {\n"
+ "          numOfSlices = currPane - prevCompletePane - panesPerWindow -1;\n"
+ "          numOfComplete = currPane + numOfSlices - panesPerWindow;\n"
+ "        }\n"
+ "        if (numOfComplete > 0) {\n"
+ "          hasAddedComplete = true;\n"
+ "        }\n"
+ "        fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, numOfClosing, windowStartPointers,\n"
+ "                 windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n"
+ "                 bufferPtr, currentSlide, currentWindow,\n"
+ "                 completeWindowsPointer, completeWindowsResults, completeStartPointers,\n"
+ "                 closingWindowsPointer, closingWindowsResults, closingStartPointers,\n"
+ //"                 openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n"
+ "                 prevCompletePane, prevClosePane, prevOpenPane, currPane);\n"
currPane;\n" + " if (hasAddedComplete)\n" + " break;\n" + " }\n" + " // project + aggregate here\n" + + addTabs(computeAggrs, 3)); + if (!filter.empty()) + s.append("\t\t\t};\n"); s.append( " streamPtr += step;\n" " bufferPtr += step;\n" @@ -5121,94 +4861,93 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { "\n" " // check for pending windows\n" " if (numberOfCompleteWindows == 0) {\n" + - addTabs(insertAggrs, 3) + - //" if (bufferPtr * sizeof(input_tuple_t) == BUFFER_SIZE)\n" - //" bufferPtr = 0;\n" - //" currPane = data[bufferPtr].timestamp / paneSize;\n" - //" if (prevClosePane <= startPane && currPane - - // prevClosePane == panesPerWindow) { // closing window\n" " // write - // result to the closing windows\n" + addTabs(closingWindows, 4) + " - // closingWindowsPointer += mapSize;\n" " numberOfClosingWindows++;\n" " - // closingStartPointers[numberOfClosingWindows] = - // closingWindowsPointer;\n" " // reset values\n" + - // addTabs(resetAggrs, 4) + - //" }\n" - " // write result to pending windows\n" + - //" aggrs = aggrStructures[pid].queryIntermediate(-1);\n" + - addTabs(pendingWindows, 3) + - " pendingWindowsPointer += mapSize;\n" - " numberOfPendingWindows++;\n" - " pendingStartPointers[numberOfPendingWindows] = " - "pendingWindowsPointer;\n" + - addTabs(resetAggrs, 3) + - //" aggrs.reset();\n" - " }\n" - " }\n" - "\n" - " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || " - "currentSlide >= 1)) { // We only have one opening window so far...\n" - " if (streamPtr%windowSlide!=0 && streamStartPointer!=0) {\n" + - addTabs(evictAggrs, 3) + - //" aggrStructures[pid].evict();\n" - " }\n" - " // write results\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + - addTabs(openingWindows, 2) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" + - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevOpenPane++;\n" + - addTabs(resetAggrs, 2) + - //" aggrs.reset();\n" - " } else if (numberOfCompleteWindows > 0) {\n" - " // write results and pack them for the first complete window " - "in the batch\n" + - " if (!hasAddedComplete) {\n" + addTabs(setValues, 2) + - //" aggrs = aggrStructures[pid].query();\n" + - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 3) + - //" completeWindowsPointer++;\n" - " }\n" + addTabs(resetAggrs, 2) + - //" aggrs.reset();\n" - " // write in the correct slot, as the value has already been " - "incremented!\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - " prevCompletePane += panesPerSlide;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " }\n" - " // project + aggregate here\n"); - if (!filter.empty()) s.append(addTabs(filter, 2) + "\t\t{\n"); - s.append(addTabs(computeAggrs, 2)); - if (!filter.empty()) s.append("\t\t}\n"); + addTabs(insertAggrs, 3) + + //" if (bufferPtr * sizeof(input_tuple_t) == BUFFER_SIZE)\n" + //" bufferPtr = 0;\n" + //" currPane = data[bufferPtr].timestamp / paneSize;\n" + //" if (prevClosePane <= startPane && currPane - prevClosePane == panesPerWindow) { // closing window\n" + //" // write result to the closing windows\n" + + //addTabs(closingWindows, 4) + + //" closingWindowsPointer += mapSize;\n" + //" numberOfClosingWindows++;\n" + //" closingStartPointers[numberOfClosingWindows] = closingWindowsPointer;\n" + //" // reset values\n" + + 
//addTabs(resetAggrs, 4) + + //" }\n" + " // write result to pending windows\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(-1);\n" + + addTabs(pendingWindows, 3) + + " pendingWindowsPointer += mapSize;\n" + " numberOfPendingWindows++;\n" + " pendingStartPointers[numberOfPendingWindows] = pendingWindowsPointer;\n" + + addTabs(resetAggrs, 3) + + //" aggrs.reset();\n" + " }\n" + " }\n" + "\n" + " if (numberOfCompleteWindows == 0 && (streamStartPointer == 0 || currentSlide >= 1)) { // We only have one opening window so far...\n" + " if (streamPtr%windowSlide!=0 && streamStartPointer!=0) {\n" + + addTabs(evictAggrs, 3) + + //" aggrStructures[pid].evict();\n" + " }\n" + " // write results\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + + addTabs(openingWindows, 2) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + + //" openingWindowIds[numberOfOpeningWindows-1] = prevOpenPane++;\n" + + addTabs(resetAggrs, 2) + + //" aggrs.reset();\n" + " } else if (numberOfCompleteWindows > 0) {\n" + " // write results and pack them for the first complete window in the batch\n" + + " if (!hasAddedComplete) {\n" + + addTabs(setValues, 2) + //" aggrs = aggrStructures[pid].query();\n" + + + addPostWindowOperation(2) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 3) + + //" completeWindowsPointer++;\n" + " }\n" + + addTabs(resetAggrs, 2) + + //" aggrs.reset();\n" + " // write in the correct slot, as the value has already been incremented!\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + " prevCompletePane += panesPerSlide;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " }\n" + " // project + aggregate here\n"); + if (!filter.empty()) + s.append( + addTabs(filter, 2) + + "\t\t{\n"); + s.append( + addTabs(computeAggrs, 2)); + if (!filter.empty()) + s.append("\t\t}\n"); s.append( " bufferPtr += step; // windowEndPointers[0];\n" - " //prevOpenPane = " - "(data[windowStartPointers[currentSlide-1]].timestamp) / paneSize;\n" + " //prevOpenPane = (data[windowStartPointers[currentSlide-1]].timestamp) / paneSize;\n" " int idx = 1;\n" - //" prevCompletePane += panesPerSlide; // - //(data[windowStartPointers[idx++]].timestamp) / paneSize;\n" - " int removalIndex = currentWindow; //(startingFromPane) ? " - "currentWindow : currentWindow + 1;\n" + //" prevCompletePane += panesPerSlide; // (data[windowStartPointers[idx++]].timestamp) / paneSize;\n" + " int removalIndex = currentWindow; //(startingFromPane) ? 
currentWindow : currentWindow + 1;\n" " bool foundComplete = false;\n" " while (bufferPtr < endPointer) {\n" " // remove previous slide \n" " tempStartPos = windowStartPointers[removalIndex - 1];\n" " if (tempStartPos!=-1)\n" + - addTabs(evictAggrs, 4) + - //" aggrStructures[pid].evict();\n" - "\n" - " // add elements from the next slide\n" - " //bufferPtr = windowEndPointers[currentWindow - 1] + 1; " - "// take the next position, as we have already computed this value\n" - " foundComplete = false;\n" - " auto prevPane = data[bufferPtr].timestamp / paneSize;\n" - " while (true) {\n"); - if (!filter.empty()) s.append(addTabs(filter, 4) + "\t\t\t{\n"); + addTabs(evictAggrs, 4) + + //" aggrStructures[pid].evict();\n" + "\n" + " // add elements from the next slide\n" + " //bufferPtr = windowEndPointers[currentWindow - 1] + 1; // take the next position, as we have already computed this value\n" + " foundComplete = false;\n" + " auto prevPane = data[bufferPtr].timestamp / paneSize;\n" + " while (true) {\n"); + if (!filter.empty()) + s.append( + addTabs(filter, 4) + + "\t\t\t{\n"); s.append( " auto dataPtr = bufferPtr;\n" " if (bufferPtr*tupleSize >= bufferSize)\n" @@ -5216,87 +4955,71 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " currPane = data[dataPtr].timestamp / paneSize;\n" " if (currPane-prevPane==1) {\n" " prevPane = currPane;\n" + - addTabs(insertAggrs, 4) + addTabs(resetAggrs, 4) + - //" aggrStructures[pid].insert(aggrs);\n" - //" aggrs.reset();\n" - " }\n" - " if (currPane - prevOpenPane == panesPerSlide) {\n" - " prevOpenPane = currPane;\n" - " windowStartPointers[currentSlide++] = dataPtr;\n" - " }\n" - " if (bufferPtr >= endPointer) {\n" - " break;\n" - " }\n" - " // complete windows\n" - " if (currPane - prevCompletePane == panesPerWindow) {\n" - " //prevPane = currPane;\n" - " prevCompletePane += panesPerSlide; // " - "data[(windowStartPointers[idx++])].timestamp / paneSize;\n" - "\n" - " windowEndPointers[currentWindow++] = dataPtr;\n" - " // write and pack the complete window result\n" + - addTabs(setValues, 5) + - //" aggrs = aggrStructures[pid].query();\n" - " for (int i = 0; i < mapSize; i++) {\n" + - addTabs(completeWindows, 6) + - //" completeWindowsPointer++;\n" - " }\n" + addTabs(resetAggrs, 5) + - //" aggrs.reset();\n" - " numberOfCompleteWindows++;\n" - " completeStartPointers[numberOfCompleteWindows] = " - "completeWindowsPointer;\n" - //" completeWindowIds[numberOfCompleteWindows-1] = - // prevCompletePane - 1;\n" - " foundComplete = true;\n" - " }\n" - " if (currPane - prevOpenPane > panesPerSlide || " - "currPane - prevCompletePane > panesPerWindow) {\n" - " // fill bubbles\n" - " int numOfSlices, numOfComplete, numOfOpening, " - "phase;\n" - " phase = 2;\n" - " //if (currPane - prevOpenPane > panesPerSlide) {\n" - " numOfSlices = currPane - prevPane; " - "//prevOpenPane - panesPerSlide -1;\n" - " numOfComplete = prevPane + numOfSlices - " - "(panesPerWindow + prevCompletePane);\n" - " //} else {\n" - " // numOfSlices = currPane - prevCompletePane - " - "panesPerWindow -1;\n" - " // numOfComplete = " - "currPane+numOfSlices-panesPerWindow;\n" - " //}\n" - " if (numOfComplete > 0) {\n" - "\n" - " numOfOpening = prevCompletePane + " - "panesPerWindow - prevOpenPane;\n" - " foundComplete = true;\n" - " }\n" - " fillEmptyWindows(pid, phase, numOfSlices, " - "numOfOpening, 0, windowStartPointers,\n" - " windowEndPointers, " - "numberOfOpeningWindows, numberOfClosingWindows, " - "numberOfCompleteWindows,\n" - " bufferPtr, currentSlide, " 
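
When panes arrive with gaps, the generated code back-fills "bubbles" via fillEmptyWindows; the slice and complete-window counts are derived from how far the current pane jumped. A toy re-derivation of those formulas (fillEmptyWindows is engine code and only stubbed here):

```cpp
#include <cstdio>

// Stub: the real fillEmptyWindows emits the missing window fragments.
static void fillEmptyWindows(long numOfSlices, long numOfComplete) {
  std::printf("back-fill %ld empty slices, %ld complete windows\n",
              numOfSlices, numOfComplete);
}

int main() {
  const long panesPerWindow = 4;
  const long prevCompletePane = 0;
  long prevPane = 3;
  long currPane = 9;  // panes 4..8 carried no tuples

  // Same formulas as the generated code above:
  long numOfSlices = currPane - prevPane;
  long numOfComplete =
      prevPane + numOfSlices - (panesPerWindow + prevCompletePane);
  if (numOfComplete > 0) fillEmptyWindows(numOfSlices, numOfComplete);
}
```
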
- "currentWindow,\n" - " completeWindowsPointer, " - "completeWindowsResults, completeStartPointers,\n" - " closingWindowsPointer, " - "closingWindowsResults, closingStartPointers,\n" - //" openingWindowIds, - // closingWindowIds, pendingWindowIds, completeWindowIds,\n" - " prevCompletePane, " - "prevClosePane, prevOpenPane, currPane);\n" - " }\n" - "\n" - " // aggregate here\n" + - addTabs(computeAggrs, 4)); + addTabs(insertAggrs, 4) + + addTabs(resetAggrs, 4) + + //" aggrStructures[pid].insert(aggrs);\n" + //" aggrs.reset();\n" + " }\n" + " if (currPane - prevOpenPane == panesPerSlide) {\n" + " prevOpenPane = currPane;\n" + " windowStartPointers[currentSlide++] = dataPtr;\n" + " }\n" + " if (bufferPtr >= endPointer) {\n" + " break;\n" + " }\n" + " // complete windows\n" + " if (currPane - prevCompletePane == panesPerWindow) {\n" + " //prevPane = currPane;\n" + " prevCompletePane += panesPerSlide; // data[(windowStartPointers[idx++])].timestamp / paneSize;\n" + "\n" + " windowEndPointers[currentWindow++] = dataPtr;\n" + " // write and pack the complete window result\n" + + addTabs(setValues, 5) + //" aggrs = aggrStructures[pid].query();\n" + + addPostWindowOperation(5) + + " for (int i = 0; i < mapSize; i++) {\n" + + addTabs(completeWindows, 6) + + //" completeWindowsPointer++;\n" + " }\n" + + addTabs(resetAggrs, 5) + + //" aggrs.reset();\n" + " numberOfCompleteWindows++;\n" + " completeStartPointers[numberOfCompleteWindows] = completeWindowsPointer;\n" + //" completeWindowIds[numberOfCompleteWindows-1] = prevCompletePane - 1;\n" + " foundComplete = true;\n" + " }\n" + " if (currPane - prevOpenPane > panesPerSlide || currPane - prevCompletePane > panesPerWindow) {\n" + " // fill bubbles\n" + " int numOfSlices, numOfComplete, numOfOpening, phase;\n" + " phase = 2;\n" + " //if (currPane - prevOpenPane > panesPerSlide) {\n" + " numOfSlices = currPane - prevPane; //prevOpenPane - panesPerSlide -1;\n" + " numOfComplete = prevPane + numOfSlices - (panesPerWindow + prevCompletePane);\n" + " //} else {\n" + " // numOfSlices = currPane - prevCompletePane - panesPerWindow -1;\n" + " // numOfComplete = currPane+numOfSlices-panesPerWindow;\n" + " //}\n" + " if (numOfComplete > 0) {\n" + "\n" + " numOfOpening = prevCompletePane + panesPerWindow - prevOpenPane;\n" + " foundComplete = true;\n" + " }\n" + " fillEmptyWindows(pid, phase, numOfSlices, numOfOpening, 0, windowStartPointers,\n" + " windowEndPointers, numberOfOpeningWindows, numberOfClosingWindows, numberOfCompleteWindows,\n" + " bufferPtr, currentSlide, currentWindow,\n" + " completeWindowsPointer, completeWindowsResults, completeStartPointers,\n" + " closingWindowsPointer, closingWindowsResults, closingStartPointers,\n" + //" openingWindowIds, closingWindowIds, pendingWindowIds, completeWindowIds,\n" + " prevCompletePane, prevClosePane, prevOpenPane, currPane);\n" + " }\n" + "\n" + " // aggregate here\n" + + addTabs(computeAggrs, 4)); if (!filter.empty()) - s.append( - "\t\t\t\t}\n" - " if (bufferPtr >= endPointer) {\n" - " break;\n" - " }\n"); + s.append("\t\t\t\t}\n" + " if (bufferPtr >= endPointer) {\n" + " break;\n" + " }\n"); s.append( " bufferPtr += step;\n" " if (foundComplete) {\n" @@ -5306,78 +5029,63 @@ class OperatorKernel : public OperatorCode, public AggregateOperatorCode { " removalIndex++;\n" " }\n" "\n" - " if (!foundComplete) { // we have reached the first open " - "window after all the complete ones\n" - " // write the first open window if we have already " - "computed the result\n" + - // addTabs(insertAggrs, 3) + - //" 
aggrStructures[pid].insert(aggrs);\n" + - //" aggrs = - // aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + - addTabs(openingWindows, 3) + - " openingWindowsPointer += mapSize;\n" - " numberOfOpeningWindows++;\n" - " openingStartPointers[numberOfOpeningWindows] = " - "openingWindowsPointer;\n" - //" openingWindowIds[numberOfOpeningWindows-1] = - // prevCompletePane++;\n" - " } else { // otherwise remove the respective tuples for the " - "first opening window\n" - " currentWindow--;\n" - " }\n" - " }\n" - "\n" - " // compute the rest opening windows\n" - " if (currentSlide-currentWindow >= panesPerWindow)\t\t\n" - " currentWindow = " - "currentSlide-panesPerWindow+1;//+numberOfOpeningWindows;\n" - " if (numberOfCompleteWindows!=0 && " - "currentSlide-currentWindow= panesPerWindow) {\n"); + " if (!foundComplete) { // we have reached the first open window after all the complete ones\n" + " // write the first open window if we have already computed the result\n" + + //addTabs(insertAggrs, 3) + + //" aggrStructures[pid].insert(aggrs);\n" + + //" aggrs = aggrStructures[pid].queryIntermediate(numberOfOpeningWindows);\n" + + addTabs(openingWindows, 3) + + " openingWindowsPointer += mapSize;\n" + " numberOfOpeningWindows++;\n" + " openingStartPointers[numberOfOpeningWindows] = openingWindowsPointer;\n" + //" openingWindowIds[numberOfOpeningWindows-1] = prevCompletePane++;\n" + " } else { // otherwise remove the respective tuples for the first opening window\n" + " currentWindow--;\n" + " }\n" + " }\n" + "\n" + " // compute the rest opening windows\n" + " if (currentSlide-currentWindow >= panesPerWindow)\t\t\n" + " currentWindow = currentSlide-panesPerWindow+1;//+numberOfOpeningWindows;\n" + " if (numberOfCompleteWindows!=0 && currentSlide-currentWindow= panesPerWindow) {\n"); if (hasGroupBy()) - s.append( - " closingWindowsPointer -= (numberOfClosingWindows - " - "panesPerWindow + 1)*mapSize;\n"); + s.append(" closingWindowsPointer -= (numberOfClosingWindows - panesPerWindow + 1)*mapSize;\n"); else - s.append( - " closingWindowsPointer -= (numberOfClosingWindows - " - "panesPerWindow + 1);\n"); + s.append(" closingWindowsPointer -= (numberOfClosingWindows - panesPerWindow + 1);\n"); s.append( " numberOfClosingWindows = panesPerWindow - 1;\n" " }\n" "\n" + - addTabs(resultPointers, 1) + - " pointersAndCounts[4] = numberOfOpeningWindows;\n" - " pointersAndCounts[5] = numberOfClosingWindows;\n" - " pointersAndCounts[6] = numberOfPendingWindows;\n" - " pointersAndCounts[7] = numberOfCompleteWindows;\n" - "}\n"); + addTabs(resultPointers, 1) + + " pointersAndCounts[4] = numberOfOpeningWindows;\n" + " pointersAndCounts[5] = numberOfClosingWindows;\n" + " pointersAndCounts[6] = numberOfPendingWindows;\n" + " pointersAndCounts[7] = numberOfCompleteWindows;\n" + "}\n" + ); return s; } -}; \ No newline at end of file +}; diff --git a/src/dispatcher/ITaskDispatcher.cpp b/src/dispatcher/ITaskDispatcher.cpp new file mode 100644 index 0000000..b289410 --- /dev/null +++ b/src/dispatcher/ITaskDispatcher.cpp @@ -0,0 +1,9 @@ +#include "dispatcher/ITaskDispatcher.h" + +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "utils/Query.h" + +ITaskDispatcher::ITaskDispatcher(Query &query, bool triggerCheckpoints) + : m_workerQueue(query.getTaskQueue()), + m_parent(query), + m_triggerCheckpoints(triggerCheckpoints) {} \ No newline at end of file diff --git a/src/dispatcher/ITaskDispatcher.h b/src/dispatcher/ITaskDispatcher.h new file mode 100644 index 0000000..95d897a --- /dev/null +++ 
b/src/dispatcher/ITaskDispatcher.h @@ -0,0 +1,66 @@ +#pragma once + +#include <atomic> +#include <thread> + +#include "utils/SystemConf.h" + +class Query; +class Task; +class QueryBuffer; +class FileBackedCheckpointCoordinator; +struct LineageGraph; +class UnboundedQueryBuffer; + +/* + * \brief Base class for task dispatchers. + * + * */ + +class ITaskDispatcher { + protected: + std::shared_ptr<TaskQueue> m_workerQueue; + Query &m_parent; + + /* Recovery and Checkpoints */ + std::atomic<bool> m_createMergeTasks = true; + bool m_startingFromRecovery = true; + int m_lastTaskId = 0; + bool m_triggerCheckpoints; + std::atomic<bool> m_checkpointFinished = false; + FileBackedCheckpointCoordinator *m_coordinator; + std::thread m_coordinationTimerThread; + std::atomic<int> m_checkpointCounter = 0; + bool m_parallelInsertion = false; + + friend class FileBackedCheckpointCoordinator; + + public: + ITaskDispatcher(Query &query, bool triggerCheckpoints = false); + virtual void dispatch(char *data, int length, long latencyMark = -1, long retainMark = -1) = 0; + virtual void dispatch(std::shared_ptr<UnboundedQueryBuffer> &data, long latencyMark = -1, long retainMark = -1) = 0; + virtual void dispatch(void *data, int length, long latencyMark = -1, long retainMark = -1) { + throw std::runtime_error("error: dispatching receive_element_t not implemented"); + } + virtual void dispatchToFirstStream(char *data, int length, long latencyMark) = 0; + virtual void dispatchToSecondStream(char *data, int length, long latencyMark) = 0; + virtual bool tryDispatchOrCreateTask(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual bool tryDispatch(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual bool tryDispatchToFirstStream(char *data, int length, long latencyMark = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual bool tryDispatchToSecondStream(char *data, int length, long latencyMark = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual bool tryDispatchSerialToFirstStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual bool tryDispatchSerialToSecondStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr<LineageGraph> graph = nullptr) = 0; + virtual void tryToConsume() = 0; + virtual void recover() = 0; + virtual QueryBuffer *getBuffer() = 0; + virtual QueryBuffer *getFirstBuffer() = 0; + virtual QueryBuffer *getSecondBuffer() = 0; + virtual long getBytesGenerated() = 0; + virtual void setTaskQueue(std::shared_ptr<TaskQueue> queue) = 0; + virtual int getTaskNumber() = 0; + virtual void setTaskNumber(int taskId) = 0; + virtual void setStepAndOffset(long step, long offset) = 0; + virtual ~ITaskDispatcher() {} + + virtual void createMergeTasks(bool flag) = 0; +}; \ No newline at end of file diff --git a/src/dispatcher/JoinTaskDispatcher.cpp b/src/dispatcher/JoinTaskDispatcher.cpp new file mode 100644 index 0000000..aaf1681 --- /dev/null +++ b/src/dispatcher/JoinTaskDispatcher.cpp @@ -0,0 +1,673 @@ +#include "JoinTaskDispatcher.h" + +#include + +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "tasks/Task.h" +#include "tasks/TaskFactory.h" +#include "tasks/WindowBatchFactory.h" +#include "utils/Query.h" +#include "utils/QueryConfig.h" +#include "utils/QueryOperator.h" +#include "utils/TupleSchema.h" +#include "utils/Utils.h" +#include "utils/WindowDefinition.h" + +JoinTaskDispatcher::JoinTaskDispatcher(Query &query, QueryBuffer &firstBuffer,
QueryBuffer &secondBuffer, bool replayTimestamps, bool triggerCheckpoints) + : ITaskDispatcher(query, triggerCheckpoints), + m_firstBuffer(firstBuffer), + m_secondBuffer(secondBuffer), + m_firstWindow(query.getFirstWindowDefinition()), + m_secondWindow(query.getSecondWindowDefinition()), + m_firstSchema(query.getFirstSchema()), + m_secondSchema(query.getSecondSchema()), + m_batchSize(query.getConfig() ? query.getConfig()->getBatchSize() : SystemConf::getInstance().BATCH_SIZE), + m_firstTupleSize(m_firstSchema->getTupleSize()), + m_secondTupleSize(m_secondSchema->getTupleSize()), + m_nextTask(1), + m_firstEndIndex(-m_firstTupleSize), + m_firstLastEndIndex(-m_firstTupleSize), + m_secondLastEndIndex(-m_secondTupleSize), + m_mask(m_firstBuffer.getMask()), + m_latencyMark(-1), + m_leftList(std::make_unique(128)), + m_rightList(std::make_unique(128)) { + + if (m_firstBuffer.getCapacity() != m_secondBuffer.getCapacity()) + throw std::runtime_error( + "error: both first and second buffer have to be the same size for the join dispatcher"); + + if (!(!(m_firstTupleSize == 0) && + !(m_firstTupleSize & (m_firstTupleSize - 1))) || + !(!(m_secondTupleSize == 0) && + !(m_secondTupleSize & (m_secondTupleSize - 1)))) + throw std::runtime_error( + "error: both first and second tuple sizes have to be a power of two"); + + if (replayTimestamps) { + m_replayBarrier = query.getConfig() ? (query.getConfig()->getBundleSize() / query.getConfig()->getBatchSize()) : + (SystemConf::getInstance().BUNDLE_SIZE / SystemConf::getInstance().BATCH_SIZE); + if (m_replayBarrier == 0) + throw std::runtime_error( + "error: the bundle size should be greater or equal to the batch size " + "when replaying data with range-based windows"); + } +} + +void JoinTaskDispatcher::dispatch(char *data, int length, long latencyMark, long retainMark) { + long idx; + while ((idx = m_firstBuffer.put(data, length, latencyMark)) < 0) { + _mm_pause(); + //std::cout << "Failed to dispatch..." << std::endl; + //tryCreateNonProcessingTasks(); + } + assembleFirst(idx, length); +} + +void JoinTaskDispatcher::dispatch(std::shared_ptr &data, long latencyMark, long retainMark) { + throw std::runtime_error("error: dispatch with UnboundedQueryBuffer is not implemented"); +} + +void JoinTaskDispatcher::dispatchToFirstStream(char *data, int length, long latencyMark) { + dispatch(data, length, latencyMark); +} + +void JoinTaskDispatcher::dispatchToSecondStream(char *data, int length, long latencyMark) { + long idx; + while ((idx = m_secondBuffer.put(data, length, latencyMark)) < 0) { + _mm_pause(); + //std::cout << "Failed to dispatch..." 
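
dispatch() spins until the circular buffer accepts the batch, issuing _mm_pause between attempts so the core yields while backpressured. A self-contained sketch of that retry pattern, with a toy tryPut standing in for QueryBuffer::put:

```cpp
#include <immintrin.h>  // _mm_pause
#include <atomic>
#include <cstdio>

// Toy stand-in for QueryBuffer::put: hands out a slot index or -1 when full.
std::atomic<int> freeSlots{4};

long tryPut() {
  int s = freeSlots.load();
  while (s > 0 && !freeSlots.compare_exchange_weak(s, s - 1)) {
  }
  return s > 0 ? s - 1 : -1;
}

int main() {
  long idx;
  while ((idx = tryPut()) < 0) {
    _mm_pause();  // tell the core we are spin-waiting, as dispatch() does
  }
  std::printf("acquired slot %ld\n", idx);
}
```
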
<< std::endl; + //tryCreateNonProcessingTasks(); + } + assembleSecond(idx, length); +} + +bool JoinTaskDispatcher::tryDispatchOrCreateTask(char *data, int length, long latencyMark, long retain, std::shared_ptr<LineageGraph> graph) { + long idx; + if ((idx = m_firstBuffer.put(data, length, latencyMark, -1, graph)) < 0) { + //tryCreateNonProcessingTasks(); + return false; + } + assembleFirst(idx, length); + return true; +} + +bool JoinTaskDispatcher::tryDispatch(char *data, int length, long latencyMark, long retain, std::shared_ptr<LineageGraph> graph) { + long idx; + if ((idx = m_firstBuffer.put(data, length, latencyMark, -1, graph)) < 0) { + return false; + } + assembleFirst(idx, length); + return true; +} + +bool JoinTaskDispatcher::tryDispatchToFirstStream(char *data, int length, long latencyMark, std::shared_ptr<LineageGraph> graph) { + return tryDispatch(data, length, latencyMark, -1, graph); +} + +bool JoinTaskDispatcher::tryDispatchToSecondStream(char *data, int length, long latencyMark, std::shared_ptr<LineageGraph> graph) { + long idx; + if ((idx = m_secondBuffer.put(data, length, latencyMark, -1, graph)) < 0) { + return false; + } + assembleSecond(idx, length); + return true; +} + +bool JoinTaskDispatcher::tryDispatchSerialToFirstStream(char *data, int length, size_t id, long latencyMark, std::shared_ptr<LineageGraph> graph) { + long idx; + if ((idx = m_firstBuffer.put(data, length, latencyMark, -1, graph)) < 0) { + return false; + } + { + std::lock_guard<std::mutex> lock(m_left); + m_leftList->push_back(id, idx, length); + } + tryAssembleTask(); + return true; +} + +bool JoinTaskDispatcher::tryDispatchSerialToSecondStream(char *data, int length, size_t id, long latencyMark, std::shared_ptr<LineageGraph> graph) { + long idx; + if ((idx = m_secondBuffer.put(data, length, latencyMark, -1, graph)) < 0) { + return false; + } + { + std::lock_guard<std::mutex> lock(m_right); + m_rightList->push_back(id, idx, length); + } + tryAssembleTask(); + return true; +} + +void JoinTaskDispatcher::tryToConsume() { + throw std::runtime_error("error: the tryToConsume function is not implemented"); +} + +void JoinTaskDispatcher::recover() { + throw std::runtime_error("error: the recover function is not implemented"); +} + +void JoinTaskDispatcher::tryAssembleTask() { + { + std::lock_guard<std::mutex> lock(m_left); + while (m_leftList->size() > 0) { + auto t = m_leftList->front(); + if (t != nullptr && t->m_id <= m_assembleId) { + assembleFirst(t->m_idx, t->m_length); + m_leftList->pop_front(); + } else { + break; + } + } + } + { + std::lock_guard<std::mutex> lock(m_right); + while (m_rightList->size() > 0) { + auto t = m_rightList->front(); + if (t != nullptr && t->m_id <= m_assembleId) { + assembleSecond(t->m_idx, t->m_length); + m_rightList->pop_front(); + } else if (t != nullptr && t->m_id > m_assembleId) { + m_assembleId++; + break; + } else { + break; + } + } + } +} + +QueryBuffer *JoinTaskDispatcher::getBuffer() { return &m_firstBuffer; } + +QueryBuffer *JoinTaskDispatcher::getFirstBuffer() { return &m_firstBuffer; } + +QueryBuffer *JoinTaskDispatcher::getSecondBuffer() { return &m_secondBuffer; } + +void JoinTaskDispatcher::setTaskQueue(std::shared_ptr<TaskQueue> queue) { + m_workerQueue = queue; +} + +long JoinTaskDispatcher::getBytesGenerated() { + return m_parent.getBytesGenerated(); +} + +void JoinTaskDispatcher::setLastTaskId(int taskId) { m_lastTaskId = taskId; } + +void JoinTaskDispatcher::setCheckpointCoordinator( + FileBackedCheckpointCoordinator *coordinator) { + (void) coordinator; + throw std::runtime_error("error: setting a coordinator is not supported yet"); +} + +JoinTaskDispatcher::~JoinTaskDispatcher() {
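
The serial-dispatch path above stages out-of-order arrivals per stream and tryAssembleTask drains them in id order under per-stream locks. A condensed sketch of the drain loop, using std::deque in place of the dispatcher's ring list and simplified ordering logic:

```cpp
#include <cstdio>
#include <deque>
#include <mutex>

struct SerialTask { size_t id; long idx; int length; };

std::mutex leftLock;
std::deque<SerialTask> leftList;  // stand-in for the dispatcher's ring list
size_t assembleId = 0;            // highest id currently allowed to assemble

void assembleFirst(long idx, int length) {
  std::printf("assemble idx=%ld len=%d\n", idx, length);
}

// Drain entries that are in order; stop at the first id gap so a late
// arrival from the other stream can advance assembleId first.
void tryAssembleLeft() {
  std::lock_guard<std::mutex> lock(leftLock);
  while (!leftList.empty() && leftList.front().id <= assembleId) {
    assembleFirst(leftList.front().idx, leftList.front().length);
    leftList.pop_front();
  }
}

int main() {
  leftList.push_back({0, 0, 64});
  leftList.push_back({1, 64, 64});  // waits until assembleId reaches 1
  tryAssembleLeft();
}
```
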
m_triggerCheckpoints = false; + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); +} + +void JoinTaskDispatcher::assembleFirst(long idx, int length) { + if (m_debug) { + std::cout << "[DBG] assemble 1: idx " + std::to_string(idx) + + " length " + std::to_string(length) << std::endl; + } + if (SystemConf::getInstance().LATENCY_ON) { + if (m_latencyMark < 0) { + // latencyMark = index; + // get latency mark + auto systemTimestamp = getSystemTimestamp(m_firstBuffer, idx); + m_latencyMark = systemTimestamp; + // reset the correct timestamp for the first tuple + auto tupleTimestamp = getTimestamp(m_firstBuffer, idx); + setTimestamp(m_firstBuffer, (int)idx, tupleTimestamp); + } + } + + m_firstEndIndex = idx + length - m_firstTupleSize; + { + std::lock_guard lock(m_lock); + if (m_firstEndIndex < m_firstStartIndex) + m_firstEndIndex += m_firstBuffer.getCapacity(); + + m_firstToProcessCount = + (m_firstEndIndex - m_firstStartIndex + m_firstTupleSize) / + m_firstTupleSize; + + /* + * Check whether we have to move the pointer that indicates the oldest + * window in this buffer that has not yet been closed. If we grab the data + * to create a task, the start pointer will be set to this next pointer. + * + */ + if (m_firstWindow.isRowBased()) { + while ((m_firstNextIndex + m_firstWindow.getSize() * m_firstTupleSize) < + m_firstEndIndex) { + m_firstNextIndex += m_firstTupleSize * m_firstWindow.getSlide(); + } + } else if (m_firstWindow.isRangeBased()) { + m_firstNextTime = getTimestamp(m_firstBuffer, m_firstNextIndex); + m_firstEndTime = getTimestamp(m_firstBuffer, m_firstEndIndex); + + while ((m_firstNextTime + m_firstWindow.getSize()) < m_firstEndTime && m_firstNextIndex < m_firstEndIndex) { + m_firstNextIndex += m_firstTupleSize; + m_firstNextTime = getTimestamp(m_firstBuffer, m_firstNextIndex); + } + + } else { + throw std::runtime_error( + "error: window is neither row-based nor range-based"); + } + + /* Check whether we have enough data to create a task */ + int size = (m_firstToProcessCount * m_firstTupleSize) + + (m_secondToProcessCount * m_secondTupleSize); + + if (!m_symmetric) { + if (size >= m_batchSize && m_firstToProcessCount > 0 && m_secondToProcessCount > 0) + createTask(true); + } else { + if (size >= m_batchSize && m_firstToProcessCount > m_firstWindow.getSlide() && m_secondToProcessCount > m_secondWindow.getSlide()) + createSymmetricTask(true); + } + } + if (m_debug) { + std::cout << "[DBG] finishing assemble 1: idx " + std::to_string(idx) + + " length " + std::to_string(length) << std::endl; + } +} + +void JoinTaskDispatcher::assembleSecond(long idx, int length) { + if (m_debug) { + std::cout << "[DBG] assemble 2: idx " + std::to_string(idx) + + " length " + std::to_string(length) << std::endl; + } + + m_secondEndIndex = idx + length - m_secondTupleSize; + + { + std::lock_guard lock(m_lock); + if (m_secondEndIndex < m_secondStartIndex) + m_secondEndIndex += m_secondBuffer.getCapacity(); + + m_secondToProcessCount = + (m_secondEndIndex - m_secondStartIndex + m_secondTupleSize) / + m_secondTupleSize; + + if (m_secondWindow.isRowBased()) { + while ((m_secondNextIndex + m_secondWindow.getSize() * + m_secondTupleSize) < m_secondEndIndex) { + m_secondNextIndex += m_secondTupleSize * m_secondWindow.getSlide(); + } + + } else if (m_secondWindow.isRangeBased()) { + m_secondNextTime = getTimestamp(m_secondBuffer, m_secondNextIndex); + m_secondEndTime = getTimestamp(m_secondBuffer, m_secondEndIndex); + + while ((m_secondNextTime + m_secondWindow.getSize()) < m_secondEndTime && 
m_secondNextIndex < m_secondEndIndex) { + m_secondNextIndex += m_secondTupleSize; + m_secondNextTime = getTimestamp(m_secondBuffer, m_secondNextIndex); + } + + } else { + throw std::runtime_error( + "error: window is neither row-based nor range-based"); + } + + /* Check whether we have enough data to create a task */ + int size = (m_firstToProcessCount * m_firstTupleSize) + + (m_secondToProcessCount * m_secondTupleSize); + + if (!m_symmetric) { + if (size >= m_batchSize && m_firstToProcessCount > 0 && m_secondToProcessCount > 0) + createTask(false); + } else { + if (size >= m_batchSize && m_firstToProcessCount > m_firstWindow.getSlide() && m_secondToProcessCount > m_secondWindow.getSlide()) + createSymmetricTask(false); + } + } + if (m_debug) { + std::cout << "[DBG] finishing assemble 2: idx " + std::to_string(idx) + + " length " + std::to_string(length) << std::endl; + } +} + +void JoinTaskDispatcher::createTask(bool assembledFirst) { + int taskId = getTaskNumber(); + long firstFreePointer = INT_MIN; + long secondFreePointer = INT_MIN; + + if (m_firstNextIndex != m_firstStartIndex) { + firstFreePointer = (m_firstNextIndex - m_firstTupleSize) & m_mask; + m_prevFirstFreePointer = firstFreePointer; + } + + if (m_secondNextIndex != m_secondStartIndex) { + secondFreePointer = (m_secondNextIndex - m_secondTupleSize) & m_mask; + m_prevSecondFreePointer = secondFreePointer; + } + + /* Find latency mark */ + int mark = -1; + if (SystemConf::getInstance().LATENCY_ON) { + if (m_latencyMark >= 0) { + mark = m_latencyMark; + m_latencyMark = -1; + } + } + + auto batch1 = WindowBatchFactory::getInstance().newInstance( + m_batchSize, taskId, firstFreePointer, secondFreePointer, &m_parent, + &m_firstBuffer, &m_firstWindow, m_firstSchema, mark, + m_prevFirstFreePointer, m_prevSecondFreePointer); + + auto batch2 = WindowBatchFactory::getInstance().newInstance( + m_batchSize, taskId, INT_MIN, INT_MIN, &m_parent, &m_secondBuffer, + &m_secondWindow, m_secondSchema, -1); + + // todo: Fix the buffer pointers. At the moment it works only when inserting small number of tuples + // from both sides. Set properly the input rate and batch size for optimal performance. 
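
createTask computes free pointers with expressions like (index - tupleSize) & m_mask; this relies on the buffer capacity being a power of two, so the mask wraps both overruns and negative offsets. A tiny check of that invariant:

```cpp
#include <cassert>

int main() {
  const long capacity = 1024;  // buffer capacities are powers of two
  const long mask = capacity - 1;
  const long tupleSize = 16;
  // An index that ran past the end wraps back into range:
  assert(((capacity + 10) & mask) == 10);
  // "One tuple before index 0" wraps to the buffer tail, as in createTask:
  assert(((0 - tupleSize) & mask) == capacity - tupleSize);
  return 0;
}
```
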
+ if (assembledFirst) { + batch1->setBufferPointers( + m_firstLastEndIndex + m_firstTupleSize, + normaliseIndex(m_firstBuffer, m_firstLastEndIndex, m_firstEndIndex)); + + batch2->setBufferPointers( + m_secondStartIndex, + normaliseIndex(m_secondBuffer, m_secondStartIndex, m_secondEndIndex)); + } else { + batch1->setBufferPointers( + m_firstStartIndex, + normaliseIndex(m_firstBuffer, m_firstStartIndex, m_firstEndIndex)); + + batch2->setBufferPointers( + m_secondLastEndIndex + m_secondTupleSize, + normaliseIndex(m_secondBuffer, m_secondLastEndIndex, m_secondEndIndex)); + } + + if (SystemConf::getInstance().LINEAGE_ON) { + auto graph = LineageGraphFactory::getInstance().newInstance(); + batch1->setLineageGraph(graph); + graph.reset(); + auto slotId1 = batch1->getBufferStartPointer() / batch1->getBatchSize(); + auto &slot1 = m_firstBuffer.getSlots()[slotId1]; + { + std::lock_guard l (slot1.m_updateLock); + if (slot1.m_graph && slot1.m_graph->m_isValid) { + batch1->getLineageGraph()->mergeGraphs(slot1.m_graph); + slot1.m_graph.reset(); + } else { + slot1.m_graph.reset(); + } + } + + auto slotId2 = batch2->getBufferStartPointer() / batch2->getBatchSize(); + auto &slot2 = m_secondBuffer.getSlots()[slotId2]; + { + std::lock_guard l(slot2.m_updateLock); + if (slot2.m_graph && slot2.m_graph->m_isValid) { + batch1->getLineageGraph()->mergeGraphs(slot2.m_graph); + slot2.m_graph.reset(); + } else { + slot1.m_graph.reset(); + } + } + + if (batch1->getLineageGraph().use_count() > 1) { + throw std::runtime_error("error: the lineage graph has multiple owners"); + } + + if (m_firstBuffer.isPersistent() || m_secondBuffer.isPersistent()) + throw std::runtime_error( + "error: the lineage graph is not supported yet for joins"); + } + + if (m_debug) { + std::cout << "[DBG] dispatch task " + std::to_string(taskId) + + " batch-1 [" + + std::to_string(batch1->getBufferStartPointer()) + ", " + + std::to_string(batch1->getBufferEndPointer()) + + "] batch-2 [" + + std::to_string(batch2->getBufferStartPointer()) + ", " + + std::to_string(batch2->getBufferEndPointer()) + "]" + << std::endl; + } + + m_firstLastEndIndex = m_firstEndIndex; + m_secondLastEndIndex = m_secondEndIndex; + + TaskType type = + (m_createMergeTasks.load()) ? TaskType::PROCESS : TaskType::ONLY_PROCESS; + batch1->setTaskType(type); + auto task = + TaskFactory::getInstance().newInstance(taskId, batch1, batch2, type); + + if (SystemConf::getInstance().LINEAGE_ON) { + m_parent.getOperator()->writeOffsets(taskId, batch1->getFreePointer(), batch2->getFreePointer()); + } + + while (!m_workerQueue->try_enqueue(task)) { + if (m_debug) + std::cout << "warning: waiting to enqueue PROCESS task in the join task dispatcher " + << std::to_string(m_parent.getId()) + << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl; + } + + /* + * First, reduce the number of tuples that are ready for processing by the + * number of tuples that are fully processed in the task that was just + * created. + */ + if (m_firstNextIndex != m_firstStartIndex) + m_firstToProcessCount -= + (m_firstNextIndex - m_firstStartIndex) / m_firstTupleSize; + + if (m_secondNextIndex != m_secondStartIndex) + m_secondToProcessCount -= + (m_secondNextIndex - m_secondStartIndex) / m_secondTupleSize; + + /* + * Second, move the start pointer for the next task to the next pointer. 
+ */ + if (m_firstNextIndex > m_mask) + m_firstNextIndex = m_firstNextIndex & m_mask; + + if (m_secondNextIndex > m_mask) + m_secondNextIndex = m_secondNextIndex & m_mask; + + m_firstStartIndex = m_firstNextIndex; + m_secondStartIndex = m_secondNextIndex; +} + +void JoinTaskDispatcher::createSymmetricTask(bool assembledFirst) { + int taskId = getTaskNumber(); + long firstFreePointer = INT_MIN; + long secondFreePointer = INT_MIN; + + if (!m_firstWindow.isTumbling() || !m_secondWindow.isTumbling() || + !m_firstWindow.isRowBased() || !m_secondWindow.isRowBased()) + throw std::runtime_error("error: these window types are not supported by the join dispatcher"); + + /* Find latency mark */ + int mark = -1; + if (SystemConf::getInstance().LATENCY_ON) { + if (m_latencyMark >= 0) { + mark = m_latencyMark; + m_latencyMark = -1; + } + } + + int iterations = m_batchSize / (m_firstWindow.getSlide() * m_firstTupleSize + m_secondWindow.getSlide() * m_secondTupleSize); + if (iterations == 0) + iterations = 1; + + if (m_firstToProcessCount < iterations * m_firstWindow.getSlide()) + iterations = m_firstToProcessCount / m_firstWindow.getSlide(); + + if (m_secondToProcessCount < iterations * m_secondWindow.getSlide()) + iterations = m_secondToProcessCount / m_secondWindow.getSlide(); + + firstFreePointer = (m_firstStartIndex + iterations * m_firstWindow.getSlide() * m_firstTupleSize) & m_mask; + firstFreePointer = (firstFreePointer == 0) ? m_firstBuffer.getCapacity() : firstFreePointer; + firstFreePointer--; + + secondFreePointer = (m_secondStartIndex + iterations * m_secondWindow.getSlide() * m_secondTupleSize) & m_mask; + secondFreePointer = (secondFreePointer == 0) ? m_secondBuffer.getCapacity() : secondFreePointer; + secondFreePointer--; + + auto batch1 = WindowBatchFactory::getInstance().newInstance( + m_batchSize, taskId, firstFreePointer, secondFreePointer, &m_parent, + &m_firstBuffer, &m_firstWindow, m_firstSchema, mark); + + auto batch2 = WindowBatchFactory::getInstance().newInstance( + m_batchSize, taskId, INT_MIN, INT_MIN, &m_parent, &m_secondBuffer, + &m_secondWindow, m_secondSchema, -1); + + + batch1->setBufferPointers(m_firstStartIndex, + normaliseIndex(m_firstBuffer, m_firstStartIndex, + m_firstStartIndex + iterations * m_firstWindow.getSlide() * m_firstTupleSize - m_firstTupleSize)); + batch2->setBufferPointers(m_secondStartIndex, + normaliseIndex(m_secondBuffer, m_secondStartIndex, + m_secondStartIndex + iterations * m_secondWindow.getSlide() * m_secondTupleSize - m_secondTupleSize)); + + if (SystemConf::getInstance().LINEAGE_ON) { + throw std::runtime_error("error: lineage not supported yet for symmetric tasks"); + } + + if (m_debug) { + std::cout << "[DBG] dispatch task " + std::to_string(taskId) + + " batch-1 [" + + std::to_string(batch1->getBufferStartPointer()) + ", " + + std::to_string(batch1->getBufferEndPointer()) + + "] f[" + std::to_string(batch1->getFreePointer()) + "] batch-2 [" + + std::to_string(batch2->getBufferStartPointer()) + ", " + + std::to_string(batch2->getBufferEndPointer()) + "] f[" + + std::to_string(batch1->getSecondFreePointer()) + "]" << std::endl; + } + + m_firstLastEndIndex = m_firstEndIndex; + m_secondLastEndIndex = m_secondEndIndex; + + TaskType type = + (m_createMergeTasks.load()) ?
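
createSymmetricTask above sizes each task in whole slide pairs: it first fits as many pairs as the batch budget allows, then clamps to what each stream has buffered. A re-derivation with toy numbers (all sizes hypothetical):

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical sizes: batch budget in bytes, per-stream slides (tuples)
  // and tuple sizes (bytes).
  long batchSize = 4096;
  long slide1 = 16, tupleSize1 = 32;
  long slide2 = 8, tupleSize2 = 64;

  // How many slide pairs fit into one batch:
  long iterations = batchSize / (slide1 * tupleSize1 + slide2 * tupleSize2);
  iterations = std::max(iterations, 1L);

  // Clamp to the tuples each stream actually has buffered:
  long have1 = 100, have2 = 30;
  if (have1 < iterations * slide1) iterations = have1 / slide1;
  if (have2 < iterations * slide2) iterations = have2 / slide2;

  std::printf("dispatch %ld slide pairs in this task\n", iterations);  // 3
}
```
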
TaskType::PROCESS : TaskType::ONLY_PROCESS; + batch1->setTaskType(type); + auto task = + TaskFactory::getInstance().newInstance(taskId, batch1, batch2, type); + + while (!m_workerQueue->try_enqueue(task)) { + if (m_debug) + std::cout << "warning: waiting to enqueue PROCESS task in the join task dispatcher " + << std::to_string(m_parent.getId()) + << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl; + } + + /* + * First, reduce the number of tuples that are ready for processing by the + * number of tuples that are fully processed in the task that was just + * created. + */ + + m_firstToProcessCount -= iterations * m_firstWindow.getSlide(); + m_secondToProcessCount -= iterations * m_secondWindow.getSlide(); + + /* + * Second, move the start pointer for the next task to the next pointer. + */ + + m_firstStartIndex = (m_firstStartIndex + iterations * m_firstWindow.getSlide() * m_firstTupleSize) & m_mask; + m_secondStartIndex = (m_secondStartIndex + iterations * m_secondWindow.getSlide() * m_secondTupleSize) & m_mask; +} + +int JoinTaskDispatcher::getTaskNumber() { + int id = m_nextTask.fetch_add(1); + if (m_nextTask.load() == INT_MAX) m_nextTask.store(1); + return id; +} + +void JoinTaskDispatcher::setTaskNumber(int taskId) { + throw std::runtime_error("error: the setTaskNumber function is not implemented"); +} + +void JoinTaskDispatcher::setStepAndOffset(long step, long offset) { + throw std::runtime_error("error: the setStepAndOffset function is not implemented"); +} + +void JoinTaskDispatcher::createMergeTasks(bool flag) { + m_createMergeTasks.store(flag); +} + +void JoinTaskDispatcher::tryCreateNonProcessingTasks() { + if (m_workerQueue->size_approx() >= m_parent.getTaskQueueCapacity()) { + return; + } + // create a merge task + bool flag = (SystemConf::getInstance().CREATE_MERGE_WITH_CHECKPOINTS && m_createMergeTasks.load()) || + !SystemConf::getInstance().CREATE_MERGE_WITH_CHECKPOINTS; + if ((int)m_workerQueue->size_approx() < SystemConf::getInstance().WORKER_THREADS && flag) { + //&& m_createMergeTasks.load()) { + //if (m_createMergeTasks.load()) + // std::this_thread::sleep_for(std::chrono::milliseconds(100)); + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, &m_parent, nullptr, &m_firstWindow, m_firstSchema, -1); + auto type = TaskType::MERGE_FORWARD; // m_createMergeTasks.load() ? 
TaskType::MERGE : TaskType::MERGE_FORWARD; + batch->setTaskType(type); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, type); + if (!m_workerQueue->try_enqueue(task)) { + std::cout << "warning: waiting to enqueue MERGE_FORWARD task in the join task dispatcher " + << std::to_string(m_parent.getId()) + << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl; + WindowBatchFactory::getInstance().free(batch); + TaskFactory::getInstance().free(task); + } + } + // create a checkpoint task + if (m_triggerCheckpoints && m_coordinator && m_coordinator->hasWorkUnsafe(m_parent.getId()) && m_checkpointCounter < SystemConf::getInstance().WORKER_THREADS) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, &m_parent, nullptr, &m_firstWindow, m_firstSchema, -1); + batch->setTaskType(TaskType::CHECKPOINT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::CHECKPOINT); + if (!m_workerQueue->try_enqueue(task)) { + std::cout << "warning: waiting to enqueue CHECKPOINT task in the join task dispatcher " + << std::to_string(m_parent.getId()) + << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl; + WindowBatchFactory::getInstance().free(batch); + TaskFactory::getInstance().free(task); + } else { + m_checkpointCounter++; + } + } +} + +long JoinTaskDispatcher::normaliseIndex(QueryBuffer &buffer, long p, long q) { + if (q < p) + return (q + buffer.getCapacity()); + return q; +} + +long JoinTaskDispatcher::getTimestamp(QueryBuffer &buffer, int index) { + // wrap around if it gets out of bounds + if (index < 0) index = buffer.getCapacity() + index; + long value = buffer.getLong(index); + if (SystemConf::getInstance().LATENCY_ON) + return (long)Utils::getTupleTimestamp(value); + else + return value; +} + +long JoinTaskDispatcher::getSystemTimestamp(QueryBuffer &buffer, int index) { + // wrap around if it gets out of bounds + if (index < 0) index = buffer.getCapacity() + index; + long value = buffer.getLong(index); + if (SystemConf::getInstance().LATENCY_ON) + return (long)Utils::getSystemTimestamp(value); + else + return value; +} + +void JoinTaskDispatcher::setTimestamp(QueryBuffer &buffer, int index, long timestamp) { + // wrap around if it gets out of bounds + if (index < 0) index = buffer.getCapacity() + index; + buffer.setLong(index, timestamp); +} \ No newline at end of file diff --git a/src/dispatcher/JoinTaskDispatcher.h b/src/dispatcher/JoinTaskDispatcher.h new file mode 100644 index 0000000..577738c --- /dev/null +++ b/src/dispatcher/JoinTaskDispatcher.h @@ -0,0 +1,175 @@ +#pragma once + +#include <climits> + +#include <atomic> +#include <memory> +#include <mutex> +#include <vector> + +#include "dispatcher/ITaskDispatcher.h" + +class ResultHandler; +class Query; +class WindowDefinition; +class TupleSchema; +class Task; +class QueryBuffer; +class OperatorCode; +class FileBackedCheckpointCoordinator; +class UnboundedQueryBuffer; + +/* + * \brief This is a task dispatcher for two streams used for joins.
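
getTimestamp and getSystemTimestamp above decode two logical timestamps from one 64-bit value when latency tracking is on. A sketch of one possible packing, assuming a high/low 32-bit split; the actual layout lives in Utils and is not shown in this patch:

```cpp
#include <cstdint>
#include <cstdio>

// Assumed layout: high 32 bits carry the system (ingestion) timestamp and
// the low 32 bits the tuple timestamp. The real split lives in
// Utils::getSystemTimestamp/getTupleTimestamp and may differ.
int64_t pack(int32_t systemTs, int32_t tupleTs) {
  return (static_cast<int64_t>(systemTs) << 32) |
         static_cast<uint32_t>(tupleTs);
}
int32_t tupleTimestamp(int64_t v) { return static_cast<int32_t>(v); }
int32_t systemTimestamp(int64_t v) { return static_cast<int32_t>(v >> 32); }

int main() {
  int64_t v = pack(123456, 42);
  std::printf("%d %d\n", systemTimestamp(v), tupleTimestamp(v));  // 123456 42
}
```
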
+ * + * */ + +class JoinTaskDispatcher : public ITaskDispatcher { + private: + QueryBuffer &m_firstBuffer, &m_secondBuffer; + WindowDefinition &m_firstWindow, &m_secondWindow; + TupleSchema *m_firstSchema, *m_secondSchema; + int m_batchSize; + int m_firstTupleSize, m_secondTupleSize; + + /* Task Identifier */ + std::atomic m_nextTask; + + /* Pointers */ + long m_firstStartIndex = 0; + long m_firstNextIndex = 0; + long m_firstEndIndex = 0; + long m_firstLastEndIndex = 0; + long m_firstToProcessCount = 0; + long m_firstNextTime = 0; + long m_firstEndTime = 0; + long m_prevFirstFreePointer = 0; + + long m_secondStartIndex = 0; + long m_secondNextIndex = 0; + long m_secondLastEndIndex = 0; + long m_secondEndIndex = 0; + long m_secondToProcessCount = 0; + long m_secondNextTime = 0; + long m_secondEndTime = 0; + long m_prevSecondFreePointer = 0; + + long m_mask; + long m_latencyMark; + std::mutex m_lock; + + int m_replayBarrier = 0; + + /* Watermark ingestion */ + long m_watermark = LONG_MIN; + int m_watermarkFrequency = SystemConf::WORKER_THREADS; + + size_t m_assembleId = 0; + const bool m_symmetric = false; + const bool m_debug = false; + + public: + JoinTaskDispatcher(Query &query, QueryBuffer &buffer, + QueryBuffer &secondBuffer, + bool replayTimestamps = false, + bool triggerCheckpoints = false); + void dispatch(char *data, int length, long latencyMark = -1, long retainMark = 1) override; + void dispatch(std::shared_ptr &data, long latencyMark = -1, long retainMark = -1) override; + void dispatchToFirstStream(char *data, int length, long latencyMark) override; + void dispatchToSecondStream(char *data, int length, long latencyMark) override; + bool tryDispatchOrCreateTask(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatch(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchToFirstStream(char *data, int length, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchToSecondStream(char *data, int length, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchSerialToFirstStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchSerialToSecondStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + void tryToConsume() override; + void recover() override; + QueryBuffer *getBuffer() override; + QueryBuffer *getFirstBuffer() override; + QueryBuffer *getSecondBuffer() override; + void setTaskQueue(std::shared_ptr queue) override; + long getBytesGenerated() override; + void setLastTaskId(int taskId); + void setCheckpointCoordinator(FileBackedCheckpointCoordinator *coordinator); + int getTaskNumber() override; + void setTaskNumber(int taskId) override; + void setStepAndOffset(long step, long offset) override; + void createMergeTasks(bool flag) override; + ~JoinTaskDispatcher() override; + + private: + void tryAssembleTask(); + void assembleFirst(long index, int length); + void assembleSecond(long index, int length); + void createTask(bool assembledFirst); + void createSymmetricTask(bool assembledFirst); + void tryCreateNonProcessingTasks(); + long normaliseIndex(QueryBuffer &buffer, long p, long q); + long getTimestamp(QueryBuffer &buffer, int index); + long getSystemTimestamp(QueryBuffer &buffer, int index); + void setTimestamp(QueryBuffer &buffer, int index, long timestamp); 
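
A caveat for the CircularTaskList that follows: its push_back grows the backing vector in place, so once the ring has wrapped (write index behind read index) the new slots land between the write and read positions and pending entries can be overwritten. A wrap-aware grow, sketched with the same field names as a hypothetical fix:

```cpp
#include <vector>

struct SerialTask { size_t m_id; long m_idx; int m_length; };

struct CircularTaskList {
  std::vector<SerialTask> m_buffer;
  int m_size, m_readIdx, m_writeIdx, m_elements = 0;

  explicit CircularTaskList(int size)
      : m_buffer(size), m_size(size), m_readIdx(0), m_writeIdx(size - 1) {}

  // Copy live entries out in read order before doubling, so a wrapped ring
  // stays contiguous after the grow.
  void grow() {
    std::vector<SerialTask> next(m_size * 2);
    for (int i = 0; i < m_elements; ++i)
      next[i] = m_buffer[(m_readIdx + i) % m_size];
    m_buffer.swap(next);
    m_readIdx = 0;
    m_writeIdx = m_elements - 1;
    m_size *= 2;
  }

  void push_back(size_t id, long idx, int length) {
    if (m_elements == m_size) grow();
    m_writeIdx = (m_writeIdx + 1) % m_size;
    m_buffer[m_writeIdx] = {id, idx, length};
    m_elements++;
  }
};

int main() {
  CircularTaskList list(2);
  list.push_back(0, 0, 64);
  list.push_back(1, 64, 64);
  list.push_back(2, 128, 64);  // triggers the wrap-aware grow
}
```
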
+ + struct SerialTask { + size_t m_id; + long m_idx; + int m_length; + SerialTask() : m_id(0), m_idx(0), m_length(0) {} + }; + + struct CircularTaskList { + std::vector> m_buffer; + int m_size; + int m_readIdx; + int m_writeIdx; + int m_elements = 0; + CircularTaskList(int size = 0) : m_buffer(size, SerialTask()), m_size(size) { + m_readIdx = 0; + m_writeIdx = size - 1; + } + void set_capacity(int size) { + m_buffer.resize(size, SerialTask()); + m_size = size; + m_readIdx = 0; + m_writeIdx = size - 1; + } + void push_back(size_t id, long idx, int length) { + if (m_elements == m_size) { + m_buffer.resize(m_size * 2, SerialTask()); + m_size = 2 * m_size; + } + + m_writeIdx++; + if (m_writeIdx == (int) m_buffer.size()) + m_writeIdx = 0; + + m_buffer[m_writeIdx].m_id = id; + m_buffer[m_writeIdx].m_idx = idx; + m_buffer[m_writeIdx].m_length = length; + + m_elements++; + } + SerialTask *front() { + if (m_elements > 0) + return &m_buffer[m_readIdx]; + else + return nullptr; + //throw std::runtime_error("error: empty CircularList"); + } + void pop_front() { + m_elements--; + m_readIdx++; + if (m_readIdx == (int) m_buffer.size()) + m_readIdx = 0; + } + int size() { return m_elements; } + int capacity() { return m_size; } + }; + + // variables for serializing tasks + size_t m_index = 0; + std::mutex m_left; + std::unique_ptr m_leftList; + std::mutex m_right; + std::unique_ptr m_rightList; +}; \ No newline at end of file diff --git a/src/dispatcher/TaskDispatcher.cpp b/src/dispatcher/TaskDispatcher.cpp index c557573..7fc83e1 100644 --- a/src/dispatcher/TaskDispatcher.cpp +++ b/src/dispatcher/TaskDispatcher.cpp @@ -2,53 +2,195 @@ #include +#include "buffers/UnboundedQueryBuffer.h" + +#if defined(HAVE_NUMA) +#include "buffers/PersistentNumaCircularQueryBuffer.h" +#else +#include "buffers/PersistentCircularQueryBuffer.h" +#endif +#include "checkpoint/FileBackedCheckpointCoordinator.h" #include "result/ResultHandler.h" #include "tasks/Task.h" #include "tasks/TaskFactory.h" #include "tasks/WindowBatchFactory.h" #include "utils/Query.h" +#include "utils/QueryConfig.h" #include "utils/TupleSchema.h" #include "utils/Utils.h" #include "utils/WindowDefinition.h" TaskDispatcher::TaskDispatcher(Query &query, QueryBuffer &buffer, - bool replayTimestamps) - : m_workerQueue(query.getTaskQueue()), - m_parent(query), + bool replayTimestamps, bool triggerCheckpoints) + : ITaskDispatcher(query, triggerCheckpoints), m_buffer(buffer), m_window(query.getWindowDefinition()), m_schema(query.getSchema()), - m_batchSize(SystemConf::getInstance().BATCH_SIZE), + m_batchSize(query.getConfig() ? query.getConfig()->getBatchSize() : SystemConf::getInstance().BATCH_SIZE), m_tupleSize(m_schema->getTupleSize()), m_nextTask(1), m_mask(buffer.getMask()), m_latencyMark(-1), m_thisBatchStartPointer(0), m_nextBatchEndPointer(m_batchSize), - m_replayTimestamps(replayTimestamps) { + m_replayTimestamps(replayTimestamps) +#if defined(HAVE_SHARED) + , + m_segment(std::make_unique( + boost::interprocess::open_only, "MySharedMemory")) +#endif +{ + if (replayTimestamps) { - m_replayBarrier = SystemConf::getInstance().BUNDLE_SIZE / - SystemConf::getInstance().BATCH_SIZE; + m_replayBarrier = query.getConfig() ? 
+ (query.getConfig()->getBundleSize() / query.getConfig()->getBatchSize()) : + (SystemConf::getInstance().BUNDLE_SIZE / SystemConf::getInstance().BATCH_SIZE); if (m_replayBarrier == 0) throw std::runtime_error( "error: the bundle size should be greater or equal to the batch size " "when replaying data with range-based windows"); } +#if defined(HAVE_NUMA) + if (PersistentNumaCircularQueryBuffer *b = dynamic_cast(&m_buffer)) { +#else + if (PersistentCircularQueryBuffer *b = dynamic_cast(&m_buffer)) { +#endif + m_parallelInsertion = true; + } +#if defined(HAVE_SHARED) + m_o = m_segment->find("offset").first; + m_s = m_segment->find("step").first; + m_offset = *m_o; + m_step = *m_s; +#endif } -void TaskDispatcher::dispatch(char *data, int length, long latencyMark) { - long idx; - while ((idx = m_buffer.put(data, length, latencyMark)) < 0) { - _mm_pause(); - // std::cout << "Failed to dispatch..." << std::endl; - tryCreateNonProcessingTasks(); +void TaskDispatcher::dispatch(char *data, int length, long latencyMark, long retainMark) { +#if defined(HAVE_SHARED) + /* Check if we try to recover from existing data */ + if (m_startingFromRecovery) { + if (m_buffer.getUnsafeRemainingBytes() != 0) { + m_accumulated = m_buffer.getUnsafeStartPointer(); + m_thisBatchStartPointer = m_buffer.getUnsafeStartPointer(); + m_nextBatchEndPointer = m_buffer.getUnsafeStartPointer() + m_batchSize; + m_watermark = m_buffer.getLong( + m_buffer.normalise(m_buffer.getUnsafeStartPointer())) - + 1; + m_buffer.fixTimestamps( + m_buffer.getUnsafeStartPointer(), m_watermark + 1, m_step, + m_buffer.getUnsafeRemainingBytes()); // todo: fix this + assemble(m_buffer.normalise(m_buffer.getUnsafeEndPointer()), + m_buffer.getUnsafeRemainingBytes()); + } + m_startingFromRecovery = false; } - assemble(idx, length); +#endif + + if (!m_parallelInsertion) { + long idx; + while ((idx = m_buffer.put(data, length, latencyMark)) < 0) { + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + tryCreateNonProcessingTasks(); + } + assemble(idx, length); + } else { +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#endif + long idx; bool ready = false; + while ((idx = m_buffer.put(data, length, latencyMark, retainMark)) < 0) { + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + tryCreateNonProcessingTasks(); + } + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + } +} + +void TaskDispatcher::dispatch(std::shared_ptr &data, long latencyMark, long retainMark) { + if (!m_parallelInsertion) { + long idx; + while ((idx = m_buffer.put(data, latencyMark)) < 0) { + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + tryCreateNonProcessingTasks(); + } + assemble(idx, data->getBuffer().size()); + } else { +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#endif + long idx; bool ready = false; + int length = data->getBuffer().size(); + while ((idx = m_buffer.put(data, latencyMark, retainMark)) < 0) { + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + _mm_pause(); + // std::cout << "Failed to dispatch..." 
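
With persistent buffers, put() can fail while slot I/O is still in flight, so the dispatcher drains completed slots via tryConsumeNextSlot while it waits. A toy model of that interleaved produce/drain loop; ToyPersistentBuffer merely mimics the slot API:

```cpp
#include <cstdio>
#include <queue>

// Toy model of the persistent buffer's slot API: put() stages a write (or
// fails when no slot is free) and tryConsumeNextSlot() returns slots whose
// I/O has completed, in order.
struct ToyPersistentBuffer {
  std::queue<long> completed;
  int freeSlots = 2;
  long put(long idx) {
    if (freeSlots == 0) return -1;
    --freeSlots;
    completed.push(idx);  // pretend the asynchronous write finished at once
    return idx;
  }
  bool tryConsumeNextSlot(long &idx, int &length) {
    if (completed.empty()) return false;
    idx = completed.front();
    completed.pop();
    ++freeSlots;
    length = 64;
    return true;
  }
};

int main() {
  ToyPersistentBuffer b;
  long idx;
  int length;
  for (long i = 0; i < 4; i++) {
    while (b.put(i) < 0) {
      // drain finished slots while blocked, as TaskDispatcher::dispatch does
      while (b.tryConsumeNextSlot(idx, length)) std::printf("assemble %ld\n", idx);
    }
  }
  while (b.tryConsumeNextSlot(idx, length)) std::printf("assemble %ld\n", idx);
}
```
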
<< std::endl; + tryCreateNonProcessingTasks(); + } + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + } +} + +void TaskDispatcher::dispatch(void *data, int length, long latencyMark, long retainMark) { + if (!m_parallelInsertion) { + long idx; + while ((idx = m_buffer.put(data, latencyMark)) < 0) { + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + tryCreateNonProcessingTasks(); + } + assemble(idx, length); + } else { +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast<PersistentNumaCircularQueryBuffer *>(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast<PersistentCircularQueryBuffer *>(&m_buffer); +#endif + long idx; bool ready = false; +#if defined(RDMA_INPUT) + while ((idx = m_buffer.put(data, latencyMark, retainMark)) < 0) { + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + tryCreateNonProcessingTasks(); + } + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } +#else + throw std::runtime_error("error: enable RDMA_INPUT"); +#endif + } +} + +void TaskDispatcher::dispatchToFirstStream(char *data, int length, long latencyMark) { + dispatch(data, length, latencyMark); +} + +void TaskDispatcher::dispatchToSecondStream(char *data, int length, long latencyMark) { + throw std::runtime_error("error: dispatching to the second stream is not supported by this dispatcher"); } -bool TaskDispatcher::tryDispatch(char *data, int length, long latencyMark) { +bool TaskDispatcher::tryDispatchOrCreateTask(char *data, int length, long latencyMark, long retain, std::shared_ptr<LineageGraph> graph) { long idx; - if ((idx = m_buffer.put(data, length, latencyMark)) < 0) { + if ((idx = m_buffer.put(data, length, latencyMark, retain, graph)) < 0) { tryCreateNonProcessingTasks(); return false; } @@ -56,8 +198,95 @@ bool TaskDispatcher::tryDispatch(char *data, int length, long latencyMark) { return true; } +bool TaskDispatcher::tryDispatch(char *data, int length, long latencyMark, long retain, std::shared_ptr<LineageGraph> graph) { + if (!m_parallelInsertion) { + long idx; + + if ((idx = m_buffer.put(data, length, latencyMark, -1, graph)) < 0) { + return false; + } + assemble(idx, length); + return true; + } else { + long idx; +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast<PersistentNumaCircularQueryBuffer *>(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast<PersistentCircularQueryBuffer *>(&m_buffer); +#endif + if ((idx = m_buffer.put(data, length, latencyMark, retain, graph)) < 0) { + return false; + } + while (b->tryConsumeNextSlot(idx, length)) { + assemble(idx, length); + } + return true; + } +} + +bool TaskDispatcher::tryDispatchToFirstStream(char *data, int length, long latencyMark, std::shared_ptr<LineageGraph> graph) { + return tryDispatch(data, length, latencyMark, -1, graph); +} + +bool TaskDispatcher::tryDispatchToSecondStream(char *data, int length, long latencyMark, std::shared_ptr<LineageGraph> graph) { + throw std::runtime_error("error: dispatching to the second stream is not supported by this dispatcher"); +} + +bool TaskDispatcher::tryDispatchSerialToFirstStream(char *data, int length, size_t id, long latencyMark, std::shared_ptr<LineageGraph> graph) { + throw std::runtime_error("error: this operation is not supported by the task dispatcher"); +} + +bool TaskDispatcher::tryDispatchSerialToSecondStream(char *data, int length, size_t id, long latencyMark, std::shared_ptr<LineageGraph> graph) { + throw std::runtime_error("error: dispatching to the second stream is not supported by this dispatcher"); +} + +void TaskDispatcher::tryToConsume() { + if
(!m_buffer.isPersistent()) + return; + +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#endif + long idx; bool ready = false; int length; + while(b->getEmptySlots() < b->getNumberOfSlots()*0.75) { + while (b->tryConsumeNextSlot(idx, length, true)) { + assemble(idx, length); + } + } +} + +void TaskDispatcher::recover() { +#if defined(HAVE_NUMA) + PersistentNumaCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#else + PersistentCircularQueryBuffer *b = dynamic_cast(&m_buffer); +#endif + long idx; int length; bool ready = false; + while ((idx = m_buffer.recover(length)) == 0) { + while (b->tryConsumeNextSlot(idx, length, true)) { + assemble(idx, length); + } + _mm_pause(); + // std::cout << "Failed to dispatch..." << std::endl; + } + while (b->getRemainingSlotsToFree() != 0) { + while (b->tryConsumeNextSlot(idx, length, true)) { + assemble(idx, length); + } + } +} + QueryBuffer *TaskDispatcher::getBuffer() { return &m_buffer; } +QueryBuffer *TaskDispatcher::getFirstBuffer() { return &m_buffer; } + +QueryBuffer *TaskDispatcher::getSecondBuffer() { + throw std::runtime_error( + "error: getting a second buffer is not supported by this dispatcher"); +} + void TaskDispatcher::setTaskQueue(std::shared_ptr queue) { m_workerQueue = queue; } @@ -66,7 +295,48 @@ long TaskDispatcher::getBytesGenerated() { return m_parent.getBytesGenerated(); } -TaskDispatcher::~TaskDispatcher() {} +void TaskDispatcher::setLastTaskId(int taskId) { m_lastTaskId = taskId; } + +void TaskDispatcher::setCheckpointCoordinator( + FileBackedCheckpointCoordinator *coordinator) { + if (!coordinator) throw std::runtime_error("error: null coordinator pointer"); + if (!m_triggerCheckpoints) return; + + m_coordinator = coordinator; + + m_coordinationTimerThread = std::thread([&]() { + //Utils::bindProcess(SystemConf::getInstance().WORKER_THREADS+1); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + auto t1 = std::chrono::high_resolution_clock::now(); + auto t2 = t1; + while (m_triggerCheckpoints) { + t1 = std::chrono::high_resolution_clock::now(); + while (!m_checkpointFinished) { + _mm_pause(); + } + m_checkpointFinished.store(false); + auto lastTaskId = m_nextTask.load(); + setLastTaskId(lastTaskId); + m_coordinator->signalWaitCondition(); + // change the type of tasks here? + t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast>(t2 - t1); + if (time_span.count()*1000 < SystemConf::getInstance().CHECKPOINT_INTERVAL) { + auto duration = SystemConf::getInstance().CHECKPOINT_INTERVAL - + (size_t) (time_span.count() * 1000); + std::this_thread::sleep_for(std::chrono::milliseconds(duration)); + } + } + }); + m_coordinationTimerThread.detach(); + +} + +TaskDispatcher::~TaskDispatcher() { + m_triggerCheckpoints = false; + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); +} void TaskDispatcher::assemble(long index, int length) { if (SystemConf::getInstance().LATENCY_ON) { @@ -143,7 +413,7 @@ void TaskDispatcher::newTaskFor(long p, long q, long free, long b_, long _d) { } auto batch = WindowBatchFactory::getInstance().newInstance( - m_batchSize, taskId, (int)(free), &m_parent, &m_buffer, &m_window, + m_batchSize, taskId, free, -1, &m_parent, &m_buffer, &m_window, m_schema, mark); if (m_window.isRangeBased()) { @@ -162,30 +432,76 @@ void TaskDispatcher::newTaskFor(long p, long q, long free, long b_, long _d) { (p != 0) ? 
getTimestamp((int)(p - m_tupleSize)) : getTimestamp((int)(m_buffer.getCapacity() - m_tupleSize)); - if (startTime + m_offset - prevEndTime >= m_step) // sanity check + + if (startTime + m_offset - prevEndTime >= m_step && m_offset > 0 && + !SystemConf::getInstance().RECOVER) { // sanity check m_offset -= m_step; + if (m_buffer.isPersistent()) { + m_buffer.updateStepAndOffset(m_step, m_offset); + } + } } setTimestamp((int)(p), startTime + m_offset); setTimestamp((int)(q - m_tupleSize), endTime + m_offset); batch->setBatchTimestamps(startTime + m_offset, endTime + m_offset); batch->setPrevTimestamps(prevStartTime, prevEndTime); - /*std::cout << "[DBG] " << taskId << " taskId " << m_step << " step "<< - m_offset << " offset " - << p << " startIdx " << q << " endIdx " << prevEndTime << " - prevEndTime " - << startTime << " initialStartTime " << endTime << " - initialEndTime " - << getTimestamp((int) (p)) << " startTime " << - getTimestamp((int) (q - m_tupleSize)) << " endTime " << std::endl;*/ +#if defined(HAVE_OoO) + auto tmpWatermark = startTime + m_offset - 1; + if (taskId % m_watermarkFrequency == 0 && tmpWatermark > m_watermark) { + m_watermark = tmpWatermark; + // std::cout << "[DBG] " << taskId << " taskId " << m_watermark << " + // watermark changed " << std::endl; + } + batch->setWatermark(m_watermark); +#endif + if (m_debug) { + std::cout << "[DBG] " << taskId << " taskId " << m_step << " step " + << m_offset << " offset " << (p + m_recoveryOffset) + << " startIdx " << (q + m_recoveryOffset) + << " endIdx " << prevEndTime << " prevEndTime " << startTime + << " initialStartTime " << endTime << " initialEndTime " + << getTimestamp((int)(p)) << " startTime " + << getTimestamp((int)(q - m_tupleSize)) << " endTime " + << std::endl; + } batch->setTimestampOffset(m_offset); if (taskId % (m_replayBarrier) == 0) { - if (m_step == -1) m_step = endTime + 1; - if (_d <= - (long)m_buffer - .getCapacity()) // SystemConf::getInstance().CIRCULAR_BUFFER_SIZE) + if (m_step == -1) { + m_step = endTime + 1; + if (m_buffer.isPersistent()) { + m_buffer.updateStepAndOffset(m_step, m_offset); + } + } + + if (_d <= (long)m_buffer.getCapacity()) { // && + //!SystemConf::getInstance().RECOVER) { // SystemConf::getInstance().CIRCULAR_BUFFER_SIZE) m_offset += m_step; + if (m_buffer.isPersistent()) { + m_buffer.updateStepAndOffset(m_step, m_offset); + } + } // batch->setBatchTimestamps(startTime, endTime+offset); } +#if defined(HAVE_SHARED) + *m_o = m_offset; + *m_s = m_step; +#endif + } else { + long prevStartTime = 0; + long prevEndTime = 0; + if (m_step != -1) { + prevStartTime = (p != 0) + ? getTimestamp((int)(p - m_batchSize - m_tupleSize)) + : getTimestamp((int)(m_buffer.getCapacity() - + m_batchSize - m_tupleSize)); + prevEndTime = + (p != 0) + ? getTimestamp((int)(p - m_tupleSize)) + : getTimestamp((int)(m_buffer.getCapacity() - m_tupleSize)); + } else { + m_step = 0; + } + batch->setPrevTimestamps(prevStartTime, prevEndTime); } } else { batch->setBatchTimestamps(-1, -1); @@ -194,46 +510,98 @@ void TaskDispatcher::newTaskFor(long p, long q, long free, long b_, long _d) { batch->setBufferPointers((int)p, (int)q); batch->setStreamPointers(b_, _d); - TaskType type = TaskType::PROCESS; + TaskType type = (m_createMergeTasks.load()) ? 
TaskType::PROCESS : TaskType::ONLY_PROCESS; batch->setTaskType(type); - auto task = TaskFactory::getInstance().newInstance(taskId, batch, type); + if (SystemConf::getInstance().LINEAGE_ON) { + auto slotId = p / batch->getBatchSize(); //batch->getBufferStartPointer() / batch->getBatchSize(); + auto &slot = m_buffer.getSlots()[slotId]; + auto graph = slot.getLineageGraph(); + if (!graph) { + /*throw std::runtime_error( + "error: the lineage graph is not initialized in task dispatcher " + + std::to_string(m_parent.getId()) + " for slot " + + std::to_string(slot.m_id) + " with start pointer " + + std::to_string(batch->getBufferStartPointer()));*/ + graph = LineageGraphFactory::getInstance().newInstance(); + } + batch->setLineageGraph(graph); + graph.reset(); + if (m_buffer.isPersistent()) { + auto fp1 = batch->getFreePointer(); + auto fo1 = m_nextBatchEndPointer + m_recoveryOffset; + if (m_debug) { + std::cout << "[DBG] " << taskId << " taskId " << m_buffer.getBufferId() + << " bufferId setting freePtr1 " << fp1 + << " freeOffset1 " << fo1 + << " in slot " << (fp1/m_buffer.getCapacity())%10 + << " for query " << m_parent.getId() + << " with ptr " << batch->getLineageGraph()->m_graph[m_parent.getId()].get() << std::endl; + } + batch->getLineageGraph()->m_graph[m_parent.getId()]->m_freePtr1 = fp1; + batch->getLineageGraph()->m_graph[m_parent.getId()]->m_freeOffset1 = fo1; + batch->getLineageGraph()->m_isValid = true; + } + } + + auto task = TaskFactory::getInstance().newInstance(taskId, batch, nullptr, type); while (!m_workerQueue->try_enqueue(task)) { - /*std::cout << "warning: waiting to enqueue PROCESS task in the task - dispatcher " + /*std::cout << "warning: waiting to enqueue PROCESS task in the task dispatcher " << std::to_string(m_parent.getId()) << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl;*/ } + + if (m_checkpointCounter > 0) + m_checkpointCounter--; } int TaskDispatcher::getTaskNumber() { - int id = m_nextTask++; - if (m_nextTask == INT_MAX) m_nextTask = 1; + int id = m_nextTask.fetch_add(1); + if (m_nextTask.load() == INT_MAX) m_nextTask.store(1); return id; } +void TaskDispatcher::setTaskNumber(int taskId) { + m_nextTask.store(taskId); + m_parent.getResultHandler()->restorePtrs(taskId); +} + +void TaskDispatcher::setStepAndOffset(long step, long offset) { + m_step = step; + m_recoveryOffset = offset; + //m_accumulated = offset; + //m_thisBatchStartPointer = offset; + //m_nextBatchEndPointer += offset; + //m_offset = offset; +} + +void TaskDispatcher::createMergeTasks(bool flag) { + m_createMergeTasks.store(flag); +} + void TaskDispatcher::tryCreateNonProcessingTasks() { if (m_workerQueue->size_approx() >= m_parent.getTaskQueueCapacity()) { return; } // create a merge task - if ((int)m_workerQueue->size_approx() < - SystemConf::getInstance().WORKER_THREADS) { - //&& m_createMergeTasks.load()) { - // if (m_createMergeTasks.load()) + bool flag = (SystemConf::getInstance().CREATE_MERGE_WITH_CHECKPOINTS && m_createMergeTasks.load()) || + !SystemConf::getInstance().CREATE_MERGE_WITH_CHECKPOINTS; + bool nextQueryJoin = false; //m_parent.getDownstreamQuery() != nullptr ? 
m_parent.getDownstreamQuery()->getNumberOfUpstreamQueries() == 2 : false; + if (((int)m_workerQueue->size_approx() < SystemConf::getInstance().WORKER_THREADS && flag) || nextQueryJoin) { + //&& m_createMergeTasks.load()) { + //if (m_createMergeTasks.load()) // std::this_thread::sleep_for(std::chrono::milliseconds(100)); auto batch = WindowBatchFactory::getInstance().newInstance( - 0, 0, -1, &m_parent, nullptr, &m_window, m_schema, -1); - auto type = TaskType::MERGE; + 0, 0, -1, -1, &m_parent, nullptr, &m_window, m_schema, -1); + auto type = TaskType::MERGE_FORWARD; // m_createMergeTasks.load() ? TaskType::MERGE : TaskType::MERGE_FORWARD; batch->setTaskType(type); - auto task = TaskFactory::getInstance().newInstance(0, batch, type); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, type); while (!m_workerQueue->try_enqueue(task)) { - if (m_workerQueue->size_approx() >= - (0.75 * m_parent.getTaskQueueCapacity())) { - std::cout << "warning: waiting to enqueue MERGE task in the " - "task dispatcher " + if (m_workerQueue->size_approx() >= (0.75 * m_parent.getTaskQueueCapacity())) { + std::cout << "warning: waiting to enqueue MERGE_FORWARD task in the task dispatcher " << std::to_string(m_parent.getId()) << " with size " << std::to_string(m_workerQueue->size_approx()) << std::endl; WindowBatchFactory::getInstance().free(batch); @@ -241,6 +609,23 @@ void TaskDispatcher::tryCreateNonProcessingTasks() { } } } + // create a checkpoint task + if (m_triggerCheckpoints && m_coordinator && m_coordinator->hasWorkUnsafe(m_parent.getId()) && m_checkpointCounter < SystemConf::getInstance().WORKER_THREADS) { + auto batch = WindowBatchFactory::getInstance().newInstance( + 0, 0, -1, -1, &m_parent, nullptr, &m_window, m_schema, -1); + batch->setTaskType(TaskType::CHECKPOINT); + auto task = + TaskFactory::getInstance().newInstance(0, batch, nullptr, TaskType::CHECKPOINT); + if (!m_workerQueue->try_enqueue(task)) { + std::cout << "warning: waiting to enqueue CHECKPOINT task in the task dispatcher " + << std::to_string(m_parent.getId()) << " with size " + << std::to_string(m_workerQueue->size_approx()) << std::endl; + WindowBatchFactory::getInstance().free(batch); + TaskFactory::getInstance().free(task); + } else { + m_checkpointCounter++; + } + } } long TaskDispatcher::getTimestamp(int index) { diff --git a/src/dispatcher/TaskDispatcher.h b/src/dispatcher/TaskDispatcher.h index d46d273..830b4ab 100644 --- a/src/dispatcher/TaskDispatcher.h +++ b/src/dispatcher/TaskDispatcher.h @@ -3,7 +3,7 @@ #include #include -#include "utils/SystemConf.h" +#include "dispatcher/ITaskDispatcher.h" class ResultHandler; class Query; @@ -12,23 +12,24 @@ class TupleSchema; class Task; class QueryBuffer; class OperatorCode; +class FileBackedCheckpointCoordinator; +class UnboundedQueryBuffer; /* * \brief This a task dispatcher for a single stream. * - * For time-based windows, it keeps track of the timestamps when creating a task to - * find gaps between consecutive batches and inform the worker about it. In the NUMA-aware context, - * it assigns to each task information regarding the data locality. + * For time-based windows, it keeps track of the timestamps when creating a task + * to find gaps between consecutive batches and inform the worker about it. In + * the NUMA-aware context, it assigns to each task information regarding the + * data locality. * - * If the replay mode is on by setting true replayTimestamps, the bundle and batch size - * should be defined appropriately for it to work. 
+ * If the replay mode is on by setting true replayTimestamps, the bundle and + * batch size should be defined appropriately for it to work. * * */ -class TaskDispatcher { +class TaskDispatcher : public ITaskDispatcher { private: - std::shared_ptr m_workerQueue; - Query &m_parent; QueryBuffer &m_buffer; WindowDefinition &m_window; TupleSchema *m_schema; @@ -51,24 +52,54 @@ class TaskDispatcher { int m_replayBarrier = 0; + /* Watermark ingestion */ + long m_watermark = LONG_MIN; + int m_watermarkFrequency = SystemConf::WORKER_THREADS; + + long m_recoveryOffset = 0; const bool m_debug = false; +#if defined(HAVE_SHARED) + std::unique_ptr m_segment; + long *m_o; + long *m_s; +#endif + public: TaskDispatcher(Query &query, QueryBuffer &buffer, - bool replayTimestamps = false); - void dispatch(char *data, int length, long latencyMark = -1); - bool tryDispatch(char *data, int length, long latencyMark = -1); - QueryBuffer *getBuffer(); - void setTaskQueue(std::shared_ptr queue); - long getBytesGenerated(); - ~TaskDispatcher(); + bool replayTimestamps = false, + bool triggerCheckpoints = false); + void dispatch(char *data, int length, long latencyMark = -1, long retainMark = -1) override; + void dispatch(std::shared_ptr &data, long latencyMark = -1, long retainMark = -1) override; + void dispatch(void *data, int length, long latencyMark = -1, long retainMark = -1) override; + void dispatchToFirstStream(char *data, int length, long latencyMark) override; + void dispatchToSecondStream(char *data, int length, long latencyMark) override; + bool tryDispatchOrCreateTask(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatch(char *data, int length, long latencyMark = -1, long retain = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchToFirstStream(char *data, int length, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchToSecondStream(char *data, int length, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchSerialToFirstStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + bool tryDispatchSerialToSecondStream(char *data, int length, size_t id, long latencyMark = -1, std::shared_ptr graph = nullptr) override; + void tryToConsume() override; + void recover() override; + QueryBuffer *getBuffer() override; + QueryBuffer *getFirstBuffer() override; + QueryBuffer *getSecondBuffer() override; + void setTaskQueue(std::shared_ptr queue) override; + long getBytesGenerated() override; + void setLastTaskId(int taskId); + void setCheckpointCoordinator(FileBackedCheckpointCoordinator *coordinator); + int getTaskNumber() override; + void setTaskNumber(int taskId) override; + void setStepAndOffset(long step, long offset) override; + void createMergeTasks(bool flag) override; + ~TaskDispatcher() override; private: void assemble(long index, int length); void newTaskFor(long p, long q, long free, long b_, long _d); - int getTaskNumber(); - long getTimestamp(int index); void tryCreateNonProcessingTasks(); + long getTimestamp(int index); long getSystemTimestamp(int index); void setTimestamp(int index, long timestamp); }; \ No newline at end of file diff --git a/src/filesystem/File.cpp b/src/filesystem/File.cpp new file mode 100644 index 0000000..995e371 --- /dev/null +++ b/src/filesystem/File.cpp @@ -0,0 +1,216 @@ +#include "filesystem/File.h" + +#include + +#include +#include +#include +#include + +#ifdef _DEBUG 
+#define DCHECK_ALIGNMENT(o, l, b)                                       \
+  do {                                                                  \
+    assert(reinterpret_cast<uintptr_t>(b) % device_alignment() == 0);   \
+    assert((o) % device_alignment() == 0);                              \
+    assert((l) % device_alignment() == 0);                              \
+  } while (0)
+#else
+#define DCHECK_ALIGNMENT(o, l, b) \
+  do {                            \
+  } while (0)
+#endif
+
+Status File::open(int flags, FileCreateDisposition create_disposition,
+                  bool* exists) {
+  if (exists) {
+    *exists = false;
+  }
+
+  int create_flags = getCreateDisposition(create_disposition);
+
+  // Always unbuffered (O_DIRECT).
+  m_fd = ::open(m_filename.c_str(), flags | O_RDWR | O_DIRECT | create_flags,
+                S_IRUSR | S_IWUSR);
+
+  if (exists) {
+    // Let the caller know whether the file we tried to open or create (already)
+    // exists.
+    if (create_disposition == FileCreateDisposition::CreateOrTruncate ||
+        create_disposition == FileCreateDisposition::OpenOrCreate) {
+      *exists = (errno == EEXIST);
+    } else if (create_disposition == FileCreateDisposition::OpenExisting) {
+      *exists = (errno != ENOENT);
+      if (!*exists) {
+        // The file doesn't exist. Don't return an error, since the caller is
+        // expecting this case.
+        return Status::Ok;
+      }
+    }
+  }
+  if (m_fd == -1) {
+    int error = errno;
+    return Status::IOError;
+  }
+
+  Status result = getDeviceAlignment();
+  if (result != Status::Ok) {
+    close();
+  }
+  m_owner = true;
+  return result;
+}
+
+Status File::close() {
+  if (m_fd != -1) {
+    int result = ::close(m_fd);
+    m_fd = -1;
+    if (result == -1) {
+      int error = errno;
+      return Status::IOError;
+    }
+  }
+  m_owner = false;
+  return Status::Ok;
+}
+
+Status File::erase() {
+  int result = ::remove(m_filename.c_str());
+  if (result == -1) {
+    int error = errno;
+    return Status::IOError;
+  }
+  return Status::Ok;
+}
+
+Status File::getDeviceAlignment() {
+  // For now, just hardcode 512-byte alignment.
+  m_device_alignment = 512;
+  return Status::Ok;
+}
+
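`File::open()` forces `O_DIRECT`, so every buffer address, file offset, and transfer length that reaches the kernel must be a multiple of the 512-byte alignment that `getDeviceAlignment()` hardcodes; that is exactly what `DCHECK_ALIGNMENT` asserts in debug builds. A minimal sketch of what a caller has to do to satisfy those constraints (file name illustrative; on Linux, a misaligned O_DIRECT transfer fails with `EINVAL` rather than falling back to the page cache):

```cpp
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  constexpr size_t kAlign = 512;  // matches the hardcoded device alignment
  void *buf = nullptr;
  if (posix_memalign(&buf, kAlign, kAlign) != 0) return 1;  // aligned address
  std::memset(buf, 'x', kAlign);

  int fd = ::open("/tmp/direct-io-demo.dat",
                  O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, S_IRUSR | S_IWUSR);
  if (fd == -1) return 1;

  // Offset (0) and length (512) are aligned too, so all three checks that
  // DCHECK_ALIGNMENT performs would pass for this request.
  ssize_t n = ::pwrite(fd, buf, kAlign, 0);
  std::printf("wrote %zd bytes\n", n);

  ::close(fd);
  std::free(buf);
  return 0;
}
```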
+int File::getCreateDisposition(FileCreateDisposition create_disposition) {
+  switch (create_disposition) {
+    case FileCreateDisposition::CreateOrTruncate:
+      return O_CREAT | O_TRUNC;
+    case FileCreateDisposition::OpenOrCreate:
+      return O_CREAT;
+    case FileCreateDisposition::OpenExisting:
+      return 0;
+    default:
+      assert(false);
+      return 0;  // unreachable; keeps release builds well-defined
+  }
+}
+
+void QueueIoHandler::ioCompletionCallback(io_context_t ctx, struct iocb* iocb,
+                                          long res, long res2) {
+  auto callback_context = std::unique_ptr<IoCallbackContext>(
+      reinterpret_cast<IoCallbackContext*>(iocb));
+  size_t bytes_transferred;
+  Status return_status;
+  if (res < 0) {
+    return_status = Status::IOError;
+    bytes_transferred = 0;
+  } else {
+    return_status = Status::Ok;
+    bytes_transferred = res;
+  }
+  callback_context->m_callback(callback_context->m_callerContext, return_status,
+                               bytes_transferred);
+}
+
+bool QueueIoHandler::tryComplete() {
+  struct timespec timeout;
+  std::memset(&timeout, 0, sizeof(timeout));
+  struct io_event events[1];
+  int result = ::io_getevents(m_ioObject, 1, 1, events, &timeout);
+  if (result == 1) {
+    io_callback_t callback = reinterpret_cast<io_callback_t>(events[0].data);
+    callback(m_ioObject, events[0].obj, events[0].res, events[0].res2);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool QueueIoHandler::tryCompleteMultiple() {
+  const int numOfReq = 10;
+  struct timespec timeout;
+  std::memset(&timeout, 0, sizeof(timeout));
+  struct io_event events[numOfReq];
+  int result = ::io_getevents(m_ioObject, 1, numOfReq, events, &timeout);
+  if (result >= 1) {
+    for (int i = 0; i < result; ++i) {
+      io_callback_t callback = reinterpret_cast<io_callback_t>(events[i].data);
+      callback(m_ioObject, events[i].obj, events[i].res, events[i].res2);
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+Status QueueFile::open(FileCreateDisposition create_disposition,
+                       const FileOptions& options, QueueIoHandler* handler,
+                       bool* exists, long size) {
+  int flags = 0;
+  if (options.m_unbuffered) {
+    flags |= O_DIRECT;
+  }
+  RETURN_NOT_OK(File::open(flags, create_disposition, exists));
+
+  if (size > 0 && ftruncate(m_fd, size) != 0) {
+    throw std::runtime_error("error: problem in setting the size of the file");
+  }
+
+  if (exists && !*exists) {
+    return Status::Ok;
+  }
+
+  m_ioObject = handler->getIoObject();
+  return Status::Ok;
+}
+
+Status QueueFile::read(size_t offset, uint32_t length, uint8_t* buffer,
+                       IAsyncContext& context, AsyncIOCallback callback) const {
+  DCHECK_ALIGNMENT(offset, length, buffer);
+  return const_cast<QueueFile*>(this)->scheduleOperation(
+      FileOperationType::Read, buffer, offset, length, context, callback);
+}
+
+Status QueueFile::readSync(size_t offset, uint32_t length, uint8_t* buffer) const {
+  DCHECK_ALIGNMENT(offset, length, buffer);
+  auto res = pread(m_fd, buffer, length, offset);
+  return (res != (ssize_t) length) ?
Status::IOError : Status::Ok; +} + +Status QueueFile::write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) { + DCHECK_ALIGNMENT(offset, length, buffer); + return scheduleOperation(FileOperationType::Write, + const_cast(buffer), offset, length, + context, callback); +} + +Status QueueFile::scheduleOperation(FileOperationType operationType, + uint8_t* buffer, size_t offset, + uint32_t length, IAsyncContext& context, + AsyncIOCallback callback) { + IAsyncContext* callerContextCopy; + RETURN_NOT_OK(context.deepCopy(callerContextCopy)); + + // TODO: check if this scales with multiple threads + auto ioContext = std::make_unique( + operationType, m_fd, offset, length, buffer, callerContextCopy, callback); + if (!ioContext.get()) return Status::OutOfMemory; + + struct iocb* iocbs[1]; + iocbs[0] = reinterpret_cast(ioContext.get()); + + int result = ::io_submit(m_ioObject, 1, iocbs); + if (result != 1) { + return Status::IOError; + } + + ioContext.release(); + return Status::Ok; +} \ No newline at end of file diff --git a/src/filesystem/File.h b/src/filesystem/File.h new file mode 100644 index 0000000..724d696 --- /dev/null +++ b/src/filesystem/File.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "utils/Async.h" +#include "utils/Status.h" + +/* + * \brief Utilities for File operations. + * + * The code is heavily based on https://github.com/microsoft/FishStore/blob/master/src/environment/file_linux.h. + * */ + +#define CREATE_MODE_RW (S_IWUSR | S_IRUSR) + +constexpr const char* kPathSeparator = "/"; + +enum class FileCreateDisposition : uint8_t { + CreateOrTruncate, + OpenOrCreate, + OpenExisting +}; + +inline std::ostream& operator<<(std::ostream& os, FileCreateDisposition val) { + switch (val) { + case FileCreateDisposition::CreateOrTruncate: + os << "CreateOrTruncate"; + break; + case FileCreateDisposition::OpenOrCreate: + os << "OpenOrCreate"; + break; + case FileCreateDisposition::OpenExisting: + os << "OpenExisting"; + break; + default: + os << "UNKNOWN: " << static_cast(val); + break; + } + return os; +} + +enum class FileOperationType : uint8_t { Read, Write }; + +struct FileOptions { + bool m_unbuffered; + bool m_deleteOnClose; + + FileOptions() : m_unbuffered{false}, m_deleteOnClose{false} {} + FileOptions(bool unbuffered, bool deleteOnClose) + : m_unbuffered{unbuffered}, m_deleteOnClose{deleteOnClose} {} +}; + +/* + * \brief The File class represents the OS file handle + * + * */ + +class File { + protected: + int m_fd; + + private: + size_t m_device_alignment; + std::string m_filename; + bool m_owner; + + protected: + File() : m_fd{-1}, m_device_alignment{0}, m_filename{}, m_owner{false} {} + + File(const std::string& filename) + : m_fd{-1}, m_device_alignment{0}, m_filename{filename}, m_owner{false} {} + + /// Move constructor. + File(File&& other) + : m_fd{other.m_fd}, + m_device_alignment{other.m_device_alignment}, + m_filename{std::move(other.m_filename)}, + m_owner{other.m_owner} { + other.m_owner = false; + } + + // Move assignment operator. 
+ File& operator=(File&& other) { + m_fd = other.m_fd; + m_device_alignment = other.m_device_alignment; + m_filename = std::move(other.m_filename); + m_owner = other.m_owner; + other.m_owner = false; + return *this; + } + + protected: + Status open(int flags, FileCreateDisposition createDisposition, + bool* exists = nullptr); + Status getDeviceAlignment(); + static int getCreateDisposition(FileCreateDisposition createDisposition); + + public: + Status close(); + Status erase(); + + uint64_t getSize() const { + struct stat stat_buffer; + int result = ::fstat(m_fd, &stat_buffer); + return (result == 0) ? stat_buffer.st_size : 0; + } + + size_t getDeviceAlignment() const { return m_device_alignment; } + + const std::string& getFilename() const { return m_filename; } + + ~File() { + if (m_owner) { + Status s = close(); + } + } +}; + +class QueueFile; + +/* + * \brief The QueueIoHandler class encapsulates completions for async file I/O, + * where the completions are put on the AIO completion queue. + * + * */ + +class QueueIoHandler { + private: + // The Linux AIO context used for IO completions. + io_context_t m_ioObject; + + constexpr static int kMaxEvents = 128; + + public: + typedef QueueFile async_file_t; + + QueueIoHandler() : m_ioObject{} {} + QueueIoHandler(size_t maxThreads) : m_ioObject{} { + int result = ::io_setup(kMaxEvents, &m_ioObject); + assert(result >= 0); + } + + // Move constructor + QueueIoHandler(QueueIoHandler&& other) { + m_ioObject = other.m_ioObject; + other.m_ioObject = {}; + } + + // Invoked whenever a Linux AIO completes. + static void ioCompletionCallback(io_context_t ctx, struct iocb* iocb, + long res, long res2); + + struct IoCallbackContext { + IoCallbackContext(FileOperationType operation, int fd, size_t offset, + uint32_t length, uint8_t* buffer, IAsyncContext* context, + AsyncIOCallback callback) + : m_callerContext{context}, m_callback{callback} { + if (FileOperationType::Read == operation) { + ::io_prep_pread(&this->m_parentIocb, fd, buffer, length, offset); + } else { + ::io_prep_pwrite(&this->m_parentIocb, fd, buffer, length, offset); + } + ::io_set_callback(&this->m_parentIocb, ioCompletionCallback); + } + + // WARNING: "m_parentIocb" must be the first field in AioCallbackContext. + // This class is a C-style subclass of "struct iocb". + + // The iocb structure for Linux AIO. + struct iocb m_parentIocb; + + // Caller callback context. + IAsyncContext* m_callerContext; + + // The caller's asynchronous callback function + AsyncIOCallback m_callback; + }; + + inline io_context_t getIoObject() const { return m_ioObject; } + + // Try to execute the next IO completion on the queue, if any. + bool tryComplete(); + bool tryCompleteMultiple(); + + ~QueueIoHandler() { + if (m_ioObject != nullptr) ::io_destroy(m_ioObject); + } +}; + +/* + * \brief The QueueFile class encapsulates asynchronous reads and writes, using + * the specified AIO context. + * + * */ + +class QueueFile : public File { + private: + io_context_t m_ioObject; + + public: + QueueFile() : File(), m_ioObject{} {} + + QueueFile(const std::string& filename) : File(filename), m_ioObject{} {} + + // Move constructor + QueueFile(QueueFile&& other) + : File(std::move(other)), m_ioObject{other.m_ioObject} {} + + // Move assignment operator. 
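`QueueIoHandler` and `QueueFile` together wrap the raw Linux AIO calls: a context obtained from `io_setup()`, requests prepared with `io_prep_pread()`/`io_prep_pwrite()` and handed to the kernel via `io_submit()`, and completions reaped with `io_getevents()` inside `tryComplete()`. A stripped-down round trip of the same calls, without the callback plumbing (path and sizes are illustrative; link with `-laio`):

```cpp
#include <libaio.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  io_context_t ctx = nullptr;              // must start zeroed before io_setup
  if (io_setup(128, &ctx) < 0) return 1;   // 128 mirrors kMaxEvents above

  int fd = ::open("/tmp/aio-demo.dat", O_RDWR | O_CREAT | O_DIRECT, 0600);
  if (fd < 0) return 1;

  void *buf = nullptr;
  posix_memalign(&buf, 512, 512);          // O_DIRECT needs aligned memory
  std::memset(buf, 'x', 512);

  struct iocb cb;
  struct iocb *cbs[1] = {&cb};
  io_prep_pwrite(&cb, fd, buf, 512, 0);    // asynchronous write of one sector
  if (io_submit(ctx, 1, cbs) != 1) return 1;

  struct io_event events[1];
  io_getevents(ctx, 1, 1, events, nullptr);  // block until the write completes
  std::printf("wrote %ld bytes\n", (long) events[0].res);

  std::free(buf);
  ::close(fd);
  io_destroy(ctx);
  return 0;
}
```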
+ QueueFile& operator=(QueueFile&& other) { + File::operator=(std::move(other)); + m_ioObject = other.m_ioObject; + return *this; + } + + Status open(FileCreateDisposition create_disposition, + const FileOptions& options, QueueIoHandler* handler, + bool* exists = nullptr, long size = 0); + + Status read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const; + Status readSync(size_t offset, uint32_t length, uint8_t* buffer) const; + Status write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback); + + private: + Status scheduleOperation(FileOperationType operationType, uint8_t* buffer, + size_t offset, uint32_t length, + IAsyncContext& context, AsyncIOCallback callback); +}; + +/* + * \brief Used by the disk devices + * + * */ +typedef void (*truncate_callback_t)(uint64_t offset); \ No newline at end of file diff --git a/src/filesystem/FileSystemDisk.h b/src/filesystem/FileSystemDisk.h new file mode 100644 index 0000000..5c11308 --- /dev/null +++ b/src/filesystem/FileSystemDisk.h @@ -0,0 +1,223 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "filesystem/File.h" +#include "utils/Guid.h" + +/* + * + * \brief A poor-man's file system implementation for handling file operations. + * + * The code heavily based on https://github.com/microsoft/FishStore/blob/master/src/device/file_system_disk.h. + * + * */ + +template +class FileSystemDisk; + +template +class FileSystemFile { + public: + typedef H handler_t; + typedef typename handler_t::async_file_t file_t; + + // Default constructor + FileSystemFile() : m_file{}, m_fileOptions{} {} + + FileSystemFile(const std::string& filename, const FileOptions& fileOptions) + : m_file{filename}, m_fileOptions{fileOptions} {} + + // Move constructor. + FileSystemFile(FileSystemFile&& other) + : m_file{std::move(other.m_file)}, m_fileOptions{other.m_fileOptions} {} + + /// Move assignment operator. + FileSystemFile& operator=(FileSystemFile&& other) { + m_file = std::move(other.file_); + m_fileOptions = other.m_fileOptions; + return *this; + } + + Status open(handler_t* handler, long size = 0) { + return m_file.open(FileCreateDisposition::OpenOrCreate, m_fileOptions, + handler, nullptr, size); + } + + Status close() { return m_file.close(); } + + Status erase() { return m_file.erase(); } + + void truncate(uint64_t new_begin_offset, truncate_callback_t callback) { + // Truncation is a no-op. + if (callback) { + callback(new_begin_offset); + } + } + + Status readAsync(uint64_t source, void* dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) const { + return m_file.read(source, length, reinterpret_cast(dest), + context, callback); + } + + Status readSync(uint64_t source, void* dest, uint32_t length) const { + return m_file.readSync(source, length, reinterpret_cast(dest)); + } + + Status writeAsync(const void* source, uint64_t dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) { + return m_file.write(dest, length, reinterpret_cast(source), + context, callback); + } + + size_t getAlignment() const { return m_file.device_alignment(); } + + private: + file_t m_file; + FileOptions m_fileOptions; +}; + +template +class FileSystemDisk { + public: + typedef H handler_t; + typedef FileSystemFile file_t; + typedef std::vector> log_file_t; + + private: + std::string m_rootPath; + handler_t m_handler; + FileOptions m_defaultFileOptions; + std::mutex m_mutex; + // Contains all files. 
+ log_file_t m_files; + + static std::string normalizePath(std::string rootPath) { + if (rootPath.empty() || rootPath.back() != kPathSeparator[0]) { + rootPath += kPathSeparator; + } + return rootPath; + } + + public: + FileSystemDisk(const std::string& rootPath, size_t maxThreads = 16, + bool enablePrivileges = false, bool unbuffered = true, + bool delete_on_close = false) + : m_rootPath{normalizePath(rootPath)}, + m_handler{maxThreads}, + m_defaultFileOptions{unbuffered, delete_on_close} { + // create file path if it doesn't exist + std::experimental::filesystem::path path{m_rootPath}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + } + + // Methods required by the (implicit) disk interface. + uint32_t getSectorSize() const { + return 512; // For now, assume all disks have 512-bytes alignment. + } + + const log_file_t& getFilesUnsafe() const { return m_files; } + log_file_t& getFilesUnsafe() { return m_files; } + + std::string getRelativeCheckpointPath(const Guid& token) const { + std::string retval = "scabbard"; + retval += kPathSeparator; + retval += token.ToString(); + retval += kPathSeparator; + return retval; + } + + std::string getRootPath() const { return m_rootPath; } + + std::string getCheckpointPath(const Guid& token) const { + return m_rootPath + getRelativeCheckpointPath(token); + } + + std::string getRelativeNamingCheckpointPath(const Guid& token) const { + std::string retval = "naming-checkpoint-"; + retval += token.ToString(); + retval += ".txt"; + return retval; + } + + std::string getNamingCheckpointPath(const Guid& token) const { + return m_rootPath + getRelativeNamingCheckpointPath(token); + } + + void createCheckpointDirectory(const Guid& token) { + std::string indexDir = getCheckpointPath(token); + std::experimental::filesystem::path path{indexDir}; + try { + std::experimental::filesystem::remove_all(path); + } catch (std::experimental::filesystem::filesystem_error&) { + // Ignore; throws when path doesn't exist yet. + } + std::experimental::filesystem::create_directories(path); + } + + void createOrOpenCheckpointDirectory(const Guid& token) { + std::string indexDir = getCheckpointPath(token); + std::experimental::filesystem::path path{indexDir}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } + } + + void tryDeleteCheckpointDirectory(const Guid& token) { + std::string indexDir = getCheckpointPath(token); + std::experimental::filesystem::path path{indexDir}; + try { + std::experimental::filesystem::remove_all(path); + } catch (std::experimental::filesystem::filesystem_error&) { + // Ignore; throws when path doesn't exist yet. + } + } + + file_t newUnmanagedFile(const std::string& relativePath) { + return file_t{m_rootPath + relativePath, m_defaultFileOptions}; + } + + file_t* newFile(const std::string& relativePath, long size = 0) { + // Only one thread can modify the list of files at a given time. + std::lock_guard lock{m_mutex}; + m_files.push_back(std::make_unique(m_rootPath + relativePath, + m_defaultFileOptions)); + std::cout << "[DBG] Creating file " << m_rootPath << relativePath << std::endl; + assert(m_files.back()->open(&m_handler, size) == Status::Ok); + return m_files.back().get(); + } + + // Implementation-specific accessor. 
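A hypothetical end-to-end use of the disk abstraction above: create a disk rooted at some directory, pre-size a file through `newFile()` (which sizes it with `ftruncate` via `QueueFile::open`), and poll the shared handler until pending completions are drained. The root path and file name are assumptions for illustration only:

```cpp
#include "filesystem/FileSystemDisk.h"

int main() {
  // One AIO handler is shared by all files the disk creates.
  FileSystemDisk<QueueIoHandler> disk("/tmp/scabbard-root");

  // Pre-sized file; the pointer stays valid as long as the disk owns it.
  auto *file = disk.newFile("stream-0.dat", 4 * 1024 * 1024);
  (void) file;  // schedule file->writeAsync(...)/readAsync(...) calls here

  // Each successful tryComplete() fires exactly one completion callback.
  while (disk.tryComplete()) {
  }

  disk.eraseFiles();  // unlink everything this disk created
  return 0;
}
```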
+ handler_t& getHandler() { return m_handler; } + + bool tryComplete() { return m_handler.tryComplete(); } + + void eraseFile(file_t *file) { + auto it = m_files.begin(); + while (it != m_files.end()) { + if (file == (*it).get()) { + assert((*it)->erase() == Status::Ok); + m_files.erase(it); + return; + } else { + ++it; + } + } + } + + void eraseFiles() { + for (auto& f : m_files) { + assert(f->erase() == Status::Ok); + } + m_files.clear(); + } +}; \ No newline at end of file diff --git a/src/filesystem/NullDisk.h b/src/filesystem/NullDisk.h new file mode 100644 index 0000000..49e6ead --- /dev/null +++ b/src/filesystem/NullDisk.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include + +#include "filesystem/File.h" +#include "utils/Guid.h" + +/* + * + * \brief A null disk, used for in-memory-only execution to test the system's + * overhead. + * + * The code heavily based on https://github.com/microsoft/FishStore/blob/master/src/device/null_disk.h. + * + * */ + +struct NullHandler { + inline static constexpr bool tryComplete() { return false; } +}; + +class NullFile { + public: + NullFile() {} + + Status open(NullHandler* handler) { return Status::Ok; } + Status close() { return Status::Ok; } + Status erase() { return Status::Ok; } + void truncate(uint64_t new_begin_offset, truncate_callback_t callback) { + if (callback) { + callback(new_begin_offset); + } + } + + Status readAsync(uint64_t source, void* dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) const { + callback(&context, Status::Ok, length); + return Status::Ok; + } + + Status writeAsync(const void* source, uint64_t dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) { + callback(&context, Status::Ok, length); + return Status::Ok; + } + + static size_t getAlignment() { + // Align null device to cache line. + return 64; + } + + void setHandler(NullHandler* handler) {} +}; + +class NullDisk { + public: + typedef NullHandler handler_t; + typedef NullFile file_t; + typedef std::vector> log_file_t; + + private: + handler_t m_handler; + std::mutex m_mutex; + log_file_t m_files; + std::string m_rootPath; + + static std::string normalizePath(std::string rootPath) { + if (rootPath.empty() || rootPath.back() != kPathSeparator[0]) { + rootPath += kPathSeparator; + } + return rootPath; + } + + public: + NullDisk(const std::string& rootPath, size_t maxThreads = 16) + : m_rootPath{normalizePath(rootPath)} { + (void)maxThreads; + } + + static uint32_t getSectorSize() { return 64; } + + // Methods required by the (implicit) disk interface. 
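Because `NullFile` completes `readAsync()`/`writeAsync()` by invoking the callback inline, persistence code that is templated on the disk type can be instantiated with `NullDisk` to measure pure engine overhead with no I/O ever issued. A hypothetical harness showing the swap (file name illustrative):

```cpp
#include "filesystem/NullDisk.h"

// The real and null disks share the same implicit compile-time interface,
// so the pipeline code is written once against a `Disk` type parameter.
template <typename Disk>
void runCheckpointPath(Disk &disk) {
  auto *file = disk.newFile("checkpoint-0.dat");
  (void) file;                   // writes scheduled through `file` complete...
  while (disk.tryComplete()) {   // ...inline for NullFile, so this never spins
  }
}

int main() {
  NullDisk disk("/tmp/ignored");  // root path is stored but never touched
  runCheckpointPath(disk);        // swap in FileSystemDisk<QueueIoHandler> for real I/O
  return 0;
}
```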
+ const log_file_t& getFiles() const { return m_files; } + + log_file_t& getFiles() { return m_files; } + + std::string getRootPath() const { return m_rootPath; } + + std::string getCheckpointPath(const Guid& token) const { + assert(false); + return ""; + } + + void createCheckpointDirectory(const Guid& token) { assert(false); } + + void createOrOpenCheckpointDirectory(const Guid& token) { assert(false); } + + void tryDeleteCheckpointDirectory(const Guid& token) { assert(false); } + + std::string getRelativeCheckpointPath(const Guid& token) const { return ""; } + + file_t* newFile(const std::string& relativePath, long size = 0) { + std::lock_guard lock{m_mutex}; + m_files.push_back(std::make_unique()); + assert(m_files.back()->open(&m_handler) == Status::Ok); + return m_files.back().get(); + } + + handler_t& getHandler() { return m_handler; } + + inline static constexpr bool tryComplete() { return false; } + + void eraseFiles() { + for (auto& f : m_files) { + assert(f->erase() == Status::Ok); + } + m_files.clear(); + } +}; \ No newline at end of file diff --git a/src/monitors/CompressionMonitor.cpp b/src/monitors/CompressionMonitor.cpp new file mode 100644 index 0000000..607b21c --- /dev/null +++ b/src/monitors/CompressionMonitor.cpp @@ -0,0 +1,558 @@ +#include "monitors/CompressionMonitor.h" + +#include + +#include "buffers/QueryBuffer.h" +#include "compression/CompressionCodeGenUtils.h" +#include "compression/CompressionStatistics.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/OperatorCode.h" +#include "utils/Query.h" +#include "utils/QueryApplication.h" +#include "utils/QueryOperator.h" +#include "utils/SystemConf.h" + +CompressionMonitor::CompressionMonitor(QueryApplication *application) : m_application(application), + m_size(application->getQueries().size()), + m_codeGenPos(m_size, 0) { + if (!m_application) { + throw std::runtime_error("error: the application is not set"); + } + // assume queries are pre-sorted based on their id + for (int idx = 0; idx < m_size; ++idx) { + if (m_application->getQueries()[idx]->getSecondSchema()) { + throw std::runtime_error( + "error: enabling adaptive compression to queries with more than one " + "input streams is not supported"); + } + // generate instrumentation code for all queries + auto query = m_application->getQueries()[idx].get(); + m_threads.emplace_back(std::thread([&, query] { + generateInstrumentation(query); + })); + } +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" +void CompressionMonitor::operator()() { + for (int idx = 0; idx < m_size; ++idx) { + m_threads[idx].join(); + } + m_threads.clear(); + while (true) { + try { + std::this_thread::sleep_for(std::chrono::milliseconds(SystemConf::getInstance().COMPRESSION_MONITOR_INTERVAL)); + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + } + auto t2 = std::chrono::system_clock::now(); + m_time = t2.time_since_epoch() / std::chrono::milliseconds(1); // milliseconds + m_dt = m_time - m__time; + + // iterate over the statistics of all queries + for (int idx = 0; idx < m_size; ++idx) { + // streams + auto query = m_application->getQueries()[idx].get(); + auto buffer = query->getBuffer(); + if (buffer->hasCompressionPolicyChanged()) { + std::cout << "[CMON] Q" + std::to_string(query->getId()) + " " + std::to_string(m_time) + + " Generating a new compression scheme" << std::endl; + generateCode(query); + } + + // state + } + + m__time = m_time; + } +} +#pragma clang diagnostic pop + +void 
CompressionMonitor::generateInstrumentation(Query *query) { + // generate code + std::string code; + auto opcode = &query->getOperator()->getCode(); + //if (!opcode) { + // throw std::runtime_error("error: wrong operator type"); + //} + // get headers + code.append(getIncludesString()); + // get num of workers and cols to generate static vars + auto cols = opcode->getInputCols(); + code.append(getInstrumentationMetrics(SystemConf::getInstance().WORKER_THREADS, cols->size())); + // get input schema + code.append(opcode->getInputSchemaString()); + // generate function header with initializations + code.append( + "extern \"C\" {\n" + "void instrument(int pid, char *inputBuffer, int batchSize, uint32_t *&dv, double *&cv, double *&mn, double *&mx, double *&md) {\n" + "\tif (batchSize < 0)\n" + "\t\treturn;\n" + "\n" + "\t// Input Buffer\n" + " input_tuple_t *data= (input_tuple_t *) inputBuffer;\n" + "\n" + " //Output Buffers\n" + " for (int i = 0; i < numOfCols; ++i) {\n" + " \tcVals[pid][i] = 0;\n" + " \tmin[pid][i] = DBL_MAX;\n" + " \tmax[pid][i] = DBL_MIN;\n" + " \tmaxDiff[pid][i] = DBL_MIN;\n" + " \ttemp[pid][i] = DBL_MIN;\n" + " }\n" + "\n" + " int tupleSize = sizeof(input_tuple_t);\n" + " int endPtr = batchSize / tupleSize;\n" + " int tupleCounter = 0;\n" + "\n" + " for (int i = 0; i < endPtr; ++i) {\n"); + // generate filter and projections + if (opcode->hasSelection()) { + auto sel = opcode->getSelectionExpr(); + std::string str = "bufferPtr"; + sel.replace(sel.find(str), str.length(), "i"); + code.append(sel + "{\n"); + } + // generate per column stats + int idx = 0; + for (auto col: *cols) { + std::string scol = "data[i]."; + std::string colPos = std::to_string(idx++); + if (col->getColumn() == 0) { + scol += "timestamp"; + } else if (col->getColumn() == -1) { + scol = col->getExpression(); + std::string toReplace("bufferPtr"); + size_t pos = scol.find(toReplace); + scol.replace(pos, toReplace.length(), "i"); + } else { + scol += "_" + std::to_string(col->getColumn()); + } + code.append( + " \t// check each column\n" + "\t\tif (temp[pid][" + colPos + "] != " + scol + ") {\n" + " \t\tcVals[pid][" + colPos + "]++;\n" + " \t}\n" + " \tmin[pid][" + colPos + "] = min[pid][" + colPos + "] < " + scol + " ? min[pid][" + colPos + "] : " + scol + ";\n" + " \tmax[pid][" + colPos + "] = max[pid][" + colPos + "] > " + scol + " ? max[pid][" + colPos + "] : " + scol + ";\n" + " \tif (i != 0) {\n" + " \t\tmaxDiff[pid][" + colPos + "] = maxDiff[pid][" + colPos + "] > abs(" + scol + " - min[pid][" + colPos + "]) ? 
maxDiff[pid][" + colPos + "] : abs(" + scol + " - min[pid][" + colPos + "]);\n" + " \t}\n" + " \ttemp[pid][" + colPos + "] = " + scol + " ;\n"); + } + code.append(" \ttupleCounter++;\n"); + + // finish loop and finalize results + if (opcode->hasSelection()) { + code.append("}\n"); + } + code.append( + " }\n" + "\n" + " for (int i = 0; i < numOfCols; ++i) {\n" + " \tcVals[pid][i] = (double)tupleCounter/cVals[pid][i];\n" + " }\n" + "\n" + "\t// set output pointers\n" + " cv = cVals[pid];\n" + " mn = min[pid];\n" + " mx = max[pid];\n" + " md = maxDiff[pid]; \n" + "}\n" + "}"); + + auto id = query->getId(); + // create file on disk + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::ofstream out(path + "/InstrumentationCode_" + std::to_string(id) + ".cpp"); + out << code; + out.close(); + // generate library + std::string generatedPath = path + "/InstrumentationCode_" + std::to_string(id) + ".cpp"; + std::string libPath = path + "/InstrumentationCode_" + std::to_string(id) + ".so"; + std::string command = "clang -shared -fPIC -O3 -march=native -g -o " + libPath + " " + generatedPath; + system(command.c_str()); + // load library + m_dLoader.addLibrary(libPath); + //auto dl = std::make_unique(libPath.c_str()); + + // load function + auto fp = m_dLoader.load(libPath, "instrument"); + // enable instrumentation + query->getBuffer()->enableInstrumentation(fp); +} + +void CompressionMonitor::generateCode(Query *query) { + // todo: skip compression if code exists + + // generate code + std::string code; + auto opcode = &query->getOperator()->getCode(); + // get headers and compression algorithms + code.append(getIncludesString()); + code.append(getCompressionAlgorithms()); + // get num of workers and cols to generate static vars + auto cols = opcode->getInputCols(); + auto hashset = opcode->getHashTableExpr(); + auto compStats = query->getBuffer()->getCompressionStatistics(); + auto compCols = compStats->m_compCols; + bool hasDict = false; + for (auto &col: compCols) { + for(auto &comp: col->m_comps) { + if (comp == CompressionType::Dictionary) { + hasDict = true; + } + } + } + if (hasDict) { + if (!hashset.empty()) { + code.append(hashset); + } + code.append(getCompressionVars(true, SystemConf::getInstance().WORKER_THREADS, cols->size())); + } else { + code.append(getCompressionVars(false, SystemConf::getInstance().WORKER_THREADS, cols->size())); + } + // get input schema + code.append(opcode->getInputSchemaString()); + // generate function header with initializations + code.append( + "inline void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency) {\n" + "\tif (start == 0 && end == -1) {\n"); + if (!hashset.empty()) { + // todo: reduce hashtable size here + code.append(" \t// write metadata\n"); + } + code.append( + " \treturn;\n" + " \t}\n"); + + // initialize variables + size_t idx = 0; + code.append(" \tif (isFirst[pid]) {\n"); + for (auto &col: compCols) { + // do we need to initialize metadata here? + if (col->m_comps.find(CompressionType::Dictionary) != col->m_comps.end()) { + auto htSize = opcode->hasStaticHashJoin() ? 
Utils::getPowerOfTwo(SystemConf::getInstance().CAMPAIGNS_NUM * 10) : SystemConf::getInstance().HASH_TABLE_SIZE; + code.append( + " \tdcomp[pid][" + std::to_string(idx) + "] = std::make_unique>(" + std::to_string(htSize)+ ");\n"); + } + idx++; + } + code.append(" \tisFirst[pid] = false;\n" + " \t}\n"); + code.append(" \tif (clear) {\n"); + idx = 0; + for (auto &col: compCols) { + if (col->m_comps.find(CompressionType::Dictionary) != col->m_comps.end()) { + code.append(" \tdcomp[pid][" + std::to_string(idx) + "]->clear();\n"); + } + idx++; + } + code.append( + " \tclear = false;\n" + " \t}\n" + "\n" + "\tif (start == end || end < start) {\n" + "\t\treturn;\n" + "\t}\n" + "\n" + "\t// Input Buffer\n" + " auto data = (input_tuple_t *)input;\n"); + + // setup compression: each compression technique get a monotonically increasing _id + // setup output structs + idx = 0; + code.append("\tstd::vector idxs (" + std::to_string(cols->size()) + ", 0);\n"); + for (auto &col: compCols) { + for (const auto& c: col->m_comps) { + if (c == CompressionType::RLE) { + continue; + } + // todo: fix float compression without a multiplier + std::string resStruct; + resStruct.append("\tstruct t_" + std::to_string(idx) + " {\n"); + std::string resType; + uint32_t precision = 0; + if (c == CompressionType::BaseDelta) { + resType = getRoundedType(col->m_diffPrecision); + resStruct.append("\t\t" + resType + " _" + std::to_string(idx) + " : "); + resStruct.append(std::to_string(col->m_diffPrecision) + ";\n"); + precision = col->m_diffPrecision; + } else if (c == CompressionType::Dictionary) { + resType = getRoundedType(16); + resStruct.append("\t\t" + resType + " _" + std::to_string(idx) + " : "); + resStruct.append(std::to_string(16) + ";\n"); + precision = 16; + } else { + resType = getRoundedType(col->m_precision); + resStruct.append("\t\t" + resType + " _" + std::to_string(idx) + " : "); + resStruct.append(std::to_string(col->m_precision) + ";\n"); + precision = col->m_precision; + } + if (col->m_comps.find(CompressionType::RLE) != col->m_comps.end()) { + auto maxCounter = std::max(col->m_RLEPrecision, getRoundedTypeInt(precision)-precision); + col->m_RLEPrecision = maxCounter; + resStruct.append("\t\t"+getRoundedType(col->m_RLEPrecision)+" counter : " + std::to_string(col->m_RLEPrecision) + ";\n"); + // create an RLE counter for that column + code.append("\t"+getRoundedType(col->m_RLEPrecision)+" count_" + std::to_string(idx) + " = 1;\n"); + } + + std::string base; + if (col->m_column == 0) { + base = "data[0].timestamp"; + } else if (col->m_column == -1) { + base = col->m_expression; + std::string toReplace("bufferPtr"); + size_t pos = base.find(toReplace); + base.replace(pos, toReplace.length(), "0"); + } else { + base = "data[0]._" + std::to_string(col->m_column); + } + if (c == CompressionType::BaseDelta) { + code.append("\tBaseDeltaCompressor<" + col->m_typeString + ", " + getRoundedType(col->m_diffPrecision) + "> comp_"+std::to_string(idx)+"("+base+");\n"); + code.append("\tauto temp_" + std::to_string(idx) + " = comp_"+std::to_string(idx)+".compress("+base+");\n"); + } else if (c == CompressionType::FloatMult) { + code.append("\tFloatMultCompressor<" + getRoundedType(col->m_precision) + "> comp_"+std::to_string(idx)+"("+std::to_string(col->m_multiplier)+");\n"); + code.append("\tauto temp_" + std::to_string(idx) + " = comp_"+std::to_string(idx)+".compress("+base+");\n"); + } else if (c == CompressionType::None) { + code.append("\tauto temp_" + std::to_string(idx) + " = 
("+getRoundedType(col->m_precision)+")"+base+";\n"); + } + resStruct.append("\t};\n"); + code.append(resStruct); + } + idx++; + } + // setup output buffers + idx = 0; + float mult = (float) 1 / (float) cols->size(); + code.append("\t// output buffers\n"); + code.append("\tint barriers["+std::to_string(cols->size())+"];\n"); + for (auto &col: compCols) { + auto bar = (float)(idx * mult); + code.append("\tbarriers["+std::to_string(idx)+"] = (int)(length*"+std::to_string(bar)+");\n"); + code.append("\tt_"+std::to_string(idx)+" *buf"+std::to_string(idx)+" = (t_"+std::to_string(idx)+" *) (output + barriers["+std::to_string(idx)+"]);\n"); + idx++; + } + + // iterate over data + code.append("\tsize_t n = (end - start) / sizeof(input_tuple_t);\n\n"); + code.append("\tfor (size_t idx = 0; idx < n; idx++) {\n"); + // generate filter and projections + if (opcode->hasSelection()) { + auto sel = opcode->getSelectionExpr(); + std::string str = "bufferPtr"; + sel.replace(sel.find(str), str.length(), "idx"); + code.append("\t\t" + sel + "\t\t{\n"); + } + + // compression code + idx = 0; + for (auto &col: compCols) { + std::string rleS; + auto idxS = std::to_string(idx); + for (const auto &c : col->m_comps) { + // apply compression + std::string base; + if (col->m_column == 0) { + base = "data[idx].timestamp"; + } else if (col->m_column == -1) { + base = col->m_expression; + std::string toReplace("bufferPtr"); + size_t pos = base.find(toReplace); + base.replace(pos, toReplace.length(), "idx"); + } else { + base = "data[idx]._" + std::to_string(col->m_column); + } + if (c == CompressionType::BaseDelta || c == CompressionType::FloatMult) { + code.append(" // apply compression\n"); + code.append( + "\t\tif (comp_" + idxS + ".check("+base+")) {\n" + "\t\t\tstd::cout << \"warning: falling back to the original compression scheme\"<< std::endl;\n" + "\t\t\tclear = true;\n" + "\t\t\treturn;\n" + "\t\t}\n"); + code.append("\t\tauto res_" + idxS + " = comp_" + idxS + ".compress("+base+");\n"); + } else if (c == CompressionType::Dictionary) { + code.append("\t\tauto res_" + idxS + " = dcomp[pid][" + idxS + "]->compress("+base+");\n"); + } else if (c == CompressionType::None) { + std::string resType = getRoundedType(col->m_precision); + code.append( + "\t\tif (!CanTypeFitValue<"+resType+","+col->m_typeString+">("+base+")) {\n" + "\t\t\tstd::cout << \"warning: falling back to the original compression scheme\"<< std::endl;\n" + "\t\t\tclear = true;\n" + "\t\t\treturn;\n" + "\t\t}\n"); + code.append("\t\t"+resType+" res_" + idxS + " = ("+resType+") "+base+";\n"); + } + if (c == CompressionType::RLE) { + auto cntCheck = std::pow(2, col->m_RLEPrecision) - 1; + rleS.append( + " // apply RLE\n" + " if (temp_" + idxS + " != res_" + idxS + " || count_" + idxS + " >= " + std::to_string(cntCheck) + ") {\n" + "\t\t buf" + idxS + "[idxs[" + idxS + "]++] = {temp_" + idxS + ", count_" + idxS + "};\n" + " count_" + idxS + " = 0;\n" + " temp_" + idxS + " = res_" + idxS + ";\n" + "\t\t} else {\n" + " count_" + idxS + "++;\n" + " }\n"); + } + // deal with no compression + } + if (!rleS.empty()) { + code.append(rleS); + } else { // if no rle takes place, just store the compressed result + code.append("\t\tbuf" + idxS + "[idxs[" + idxS + "]++] = {res_" + idxS + "};\n"); + } + idx++; + } + + if (opcode->hasSelection()) { + code.append("\t\t}\n"); + } + code.append("\t}\n"); + + // after loop result finalization + idx = 0; + for (auto &col: compCols) { + for (const auto &c : col->m_comps) { + auto idxS = std::to_string(idx); + std::string 
base; + if (c == CompressionType::RLE) { + auto cntCheck = std::pow(2, col->m_RLEPrecision) - 1; + code.append( + " if (count_" + idxS + " != 0) {\n" + "\t buf" + idxS + "[idxs[" + idxS + "]++] = {temp_" + idxS + ", count_" + idxS + "};\n" + "\t}\n"); + } + } + idx++; + } + + // copy results and set output pointers + idx = 0; + code.append("\t// copy results and set output pointers\n"); + for (auto &col: compCols) { + code.append("\twritePos += idxs["+std::to_string(idx)+"] * sizeof(t_"+std::to_string(idx)+");\n"); + if (idx < cols->size() - 1) { + code.append("\tif (writePos > barriers["+std::to_string(idx+1)+"]) {throw std::runtime_error(\"error: larger barriers needed\");}\n"); + code.append("\tstd::memcpy((void *)(output + writePos), (void *)buf"+std::to_string(idx+1)+", idxs["+std::to_string(idx+1)+"] * sizeof(t_"+std::to_string(idx+1)+"));\n"); + } else { + code.append("\tif (writePos > length) {throw std::runtime_error(\"error: larger barriers needed\");}\n"); + } + idx++; + } + + // write metadata: col comp schema s-idx e-idx + if (m_writeMetadata) { + idx = 0; + code.append("\t//write metadata\n"); + code.append("\twritePos = 0;\n"); + code.append("\tmetadata[pid][0] = \"\";\n"); + for (auto &col : compCols) { + std::string comps; + std::string resStruct; + for (const auto &c : col->m_comps) { + if (c == CompressionType::RLE) { + comps += "\"RLE \""; + continue; + } + resStruct.append("{"); + resStruct.append(col->m_typeString + ":"); + if (c == CompressionType::BaseDelta) { + comps += "\"BD \"+comp_" + std::to_string(idx) + ".getBase()+\" \""; + resStruct.append(std::to_string(col->m_diffPrecision) + ";"); + } else { + if (c == CompressionType::FloatMult) { + comps += "\"FM \"+comp_" + std::to_string(idx) + + ".getMultiplier()+\" \""; + } else if (c == CompressionType::Dictionary) { + std::string endPtr = "\tauto endPtr = "; + size_t tmpIdx = 0; + for (auto &cl : compCols) { + endPtr += "idxs[" + std::to_string(tmpIdx) + "] * sizeof(t_" + + std::to_string(tmpIdx) + ")"; + tmpIdx++; + if (tmpIdx != compCols.size()) { + endPtr += "+"; + } else { + endPtr += ";\n"; + } + } + code.append(endPtr); + code.append("\tauto dcompSize = dcomp[pid][" + std::to_string(idx) + + "]->getTable().bucket_size() * dcomp[pid][" + + std::to_string(idx) + "]->getTable().max_size();\n"); + comps += + "\"D \"+std::to_string(endPtr)+\" \"+std::to_string(endPtr+dcompSize)+\" \""; + if (!opcode->hasStaticHashJoin()) { + code.append( + "\tstd::memcpy((void *)(output + writePos), (void *)dcomp[pid][" + + std::to_string(idx) + + "]->getTable().buckets(), dcompSize);\n" + "\twritePos += dcompSize;\n" + "\tif (writePos > length) {\n" + "\t\tthrow std::runtime_error(\"error: larger barriers needed\");\n" + "\t}\n"); + } + } else { + comps += "\"N \""; + } + resStruct.append(std::to_string(col->m_precision) + ";"); + } + if (col->m_comps.find(CompressionType::RLE) != col->m_comps.end()) { + resStruct.append("uint16_t:" + std::to_string(col->m_RLEPrecision) + + ";"); + } + resStruct.append("} "); + } + code.append("\tmetadata[pid][0] += \"" + std::to_string(col->m_column) + + " \"" + comps + "\"" + resStruct + "\" " + + "+ std::to_string(writePos) + \" \";\n"); + // if (idx < cols->size() - 1) { + code.append("\twritePos += idxs[" + std::to_string(idx) + + "] * sizeof(t_" + std::to_string(idx) + ");\n"); + code.append("\tmetadata[pid][0] += std::to_string(writePos) + \" \";\n"); + //} + idx++; + } + // copy string + code.append( + "\tif (metadata[pid][0].size() > 128) { throw std::runtime_error(\"error: increase 
the size of metadata\"); }\n"); + code.append( + "\tstd::memcpy((void *)(output - 128), (void *)metadata[pid][0].data(), metadata[pid][0].size());\n"); + } + code.append("}\n"); + // generate c wrapped function + code.append( + "extern \"C\" {\n" + "\tvoid compress(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) {\n" + "\t\tcompressInput(pid, input, start, end, output, writePos, length, clear, latency);\n" + "\t}\n" + "}"); + + auto id = query->getId(); + // create file on disk + //auto path = Utils::getCurrentWorkingDir(); + auto path = SystemConf::getInstance().FILE_ROOT_PATH + "/scabbard"; + std::string filePath = path + "/" + "CompressionCode_" + std::to_string(id) + "_" + std::to_string(m_codeGenPos[id]); + std::ofstream out(filePath + ".cpp"); + out << code; + out.close(); + // generate library + std::string generatedPath = filePath + ".cpp"; + std::string libPath = filePath + ".so"; + std::string command = "clang -shared -fPIC -O3 -march=native -g -o " + libPath + " " + generatedPath; + system(command.c_str()); + // load library + m_dLoader.addLibrary(libPath); + + // load function + auto fp = m_dLoader.load(libPath, "compress"); + m_codeGenPos[id]++; // increment compression pointer + // enable the new compression + query->getBuffer()->setCompressionFP(fp, m_codeGenPos[id]); + //query->getBuffer()->setDeCompressionFP(fp); +} \ No newline at end of file diff --git a/src/monitors/CompressionMonitor.h b/src/monitors/CompressionMonitor.h new file mode 100644 index 0000000..8a8095d --- /dev/null +++ b/src/monitors/CompressionMonitor.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/Utils.h" + +class Query; +class QueryApplication; +class QueryBuffer; +class ColumnReference; + +/* + * \brief This class is used to measure the compression statistics of a pipeline. + * + * */ + +class CompressionMonitor { + private: + long m_time, m__time = 0L; + long m_dt; + QueryApplication *m_application; + int m_size; + bool m_writeMetadata = false; + + // define (disk throughput) ? 
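The monitor's code-generation loop above boils down to: emit a `.cpp` into the Scabbard directory, shell out to `clang -shared -fPIC`, and load the resulting library to resolve the `extern "C"` entry point. A sketch of that cycle using plain `dlopen()`/`dlsym()`, which is what a loader like `Utils::DynamicLoader` is assumed to do under the hood (paths are illustrative; the signature matches the generated `compress` wrapper; link with `-ldl`):

```cpp
#include <dlfcn.h>
#include <cstdlib>
#include <stdexcept>
#include <string>

// Matches the generated extern "C" wrapper:
// void compress(int pid, char *input, int start, int end, char *output,
//               int &writePos, int length, bool &clear, long latency);
using CompressFn = void (*)(int, char *, int, int, char *, int &, int, bool &, long);

CompressFn loadGenerated(const std::string &src, const std::string &lib) {
  // Same compiler invocation as generateCode() above.
  std::string cmd = "clang -shared -fPIC -O3 -march=native -g -o " + lib + " " + src;
  if (std::system(cmd.c_str()) != 0)
    throw std::runtime_error("error: compilation of generated code failed");

  void *handle = dlopen(lib.c_str(), RTLD_NOW);
  if (!handle) throw std::runtime_error(dlerror());

  auto fn = reinterpret_cast<CompressFn>(dlsym(handle, "compress"));
  if (!fn) throw std::runtime_error("error: symbol `compress` not found");
  return fn;  // the buffer then swaps this in via setCompressionFP()
}
```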
+  // codegen
+  std::vector<std::thread> m_threads;
+  std::vector<int> m_codeGenPos;
+  Utils::DynamicLoader m_dLoader;
+  //std::vector<std::function<void(int, char *, int, int, char *, int &, int, bool &, long)>> m_compressionFP;
+  //std::vector<std::function<void(int, char *, int, int, char *, int &, int, bool &, long)>> m_decompressionFP;
+  //std::function<void(int, char *, int, uint32_t *&, double *&, double *&, double *&, double *&)> m_instrFP;
+
+ public:
+  explicit CompressionMonitor(QueryApplication *application);
+  void operator()();
+
+ private:
+  void generateInstrumentation(Query *query);
+  void generateCode(Query *query);
+};
\ No newline at end of file
diff --git a/src/monitors/LatencyMonitor.cpp b/src/monitors/LatencyMonitor.cpp
index 66a0ea0..5c5800a 100644
--- a/src/monitors/LatencyMonitor.cpp
+++ b/src/monitors/LatencyMonitor.cpp
@@ -1,14 +1,48 @@
 #include "monitors/LatencyMonitor.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <cstdio>
+
 #include "utils/Utils.h"
 #include "buffers/QueryBuffer.h"
 
-LatencyMonitor::LatencyMonitor(long timeReference) : m_count(0L),
+LatencyMonitor::LatencyMonitor(long timeReference, bool clearFiles) : m_count(0L),
   m_min(DBL_MAX),
   m_max(DBL_MIN),
   m_avg(0.0),
   m_timestampReference(timeReference),
   m_latency(0.0),
-  m_active(true) {}
+  m_active(true),
+  m_clearFiles(clearFiles) {
+
+  if (m_clearFiles) {
+    std::remove(m_fileName.c_str());
+    m_fd = ::open(m_fileName.c_str(), O_RDWR | O_CREAT,
+                  S_IRUSR | S_IWUSR);
+    auto timeString = std::to_string(m_timestampReference);
+    ::pwrite(m_fd, timeString.data(), timeString.size(), 0);
+    fsync(m_fd);
+    std::remove(m_fileName2.c_str());
+    m_fd2 = ::open(m_fileName2.c_str(), O_RDWR | O_CREAT,
+                   S_IRUSR | S_IWUSR);
+  } else {
+    if ((m_fd = ::open(m_fileName.c_str(), O_RDONLY)) > 0) {
+      off_t fsize;
+      fsize = lseek(m_fd, 0, SEEK_END);
+      auto timeString = std::string(fsize, ' ');
+      ::pread(m_fd, timeString.data(), fsize, 0);
+      m_timestampReference = std::stol(timeString);
+    }
+    if ((m_fd2 = ::open(m_fileName2.c_str(), O_RDWR, S_IRUSR | S_IWUSR)) > 0) {
+      auto fsize = lseek(m_fd2, 0, SEEK_END);
+      auto timeString = std::string(fsize, ' ');
+      ::pread(m_fd2, timeString.data(), fsize, 0);
+      m_lastTimestamp = std::stol(timeString);
+    }
+  }
+}
 
 void LatencyMonitor::disable() { m_active.store(false); }
 
@@ -17,6 +51,7 @@ std::string LatencyMonitor::toString() {
   if (m_count < 2 || !m_active.load())
     return latencyString;
 
+  //const std::chrono::time_point<std::chrono::high_resolution_clock> currentTime = std::chrono::high_resolution_clock::now();
   m_avg = m_latency / ((double) m_count);
   std::ostringstream streamObj;
   streamObj << std::fixed;
@@ -24,6 +59,7 @@ std::string LatencyMonitor::toString() {
   streamObj << " [avg " << std::to_string(m_avg);
   streamObj << " min " << std::to_string(m_min);
   streamObj << " max " << std::to_string(m_max);
+  //streamObj << " ts " << std::to_string(currentTime.time_since_epoch().count());
   streamObj << "]";
 
   latencyString = streamObj.str();
@@ -46,8 +82,26 @@ void LatencyMonitor::monitor(QueryBuffer &buffer, long latencyMark) {
   long t2 = (currentTimeNano - m_timestampReference) / 1000L;
   dt = ((double) (t2 - t1)) / 1000.; /* In milliseconds */
 
+  /*if (m_clearFiles) {
+    if (m_restartReference == 0) {
+      m_restartReference = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count();
+      m_remainingTime = dt; //1000;
+    } else {
+      double diff = ((double)(currentTimeNano - m_restartReference) / 1000L) / 1000.;
+      if (diff > m_remainingTime) {
+        m_clearFiles = false;
+      } else {
+        dt = dt + (m_remainingTime - diff);
+      }
+    }
+  }*/
+
   m_measurements.push_back(dt);
 
+  auto latencyString = std::to_string(latencyMark);
+  ::pwrite(m_fd2, latencyString.data(), latencyString.size(), 0);
+  // fsync(m_fd2);
+
   m_latency += dt;
   m_count += 1;
 
@@ -55,6 +109,14 @@ void
LatencyMonitor::monitor(QueryBuffer &buffer, long latencyMark) { m_max = std::max(dt, m_max); } +long LatencyMonitor::getTimestampReference() const { + return m_timestampReference; +} + +long LatencyMonitor::getLastTimestamp() const { + return m_lastTimestamp; +} + void LatencyMonitor::stop() { m_active.store(false); @@ -78,7 +140,7 @@ void LatencyMonitor::stop() { std::cout << streamObj.str() << std::endl; } -double LatencyMonitor::evaluateSorted(const double p) { +double LatencyMonitor::evaluateSorted(double p) { double n = m_measurements.size(); double pos = p * (n + 1) / 100; double fpos = floor(pos); diff --git a/src/monitors/LatencyMonitor.h b/src/monitors/LatencyMonitor.h index d8d0660..9a45db4 100644 --- a/src/monitors/LatencyMonitor.h +++ b/src/monitors/LatencyMonitor.h @@ -1,13 +1,15 @@ #pragma once +#include + +#include #include -#include -#include #include -#include #include -#include +#include +#include #include +#include #include class QueryBuffer; @@ -26,17 +28,26 @@ class LatencyMonitor { long m_count; double m_min, m_max, m_avg; long m_timestampReference = 0; + long m_lastTimestamp = 0; double m_latency; std::atomic m_active; std::vector m_measurements; + const std::string m_fileName = SystemConf::FILE_ROOT_PATH + "/scabbard/latency-metrics"; + const std::string m_fileName2 = SystemConf::FILE_ROOT_PATH + "/scabbard/latency-metrics-2"; + int m_fd, m_fd2; + bool m_clearFiles; + long m_restartReference = 0; + double m_remainingTime = 0.; public: - explicit LatencyMonitor(long timeReference); + explicit LatencyMonitor(long timeReference, bool clearFiles = true); void disable(); std::string toString(); void monitor(QueryBuffer &buffer, long latencyMark); + [[nodiscard]] long getTimestampReference() const; + [[nodiscard]] long getLastTimestamp() const; void stop(); private: - double evaluateSorted(const double p); + double evaluateSorted(double p); }; \ No newline at end of file diff --git a/src/monitors/Measurement.cpp b/src/monitors/Measurement.cpp index 17ac73f..0aebe1e 100644 --- a/src/monitors/Measurement.cpp +++ b/src/monitors/Measurement.cpp @@ -1,16 +1,22 @@ -#include "Measurement.h" -#include "buffers/QueryBuffer.h" -#include "dispatcher/TaskDispatcher.h" -#include "result/ResultHandler.h" -#include "LatencyMonitor.h" +#include "monitors/Measurement.h" #include +#include "buffers/QueryBuffer.h" +#include "dispatcher/ITaskDispatcher.h" +#include "dispatcher/JoinTaskDispatcher.h" +#include "dispatcher/TaskDispatcher.h" +#include "monitors/LatencyMonitor.h" + long Measurement::m_sumTuples = 0; int Measurement::m_measurements = 0; -Measurement::Measurement(int id, TaskDispatcher *dispatcher, LatencyMonitor *monitor) : - m_id(id), m_dispatcher(dispatcher), m_buffer(dispatcher->getBuffer()), m_monitor(monitor) {} +Measurement::Measurement(int id, ITaskDispatcher *dispatcher, LatencyMonitor *monitor) : + m_id(id), m_dispatcher(dispatcher), m_firstBuffer(dispatcher->getBuffer()), m_monitor(monitor) { + if (JoinTaskDispatcher *d = dynamic_cast(m_dispatcher)) { + m_secondBuffer = d->getSecondBuffer(); + } +} void Measurement::stop() { m_monitor->stop(); @@ -18,7 +24,9 @@ void Measurement::stop() { std::string Measurement::getInfo(long delta, int inputTuple, int outputTuple) { std::string s; - m_bytesProcessed = m_buffer->getBytesProcessed(); + auto storedBytes = m_firstBuffer->getAverageStoredBytes(); + m_bytesProcessed = (long) m_firstBuffer->getBytesProcessed(); + m_bytesGenerated += (m_secondBuffer == nullptr) ? 
0 : + (long) m_secondBuffer->getBytesProcessed(); m_bytesGenerated = m_dispatcher->getBytesGenerated(); if (m__bytesProcessed > 0) { m_Dt = ((double) delta / 1000.0); @@ -33,14 +41,17 @@ std::string Measurement::getInfo(long delta, int inputTuple, int outputTuple) { streamObj << std::setprecision(3); streamObj << " S" + q_id + " " << m_MBpsProcessed << " MB/s "; if (inputTuple != 0) { - streamObj << "(" << (m_bytesProcessed - m__bytesProcessed) / inputTuple << " tuples/sec) "; + streamObj << "(" << (m_bytesProcessed - m__bytesProcessed) / inputTuple << " t/sec) "; m_sumTuples += (m_bytesProcessed - m__bytesProcessed) / inputTuple; m_measurements++; - streamObj << "(Average: " << m_sumTuples / m_measurements << " tuples/sec) "; + streamObj << "(Average: " << m_sumTuples / m_measurements << " t/sec) "; + if (storedBytes > 0) { + streamObj << "[ASB: " << storedBytes << "] "; + } } streamObj << "output " << m_MBpsGenerated << " MB/s "; //["+std::to_string(monitor)+"]"; if (outputTuple != 0) { - streamObj << "(" << (m_bytesGenerated - m__bytesGenerated) / outputTuple << " tuples/sec) "; + streamObj << "(" << (m_bytesGenerated - m__bytesGenerated) / outputTuple << " t/sec) "; } if (m_monitor != nullptr) { streamObj << m_monitor->toString(); diff --git a/src/monitors/Measurement.h b/src/monitors/Measurement.h index ef05f57..2973d9b 100644 --- a/src/monitors/Measurement.h +++ b/src/monitors/Measurement.h @@ -1,7 +1,7 @@ #pragma once class QueryBuffer; -class TaskDispatcher; +class ITaskDispatcher; class ResultHandler; class LatencyMonitor; @@ -17,8 +17,8 @@ class LatencyMonitor; class Measurement { private: int m_id; - TaskDispatcher *m_dispatcher; - QueryBuffer *m_buffer; + ITaskDispatcher *m_dispatcher; + QueryBuffer *m_firstBuffer, *m_secondBuffer = nullptr; LatencyMonitor *m_monitor; double m_Dt; double m__1MB_ = 1048576.0; @@ -30,7 +30,7 @@ class Measurement { static int m_measurements; public: - Measurement(int id = -1, TaskDispatcher *dispatcher = nullptr, LatencyMonitor *monitor = nullptr); + Measurement(int id = -1, ITaskDispatcher *dispatcher = nullptr, LatencyMonitor *monitor = nullptr); void stop(); std::string getInfo(long delta, int inputTuple = 0, int outputTuple = 0); ~Measurement(); diff --git a/src/monitors/PerformanceMonitor.cpp b/src/monitors/PerformanceMonitor.cpp index cc3cf2a..5b4cb95 100644 --- a/src/monitors/PerformanceMonitor.cpp +++ b/src/monitors/PerformanceMonitor.cpp @@ -2,7 +2,6 @@ #include "Measurement.h" #include "LatencyMonitor.h" #include "utils/QueryApplication.h" -#include "utils/Query.h" #include "utils/TupleSchema.h" #include "tasks/TaskFactory.h" #include "buffers/PartialWindowResultsFactory.h" @@ -15,7 +14,8 @@ PerformanceMonitor::PerformanceMonitor(QueryApplication &application) : m_applic // assume queries are pre-sorted based on their id for (int idx = 0; idx < m_size; ++idx) { - std::cout << "[MON] [MultiOperator] S" << std::setfill('0') << std::setw(3) + auto currentMs = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + std::cout << "[MON] [MultiOperator] " << currentMs <<" S" << std::setfill('0') << std::setw(3) << std::to_string(application.getQueries()[idx]->getId()) << std::endl; m_measurements[idx] = new Measurement( application.getQueries()[idx]->getId(), @@ -23,6 +23,7 @@ PerformanceMonitor::PerformanceMonitor(QueryApplication &application) : m_applic &application.getQueries()[idx]->getLatencyMonitor() ); } + m_t1 = std::chrono::high_resolution_clock::now(); } #pragma clang diagnostic push @@ -34,11 
+35,14 @@ void PerformanceMonitor::operator()() { } catch (std::exception &e) { std::cout << e.what() << std::endl; } - m_time = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1); // milliseconds + auto t2 = std::chrono::system_clock::now(); + m_time = t2.time_since_epoch() / std::chrono::milliseconds(1); // milliseconds m_dt = m_time - m__time; std::string builder; builder.append("[MON]"); + auto currentMs = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + builder.append(" " + std::to_string(currentMs)); for (int i = 0; i < m_size; i++) builder.append(m_measurements[i]->getInfo(m_dt, (*m_application.getQueries()[i]).getSchema()->getTupleSize(), @@ -54,7 +58,8 @@ void PerformanceMonitor::operator()() { m__time = m_time; if (SystemConf::getInstance().DURATION > 0) { - if (m_counter++ > SystemConf::getInstance().DURATION) { + auto time_span = std::chrono::duration_cast>(t2 - m_t1); + if (time_span.count() > SystemConf::getInstance().DURATION) { for (int i = 0; i < m_size; i++) m_measurements[i]->stop(); std::cout << "[MON] Done." << std::endl; diff --git a/src/monitors/PerformanceMonitor.h b/src/monitors/PerformanceMonitor.h index 40406ba..5fb7933 100644 --- a/src/monitors/PerformanceMonitor.h +++ b/src/monitors/PerformanceMonitor.h @@ -22,6 +22,7 @@ class PerformanceMonitor { QueryApplication &m_application; int m_size; std::vector m_measurements; + std::chrono::high_resolution_clock::time_point m_t1; public: PerformanceMonitor(QueryApplication &application); diff --git a/src/monitors/ThroughputMonitor.cpp b/src/monitors/ThroughputMonitor.cpp new file mode 100644 index 0000000..fe15ae3 --- /dev/null +++ b/src/monitors/ThroughputMonitor.cpp @@ -0,0 +1,122 @@ +#include +#include + +#include "ThroughputMonitor.h" +#include "LatencyMonitor.h" +#include "utils/QueryApplication.h" + +long SimpleMeasurement::m_sumTuples = 0; +int SimpleMeasurement::m_measurements = 0; + +SimpleMeasurement::SimpleMeasurement(std::atomic &bytesProcessed) : m_bytesProcessedAtomic(bytesProcessed) {} + +std::string SimpleMeasurement::getInfo(long delta, int inputTuple, int outputTuple) { + std::string s; + m_bytesProcessed = m_bytesProcessedAtomic.load(std::memory_order_relaxed); + if (m__bytesProcessed > 0) { + m_Dt = ((double) delta / 1000.0); + m_MBpsProcessed = ((double) m_bytesProcessed - (double) m__bytesProcessed) / m__1MB_ / m_Dt; + m_MBpsGenerated = ((double) m_bytesGenerated - (double) m__bytesGenerated) / m__1MB_ / m_Dt; + std::string q_id = std::to_string(m_id); + q_id = std::string(3 - q_id.length(), '0') + q_id; + + // Create an output string stream + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << " S" + q_id + " " << m_MBpsProcessed << " MB/s "; + if (inputTuple != 0) { + streamObj << "(" << (m_bytesProcessed - m__bytesProcessed) / inputTuple << " t/sec) "; + m_sumTuples += (m_bytesProcessed - m__bytesProcessed) / inputTuple; + m_measurements++; + streamObj << "(Average: " << m_sumTuples / m_measurements << " t/sec) "; + } + streamObj << "output " << m_MBpsGenerated << " MB/s "; //["+std::to_string(monitor)+"]"; + if (outputTuple != 0) { + streamObj << "(" << (m_bytesGenerated - m__bytesGenerated) / outputTuple << " t/sec) "; + } + + streamObj << "m_bytesProcessed " << m_bytesProcessed << " "; + s = streamObj.str(); + } + m__bytesProcessed = m_bytesProcessed; + m__bytesGenerated = m_bytesGenerated; + return s; +} + +std::string SimpleMeasurement::getThroughput(long 
delta, int inputTuple, int outputTuple) { + std::string s; + m_bytesProcessed = m_bytesProcessedAtomic.load(std::memory_order_relaxed); + if (m__bytesProcessed > 0) { + m_Dt = ((double) delta / 1000.0); + m_MBpsProcessed = ((double) m_bytesProcessed - (double) m__bytesProcessed) / m__1MB_ / m_Dt; + m_MBpsGenerated = ((double) m_bytesGenerated - (double) m__bytesGenerated) / m__1MB_ / m_Dt; + std::string q_id = std::to_string(m_id); + q_id = std::string(3 - q_id.length(), '0') + q_id; + + // Create an output string stream + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << m_MBpsProcessed << " MB/s "; + if (inputTuple != 0) { + streamObj << "(" << (m_bytesProcessed - m__bytesProcessed) / inputTuple + << " t/sec) "; + m_sumTuples += (m_bytesProcessed - m__bytesProcessed) / inputTuple; + m_measurements++; + streamObj << "(Average: " << m_sumTuples / m_measurements + << " t/sec) "; + } + + s = streamObj.str(); + } + m__bytesProcessed = m_bytesProcessed; + m__bytesGenerated = m_bytesGenerated; + return s; +} + +SimpleMeasurement::~SimpleMeasurement() {}; + +ThroughputMonitor::ThroughputMonitor(std::atomic &bytesProcessed, int inputTuple, int outputTuple) : m_bytesProcessed(bytesProcessed), + m_measurement(std::make_unique(bytesProcessed)), + m_inputTupleSize(inputTuple), m_outputTupleSize(outputTuple) {} +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" +void ThroughputMonitor::operator()() { + while (true) { + try { + std::this_thread::sleep_for(std::chrono::milliseconds(m_throughputMonitorInterval)); + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + } + m_time = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1); // milliseconds + m_dt = m_time - m__time; + + std::string builder; + if (m_printInConsole) { + builder.append("[Q-MON]"); + builder.append(m_measurement->getInfo(m_dt, m_inputTupleSize, m_outputTupleSize)); + std::cout << std::setprecision(3) << builder << std::endl; + } else { + builder.append(std::to_string(m_counter*m_throughputMonitorInterval) + " "); + builder.append(m_measurement->getThroughput(m_dt, m_inputTupleSize, m_outputTupleSize)); + m_storedMeasurements.emplace_back(builder); + } + + m__time = m_time; + if (m_duration > 0) { + if (m_counter++ > m_duration) { + if (!m_printInConsole) { + std::ofstream output_file("throughput_measurements.txt"); + std::ostream_iterator output_iterator(output_file, "\n"); + std::copy(m_storedMeasurements.begin(), m_storedMeasurements.end(), output_iterator); + } + std::cout << "[Q-MON] Done." 
<< std::endl; + break; + } + } + } +} +#pragma clang diagnostic pop + +ThroughputMonitor::~ThroughputMonitor() {} \ No newline at end of file diff --git a/src/monitors/ThroughputMonitor.h b/src/monitors/ThroughputMonitor.h new file mode 100644 index 0000000..30e84ed --- /dev/null +++ b/src/monitors/ThroughputMonitor.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include +#include +#include + +class SimpleMeasurement { + private: + int m_id = 0; + std::atomic &m_bytesProcessedAtomic; + double m_Dt; + double m__1MB_ = 1048576.0; + long m_bytesProcessed, m__bytesProcessed = 0; + long m_bytesGenerated, m__bytesGenerated = 0; + double m_MBpsProcessed, m_MBpsGenerated; + static long m_sumTuples; + static int m_measurements; + + public: + SimpleMeasurement(std::atomic &bytesProcessed); + std::string getInfo(long delta, int inputTuple = 0, int outputTuple = 0); + std::string getThroughput(long delta, int inputTuple = 0, int outputTuple = 0); + ~SimpleMeasurement(); +}; + +/* + * The ThroughputMonitor runs in the background and collects performance @Measurements that are + * printed to the user. + * + * */ + +class ThroughputMonitor { + private: + int m_counter = 0; + long m_time, m__time = 0L; + long m_dt; + int m_size; + std::atomic &m_bytesProcessed; + std::unique_ptr m_measurement; + int m_inputTupleSize, m_outputTupleSize; + bool m_printInConsole = false; + std::vector m_storedMeasurements; + size_t m_throughputMonitorInterval = 10; // in ms + size_t m_duration = 1200; + + public: + ThroughputMonitor(std::atomic &bytesProcessed, int inputTuple, int outputTuple); + void operator()(); + ~ThroughputMonitor(); +}; + + diff --git a/src/result/PartialResultSlot.h b/src/result/PartialResultSlot.h index f958a94..eb5d3f9 100644 --- a/src/result/PartialResultSlot.h +++ b/src/result/PartialResultSlot.h @@ -2,8 +2,8 @@ #include -#include "buffers/PartialWindowResultsFactory.h" #include "cql/operators/AggregateOperatorCode.h" +#include "buffers/PartialWindowResultsFactory.h" #include "tasks/WindowBatch.h" static const bool debug = false; @@ -19,39 +19,40 @@ struct alignas(64) PartialResultSlotWithoutFragments { int m_index; std::atomic m_numberOfResults; std::atomic m_slot; - int m_freePointer; + long m_freePointer1, m_freePointer2; + long m_prevFreePointer1, m_prevFreePointer2; + long m_freeOffset1, m_freeOffset2; + bool m_clearCheckpoint; // A query can have more than one downstream queries int m_latch; long m_latencyMark; std::atomic m_taskId; std::atomic m_previousSlot; - PartialResultSlotWithoutFragments(int index = -1) - : m_index(index), - m_numberOfResults(0), - m_slot(-1), - m_freePointer(INT_MIN), - m_latch(0), - m_latencyMark(-1), - m_taskId(-1), - m_previousSlot(-1) {} - - int getNumberOfResults() { + std::mutex m_updateLock; + std::shared_ptr m_graph; + + PartialResultSlotWithoutFragments(int index = -1) : m_index(index), m_numberOfResults(0), m_slot(-1), + m_freePointer1(INT_MIN), m_freePointer2(INT_MIN), + m_prevFreePointer1(INT_MIN), m_prevFreePointer2(INT_MIN), + m_freeOffset1(INT_MIN), m_freeOffset2(INT_MIN), + m_clearCheckpoint(false), + m_latch(0), m_latencyMark(-1), m_taskId(-1), m_previousSlot(-1) {} + + int getNumberOfResults () { m_numberOfResults.store(1); return m_numberOfResults.load(); } - void setPreviousSlot(int prev) { + void setPreviousSlot (int prev) { if (prev != 1 && prev != 3) { - throw std::runtime_error("error: setting the previous slot value to " + - std::to_string(prev)); + throw std::runtime_error("error: setting the previous slot value to " + 
std::to_string(prev)); } m_previousSlot = prev; } - int getPreviousSlot() { + int getPreviousSlot () { if (m_previousSlot != 1) { - throw std::runtime_error("error: getting the previous slot value " + - std::to_string(m_previousSlot)); + throw std::runtime_error("error: getting the previous slot value " + std::to_string(m_previousSlot)); } return m_previousSlot.load(); } @@ -61,20 +62,28 @@ struct alignas(64) PartialResultSlotWithoutFragments { __builtin_prefetch(m_result->getBufferRaw(), 1, 3); } } + + void freeGraph() { + if (m_graph) { + if (m_graph.use_count() == 1) + LineageGraphFactory::getInstance().free(m_graph); + m_graph.reset(); + } + } }; /* - * \brief PartialResultSlots are used for operators with window fragments and - * single-threaded merge phase + * \brief PartialResultSlots are used for operators with window fragments and single-threaded merge phase * */ struct alignas(64) PartialResultSlot { - std::shared_ptr m_closingWindows, m_pendingWindows, - m_openingWindows, m_completeWindows; + std::shared_ptr m_closingWindows, m_pendingWindows, m_openingWindows, m_completeWindows; std::atomic m_slot; int m_index; long m_freePointer; + long m_freeOffset; + bool m_clearCheckpoint; int m_latch; long m_latencyMark; std::atomic m_numberOfFragments; @@ -83,28 +92,35 @@ struct alignas(64) PartialResultSlot { std::atomic m_previousSlot; std::atomic m_taskId; PartialResultSlot *m_next; - - PartialResultSlot(int index = -1) - : m_closingWindows(nullptr), - m_pendingWindows(nullptr), - m_openingWindows(nullptr), - m_completeWindows(nullptr), - m_slot(-1), - m_index(index), - m_freePointer(INT_MIN), - m_latch(0), - m_latencyMark(-1), - m_numberOfFragments(0), - m_numberOfWindows(0), - m_numberOfCompleteWindows(0), - m_previousSlot(-1), - m_taskId(-1), - m_next(nullptr) {} - - void connectTo(PartialResultSlot *nextSlot) { m_next = nextSlot; } + std::mutex m_updateLock; + std::shared_ptr m_graph; + + PartialResultSlot(int index = -1) : m_closingWindows(nullptr), + m_pendingWindows(nullptr), + m_openingWindows(nullptr), + m_completeWindows(nullptr), + m_slot(-1), + m_index(index), + m_freePointer(INT_MIN), + m_freeOffset(INT_MIN), + m_clearCheckpoint(false), + m_latch(0), + m_latencyMark(-1), + m_numberOfFragments(0), + m_numberOfWindows(0), + m_numberOfCompleteWindows(0), + m_previousSlot(-1), + m_taskId(-1), + m_next(nullptr) {} + + void connectTo(PartialResultSlot *nextSlot) { + m_next = nextSlot; + } void init(WindowBatch *batch) { m_freePointer = batch->getFreePointer(); + m_freeOffset = batch->getStreamEndPointer(); + m_clearCheckpoint = false; m_latencyMark = batch->getLatencyMark(); m_latch = 0; m_taskId = batch->getTaskId(); @@ -116,10 +132,10 @@ struct alignas(64) PartialResultSlot { if (debug) { std::string str = "[DBG] initializing slot " + std::to_string(m_index) + - " with " + std::to_string(m_freePointer) + - " m_freePointer " + std::to_string(m_latencyMark) + - " m_latencyMark " + std::to_string(m_latencyMark) + - " m_taskId " + std::to_string(m_taskId) + " m_taskId "; + " with " + std::to_string(m_freePointer) + + " m_freePointer " + std::to_string(m_latencyMark) + + " m_latencyMark " + std::to_string(m_latencyMark) + + " m_taskId " + std::to_string(m_taskId) + " m_taskId "; size_t totalSize = 0; if (m_closingWindows) { str += std::to_string(m_closingWindows->getPosition()) + @@ -147,58 +163,54 @@ struct alignas(64) PartialResultSlot { void init(WindowBatch *batch, int window) { m_freePointer = batch->getFreePointer(); + m_freeOffset = batch->getStreamEndPointer(); 
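// --- Editor's note: the m_slot fields in these structs implement a small
// state machine on which the dispatcher, the workers, and the merging/
// forwarding thread coordinate. From the transitions used in this file and in
// ResultHandler.cpp below, the states appear to be: -1 free, 0 being written,
// 1 result ready, 2 being forwarded, 3 merged/releasable. A compact sketch of
// that hand-off (names are illustrative, not engine API):
#include <atomic>

struct SlotState {
  std::atomic<int> state{-1};

  bool claimForWrite() {                 // worker: -1 -> 0
    int expected = -1;
    return state.compare_exchange_strong(expected, 0);
  }
  void publish() { state.store(1); }     // worker: result is complete
  bool claimForForward() {               // single forwarder wins the 1 -> 2 race
    int expected = 1;
    return state.compare_exchange_strong(expected, 2);
  }
  void recycle() { state.store(-1); }    // after the input batch is freed
};
// --- end editor's note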
m_latencyMark = batch->getLatencyMark(); m_latch = 0; m_taskId = batch->getTaskId(); + m_clearCheckpoint = false; batch->clear(); } void release() { if (m_closingWindows != nullptr) { - PartialWindowResultsFactory::getInstance().free( - m_closingWindows->getThreadId(), m_closingWindows); + PartialWindowResultsFactory::getInstance().free(m_closingWindows->getThreadId(), m_closingWindows); m_closingWindows.reset(); } if (m_openingWindows != nullptr) { - PartialWindowResultsFactory::getInstance().free( - m_openingWindows->getThreadId(), m_openingWindows); + PartialWindowResultsFactory::getInstance().free(m_openingWindows->getThreadId(), m_openingWindows); m_openingWindows.reset(); } if (m_pendingWindows != nullptr) { - PartialWindowResultsFactory::getInstance().free( - m_pendingWindows->getThreadId(), m_pendingWindows); + PartialWindowResultsFactory::getInstance().free(m_pendingWindows->getThreadId(), m_pendingWindows); m_pendingWindows.reset(); } if (m_completeWindows != nullptr) { - PartialWindowResultsFactory::getInstance().free( - m_completeWindows->getThreadId(), m_completeWindows); + PartialWindowResultsFactory::getInstance().free(m_completeWindows->getThreadId(), m_completeWindows); m_completeWindows.reset(); } + if (m_graph) { + if (m_graph.use_count() == 1) { + LineageGraphFactory::getInstance().free(m_graph); + } + m_graph.reset(); + } } bool releaseEmptyPartials() { - if (m_closingWindows != nullptr && - m_closingWindows->numberOfWindows() == 0) { - PartialWindowResultsFactory::getInstance().free( - m_closingWindows->getThreadId(), m_closingWindows); + if (m_closingWindows != nullptr && m_closingWindows->numberOfWindows() == 0) { + PartialWindowResultsFactory::getInstance().free(m_closingWindows->getThreadId(), m_closingWindows); m_closingWindows.reset(); } - if (m_openingWindows != nullptr && - m_openingWindows->numberOfWindows() == 0) { - PartialWindowResultsFactory::getInstance().free( - m_openingWindows->getThreadId(), m_openingWindows); + if (m_openingWindows != nullptr && m_openingWindows->numberOfWindows() == 0) { + PartialWindowResultsFactory::getInstance().free(m_openingWindows->getThreadId(), m_openingWindows); m_openingWindows.reset(); } - if (m_pendingWindows != nullptr && - m_pendingWindows->numberOfWindows() == 0) { - PartialWindowResultsFactory::getInstance().free( - m_pendingWindows->getThreadId(), m_pendingWindows); + if (m_pendingWindows != nullptr && m_pendingWindows->numberOfWindows() == 0) { + PartialWindowResultsFactory::getInstance().free(m_pendingWindows->getThreadId(), m_pendingWindows); m_pendingWindows.reset(); } - if (m_completeWindows != nullptr && - m_completeWindows->numberOfWindows() == 0) { - PartialWindowResultsFactory::getInstance().free( - m_completeWindows->getThreadId(), m_completeWindows); + if (m_completeWindows != nullptr && m_completeWindows->numberOfWindows() == 0) { + PartialWindowResultsFactory::getInstance().free(m_completeWindows->getThreadId(), m_completeWindows); m_completeWindows.reset(); } return true; @@ -207,41 +219,33 @@ struct alignas(64) PartialResultSlot { bool tryRelease() { int cnt = 0; if (m_closingWindows != nullptr) { - if (m_closingWindows->numberOfWindows() == 0 || - m_closingWindows.use_count() == 1) { + if (m_closingWindows->numberOfWindows() == 0 || m_closingWindows.use_count() == 1) { cnt++; - PartialWindowResultsFactory::getInstance().free( - m_closingWindows->getThreadId(), m_closingWindows); + PartialWindowResultsFactory::getInstance().free(m_closingWindows->getThreadId(), m_closingWindows); 
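// --- Editor's note: tryRelease() above gates recycling on
// shared_ptr::use_count() == 1, i.e. a partial window result is handed back
// to its factory only once this slot holds the last reference (a merge task
// may still share ownership). A generic sketch of the pattern, with
// pool.free() standing in for the PartialWindowResultsFactory call (assumed):
#include <memory>

template <typename Pool, typename T>
bool tryRecycle(Pool &pool, std::shared_ptr<T> &partial) {
  if (!partial) return true;             // nothing left to release
  if (partial.use_count() == 1) {        // we own the last reference
    pool.free(partial);                  // return the buffer to its pool
    partial.reset();
    return true;
  }
  return false;                          // a consumer still reads it; retry later
}
// --- end editor's note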
m_closingWindows.reset(); } else { if (debug) - std::cout << m_closingWindows.use_count() - << " closing windows with still a reference\n"; + std::cout << m_closingWindows.use_count() << " closing windows with still a reference\n"; } } else { cnt++; } if (m_openingWindows != nullptr) { - if (m_openingWindows->numberOfWindows() == 0 || - m_openingWindows.use_count() == 1) { + if (m_openingWindows->numberOfWindows() == 0 || m_openingWindows.use_count() == 1) { cnt++; - PartialWindowResultsFactory::getInstance().free( - m_openingWindows->getThreadId(), m_openingWindows); + PartialWindowResultsFactory::getInstance().free(m_openingWindows->getThreadId(), m_openingWindows); m_openingWindows.reset(); } else { if (debug) - std::cout << m_openingWindows.use_count() - << " opening windows with still a reference\n"; + std::cout << m_openingWindows.use_count() << " opening windows with still a reference\n"; } } else { cnt++; } if (m_pendingWindows != nullptr) { - if (m_pendingWindows->numberOfWindows() == 0 || - m_pendingWindows.use_count() == 1) { + if (m_pendingWindows->numberOfWindows() == 0 || m_pendingWindows.use_count() == 1) { cnt++; - PartialWindowResultsFactory::getInstance().free( - m_pendingWindows->getThreadId(), m_pendingWindows); + PartialWindowResultsFactory::getInstance().free(m_pendingWindows->getThreadId(), m_pendingWindows); m_pendingWindows.reset(); } else { if (debug) @@ -257,57 +261,42 @@ struct alignas(64) PartialResultSlot { } void prefetch() { - if (m_closingWindows != nullptr && - m_closingWindows->numberOfWindows() > 0 && - m_closingWindows->getType() == 1) { + if (m_closingWindows != nullptr && m_closingWindows->numberOfWindows() > 0 && m_closingWindows->getType() == 1) { for (int w = 0; w < m_closingWindows->numberOfWindows(); ++w) { __builtin_prefetch(m_closingWindows->getBufferPtrs()[w], 1, 3); } } - if (m_openingWindows != nullptr && - m_openingWindows->numberOfWindows() > 0 && - m_openingWindows->getType() == 1) { + if (m_openingWindows != nullptr && m_openingWindows->numberOfWindows() > 0 && m_openingWindows->getType() == 1) { for (int w = 0; w < m_openingWindows->numberOfWindows(); ++w) { __builtin_prefetch(m_openingWindows->getBufferPtrs()[w], 1, 3); } } - if (m_pendingWindows != nullptr && - m_pendingWindows->numberOfWindows() > 0 && - m_pendingWindows->getType() == 1) { + if (m_pendingWindows != nullptr && m_pendingWindows->numberOfWindows() > 0 && m_pendingWindows->getType() == 1) { for (int w = 0; w < m_pendingWindows->numberOfWindows(); ++w) { __builtin_prefetch(m_pendingWindows->getBufferPtrs()[w], 1, 3); } } - if (m_completeWindows != nullptr && - m_completeWindows->numberOfWindows() > 0) { + if (m_completeWindows != nullptr && m_completeWindows->numberOfWindows() > 0) { __builtin_prefetch(m_completeWindows->getBuffer().data(), 1, 3); } } /* - * Aggregate this node's opening windows with node p's closing or pending - * windows. The output of this operation will always produce complete or - * opening windows - never pending and never closing ones. + * Aggregate this node's opening windows with node p's closing or pending windows. The output of this + * operation will always produce complete or opening windows - never pending and never closing ones. 
*/ - void aggregate(PartialResultSlot *partialSlot, - AggregateOperatorCode *aggrOperator) { + void aggregate(PartialResultSlot *partialSlot, AggregateOperatorCode *aggrOperator) { if (m_openingWindows->isEmpty()) { /* Nothing to aggregate */ - if ((!partialSlot->m_closingWindows->isEmpty()) || - (!partialSlot->m_pendingWindows->isEmpty())) { - throw std::runtime_error( - "error: there are no opening windows but next slot has closing or " - "pending windows"); + if ((!partialSlot->m_closingWindows->isEmpty()) || (!partialSlot->m_pendingWindows->isEmpty())) { + throw std::runtime_error("error: there are no opening windows but next slot has closing or pending windows"); } m_openingWindows->nullify(); partialSlot->m_closingWindows->nullify(); partialSlot->m_pendingWindows->nullify(); return; } - if (partialSlot->m_closingWindows->isEmpty() && - partialSlot->m_pendingWindows->isEmpty()) { - throw std::runtime_error( - "error: there are opening windows but next slot has neither closing " - "nor pending windows"); + if (partialSlot->m_closingWindows->isEmpty() && partialSlot->m_pendingWindows->isEmpty()) { + throw std::runtime_error("error: there are opening windows but next slot has neither closing nor pending windows"); } /* * Populate this node's complete windows or p's opening windows. @@ -315,15 +304,11 @@ struct alignas(64) PartialResultSlot { * and pending ones. */ if (debug) { - std::cout << "[DBG] aggregate " - << std::to_string(m_openingWindows->getPosition()) << " bytes (" + std::cout << "[DBG] aggregate " << std::to_string(m_openingWindows->getPosition()) << " bytes (" << std::to_string(m_openingWindows->numberOfWindows()) - << " opening windows) with " - << std::to_string(partialSlot->m_closingWindows->getPosition()) + << " opening windows) with " << std::to_string(partialSlot->m_closingWindows->getPosition()) << " bytes (" - << std::to_string( - partialSlot->m_closingWindows->numberOfWindows()) - << " closing windows)" << std::endl; + << std::to_string(partialSlot->m_closingWindows->numberOfWindows()) << " closing windows)" << std::endl; } auto numOfClosingWindows = partialSlot->m_closingWindows->numberOfWindows(); auto numOfOpeningWindows = m_openingWindows->numberOfWindows(); @@ -333,49 +318,51 @@ /* Merge opening and closing windows and store the complete result */ if (numOfClosingWindows > 0) { if (numOfOpeningWindows < numOfClosingWindows) - throw std::runtime_error( - "error: closing window partial results are more then the opening " - "ones"); - - aggrOperator->aggregatePartials( - m_openingWindows, partialSlot->m_closingWindows, m_completeWindows, - numOfClosingWindows, pos, tupleSize, true); + throw std::runtime_error("error: closing window partial results are more than the opening ones"); + + aggrOperator->aggregatePartials(m_openingWindows, + partialSlot->m_closingWindows, + m_completeWindows, + numOfClosingWindows, + pos, + tupleSize, + true); m_completeWindows->setPosition(pos); m_completeWindows->incrementCount(numOfClosingWindows); partialSlot->m_closingWindows->nullify(); } - /* There may be some opening windows left, in which case they are aggregated - * with node partialSlot's pending one. The result will be stored - * (prepended) in partialSlot's opening windows */ + /* There may be some opening windows left, in which case they are aggregated with node partialSlot's pending one.
+ * The result will be stored (prepended) in partialSlot's opening windows */ auto remainingWindows = numOfOpeningWindows - numOfClosingWindows; if (remainingWindows) { if (numOfPendingWindows != 1) { - throw std::runtime_error( - "error: there are opening windows left but next slot has no " - "pending windows"); + throw std::runtime_error("error: there are opening windows left but next slot has no pending windows"); } if (debug) { - std::cout << "[DBG] aggregate " << std::to_string(remainingWindows) - << " remaining opening windows with pending" << std::endl; + std::cout << "[DBG] aggregate " << std::to_string(remainingWindows) << " remaining opening windows with pending" + << std::endl; } int nextOpenWindow = numOfClosingWindows; - aggrOperator->aggregatePartials( - m_openingWindows, partialSlot->m_pendingWindows, m_completeWindows, - remainingWindows, pos, tupleSize, false); - /* Prepend this opening windows (starting from `nextOpenWindow`) to node - * partialSlot's opening windows. We have to shift the start pointers of - * partialSlot's opening windows down. There are `count` new windows. The - * window size equal the hash table size or a single tuple if we don't - * have group by: + aggrOperator->aggregatePartials(m_openingWindows, + partialSlot->m_pendingWindows, + m_completeWindows, + remainingWindows, + pos, + tupleSize, + false); + /* Prepend these opening windows (starting from `nextOpenWindow`) to node partialSlot's opening windows. + * We have to shift the start pointers of partialSlot's opening windows down. + * There are `count` new windows. The window size equals the hash table size or a single tuple if we don't have group by: */ - auto windowSize = (aggrOperator->hasGroupBy()) - ? SystemConf::getInstance().HASH_TABLE_SIZE - : 1; // aggrOperator->getValueLength() + 12; + auto windowSize = (aggrOperator->hasGroupBy()) ? SystemConf::getInstance().HASH_TABLE_SIZE + : 1; //aggrOperator->getValueLength() + 12; partialSlot->m_openingWindows->prepend(m_openingWindows.get(), - nextOpenWindow, remainingWindows, - windowSize, tupleSize); + nextOpenWindow, + remainingWindows, + windowSize, + tupleSize); partialSlot->m_pendingWindows->nullify(); } m_openingWindows->nullify(); @@ -385,61 +372,60 @@ if (m_closingWindows->numberOfWindows() > 0 || m_openingWindows->numberOfWindows() > 0 || m_pendingWindows->numberOfWindows() > 0) { - /*std::cout << "closingWindows: " << m_closingWindows->numberOfWindows() - << " openingWindows: " << m_openingWindows->numberOfWindows() << " - pendingWindows:" << m_pendingWindows->numberOfWindows() << std::endl;*/ + /*std::cout << "closingWindows: " << m_closingWindows->numberOfWindows() << + " openingWindows: " << m_openingWindows->numberOfWindows() << + " pendingWindows:" << m_pendingWindows->numberOfWindows() << std::endl;*/ return false; } return true; } - int getNumberOfWindowFragments(bool hasPtrs = true) { - int fragments = 0; - int windows = 0; + int getNumberOfWindowFragments (bool hasPtrs = true) { + int fragments = 0; int windows = 0; if (hasPtrs) { fragments += (m_closingWindows) ? m_closingWindows->numberOfWindows() : 0; fragments += (m_pendingWindows) ? m_pendingWindows->numberOfWindows() : 0; fragments += (m_openingWindows) ? m_openingWindows->numberOfWindows() : 0; - fragments += - (m_completeWindows && m_completeWindows->numberOfWindows() > 0); + fragments += (m_completeWindows && m_completeWindows->numberOfWindows() > 0); fragments = (fragments >= 1) ?
1 : 0; } else { - fragments += - (m_closingWindows && m_closingWindows->numberOfWindows() > 0); - fragments += - (m_pendingWindows && m_pendingWindows->numberOfWindows() > 0); - fragments += - (m_openingWindows && m_openingWindows->numberOfWindows() > 0); - fragments += - (m_completeWindows && m_completeWindows->numberOfWindows() > 0); + fragments += (m_closingWindows && m_closingWindows->numberOfWindows() > 0); + fragments += (m_pendingWindows && m_pendingWindows->numberOfWindows() > 0); + fragments += (m_openingWindows && m_openingWindows->numberOfWindows() > 0); + fragments += (m_completeWindows && m_completeWindows->numberOfWindows() > 0); } windows += (m_closingWindows) ? m_closingWindows->numberOfWindows() : 0; windows += (m_pendingWindows) ? m_pendingWindows->numberOfWindows() : 0; windows += (m_openingWindows) ? m_openingWindows->numberOfWindows() : 0; windows += (m_completeWindows) ? m_completeWindows->numberOfWindows() : 0; - m_numberOfCompleteWindows = - (m_completeWindows) ? m_completeWindows->numberOfWindows() : 0; + m_numberOfCompleteWindows = (m_completeWindows) ? m_completeWindows->numberOfWindows() : 0; m_numberOfWindows = windows; m_numberOfFragments.store(fragments); return m_numberOfFragments.load(); } - void setPreviousSlot(int prev) { + void setPreviousSlot (int prev) { if (prev != 1 && prev != 3) { - throw std::runtime_error("error: setting the previous slot value to " + - std::to_string(prev)); + throw std::runtime_error("error: setting the previous slot value to " + std::to_string(prev)); } m_previousSlot = prev; } - int getPreviousSlot() { + int getPreviousSlot () { if (m_previousSlot != 1 && m_previousSlot != 3) { - throw std::runtime_error("error: getting the previous slot value " + - std::to_string(m_previousSlot)); + throw std::runtime_error("error: getting the previous slot value " + std::to_string(m_previousSlot)); } return m_previousSlot.load(); } + void freeGraph() { + if (m_graph) { + if (m_graph.use_count() == 1) + LineageGraphFactory::getInstance().free(m_graph); + m_graph.reset(); + } + } + std::string toString() { std::string s; s.append(std::to_string(m_index)); @@ -460,24 +446,19 @@ struct PartialWindowResultsWrapper { std::shared_ptr m_partialWindows; int m_windowPos; bool m_isClosing = false; - PartialWindowResultsWrapper( - std::shared_ptr partialWindows = nullptr, - int pos = -1, bool isClosing = false) - : m_partialWindows(partialWindows), - m_windowPos(pos), - m_isClosing(isClosing){}; + PartialWindowResultsWrapper(std::shared_ptr partialWindows = nullptr, + int pos = -1, + bool isClosing = false) : + m_partialWindows(partialWindows), m_windowPos(pos), m_isClosing(isClosing) {}; }; struct CircularList { - std::vector> - m_buffer; + std::vector> m_buffer; int m_size; int m_readIdx; int m_writeIdx; int m_elements = 0; - CircularList(int size = 0) - : m_buffer(size, PartialWindowResultsWrapper()), m_size(size) { + CircularList(int size = 0) : m_buffer(size, PartialWindowResultsWrapper()), m_size(size) { m_readIdx = 0; m_writeIdx = size - 1; } @@ -495,7 +476,8 @@ struct CircularList { } m_writeIdx++; - if (m_writeIdx == (int)m_buffer.size()) m_writeIdx = 0; + if (m_writeIdx == (int) m_buffer.size()) + m_writeIdx = 0; m_buffer[m_writeIdx].m_partialWindows = partialWindows; m_buffer[m_writeIdx].m_windowPos = windowPos; @@ -507,29 +489,29 @@ struct CircularList { if (m_elements > 0) return &m_buffer[m_readIdx]; else - // return nullptr; - throw std::runtime_error("error: empty CircularList"); + //return nullptr; + throw std::runtime_error("error: 
empty CircularList in PartialWindowResultsWrapper"); } void pop_front() { m_elements--; m_readIdx++; - if (m_readIdx == (int)m_buffer.size()) m_readIdx = 0; + if (m_readIdx == (int) m_buffer.size()) + m_readIdx = 0; } int size() { return m_elements; } int capacity() { return m_size; } }; /* - * \brief PartialWindowResultSlot are used for operators with window fragments - * and multi-threaded merge phase + * \brief PartialWindowResultSlot are used for operators with window fragments and multi-threaded + * merge phase * */ struct alignas(64) PartialWindowResultSlot { - // tbb::concurrent_queue> - // partialWindows; + //tbb::concurrent_queue> partialWindows; CircularList m_partialWindows; - // std::vector> partialWindows; + //std::vector> partialWindows; std::shared_ptr m_completeWindows; int m_completeWindowsStartPos = 0; int m_completeWindow = 0; @@ -546,49 +528,51 @@ struct alignas(64) PartialWindowResultSlot { int m_closingBatchId; bool m_isFirstAggregation; long m_hashTableSize = 0; - - PartialWindowResultSlot(int index = -1) - : m_completeWindows(nullptr), - m_slot(-1), - m_finalize(false), - m_hasComplete(false), - m_windowId(-1), - m_index(index), - m_latch(0), - m_latencyMark(-1), - m_openingBatchId(-1), - m_closingBatchId(-1), - m_isFirstAggregation(false) {} + std::shared_ptr m_graph; + + PartialWindowResultSlot(int index = -1) : m_completeWindows(nullptr), + m_slot(-1), + m_finalize(false), + m_hasComplete(false), + m_windowId(-1), + m_index(index), + m_latch(0), + m_latencyMark(-1), + m_openingBatchId(-1), + m_closingBatchId(-1), + m_isFirstAggregation(false) {} void init(AggregateOperatorCode *aggrOperator) { m_hashTableSize = aggrOperator->getHashTableSizeAfterCodeGeneration(); } - void release(int qid, bool isMostDownstream, QueryBuffer &freeBuffer, - std::atomic &nextToForward, int numberOfSlots, - std::vector &slotsToRelease) { + void release(int qid, bool isMostDownstream, QueryBuffer &freeBuffer, std::atomic &nextToForward, int numberOfSlots, std::vector &slotsToRelease) { if (!m_resSlots.empty()) { if (slotsToRelease.empty()) { - // nextToForward += m_resSlots.size(); + //nextToForward += m_resSlots.size(); if (m_completeWindows != nullptr) { PartialWindowResultsFactory::getInstance().free( m_completeWindows->getThreadId(), m_completeWindows); } for (auto slot : m_resSlots) { if (slot->m_slot.load() == 3) { + if (SystemConf::getInstance().LINEAGE_ON && isMostDownstream) { + if (slot->m_graph) { + slot->m_graph->freePersistentState(qid); + slot->freeGraph(); + } + } slot->release(); // bool success = resSlot->tryRelease(); // if (!success) - // throw std::runtime_error("error: invalid state for - // PartialResultSlot while freeing"); + // throw std::runtime_error("error: invalid state for PartialResultSlot while freeing"); /* Free input buffer */ auto fPointer = slot->m_freePointer; if (fPointer != INT_MIN) freeBuffer.free(fPointer); - // std::cout << "[DBG] setting slot " + - // std::to_string(slot->m_index) + " to -1" << std::endl; + //std::cout << "[DBG] setting slot " + std::to_string(slot->m_index) + " to -1" << std::endl; slot->m_slot.store(-1); - nextToForward = (nextToForward + 1) % numberOfSlots; + nextToForward = (nextToForward+1) % numberOfSlots; // m_resSlot = nullptr; } else { slotsToRelease.emplace_back(slot); @@ -606,28 +590,31 @@ struct alignas(64) PartialWindowResultSlot { m_completeWindowsStartPos = 0; m_completeWindow = 0; m_length = 0; - // finalize.store(false); - // hasComplete.store(false); + if (m_graph) { + if (m_graph.use_count() == 1) { + 
LineageGraphFactory::getInstance().free(m_graph); + } + m_graph.reset(); + } + //finalize.store(false); + //hasComplete.store(false); } /* - * Aggregate this node's opening windows with node p's closing or pending - * windows. The output of this operation will always produce complete or - * opening windows - never pending and never closing ones. + * Aggregate this node's opening windows with node p's closing or pending windows. The output of this + * operation will always produce complete or opening windows - never pending and never closing ones. */ void aggregate(AggregateOperatorCode *aggrOperator, int pid) { std::shared_ptr partialWindowResult; // first aggregate in place and then continue if (m_completeWindows == nullptr || m_completeWindows->numberOfWindows() == 0) { - m_completeWindows = - PartialWindowResultsFactory::getInstance().newInstance( - pid, aggrOperator->getHashTableSizeAfterCodeGeneration(), 2); + m_completeWindows = PartialWindowResultsFactory::getInstance().newInstance(pid, + aggrOperator->getHashTableSizeAfterCodeGeneration(), + 2); m_completeWindows->getStartPointers()[0] = 0; - m_completeWindows->getStartPointers()[1] = - m_hashTableSize; // store the final result in the first half - // m_completeWindows->getStartPointers()[2] = hashTableSize * 2; // store - // intermediate results in the second half + m_completeWindows->getStartPointers()[1] = m_hashTableSize; // store the final result in the first half + //m_completeWindows->getStartPointers()[2] = hashTableSize * 2; // store intermediate results in the second half m_completeWindows->incrementCount(1); } // aggregate whatever is available @@ -635,15 +622,12 @@ struct alignas(64) PartialWindowResultSlot { int tupleSize = 0; int window = 0; bool isClosing = false; - while ((partialWindowResult = peekNextWindow(window, isClosing)) != - nullptr) { + while ((partialWindowResult = peekNextWindow(window, isClosing)) != nullptr) { if (!isClosing) { - // aggrOperator->aggregateSinglePartial(m_completeWindows, - // partialWindowResult, window, pos, tupleSize, false); + //aggrOperator->aggregateSinglePartial(m_completeWindows, partialWindowResult, window, pos, tupleSize, false); } else { // pack results and finalize the window - // aggrOperator->aggregateSinglePartial(m_completeWindows, - // partialWindowResult, window, pos, tupleSize, true); + //aggrOperator->aggregateSinglePartial(m_completeWindows, partialWindowResult, window, pos, tupleSize, true); m_completeWindows->setPosition(pos); m_completeWindows->setCount(1); this->m_slot.store(3); @@ -656,15 +640,14 @@ struct alignas(64) PartialWindowResultSlot { int idx = m_partialWindows.m_readIdx; for (int i = 0; i < m_partialWindows.m_elements; ++i) { auto window = m_partialWindows.m_buffer[idx].m_windowPos; - __builtin_prefetch(m_partialWindows.m_buffer[idx] - .m_partialWindows->getBufferPtrs()[window], - 1, 3); + __builtin_prefetch(m_partialWindows.m_buffer[idx].m_partialWindows->getBufferPtrs()[window], 1, 3); idx++; - if (idx == m_partialWindows.size()) idx = 0; + if (idx == m_partialWindows.size()) + idx = 0; } } - void aggregateAll(AggregateOperatorCode *aggrOperator, int pid) { + void aggregateAll(AggregateOperatorCode *aggrOperator, int pid, bool mark = true) { // aggregate whatever is available int startPos = -1; int endPos = -1; @@ -678,35 +661,46 @@ struct alignas(64) PartialWindowResultSlot { window = pw->m_windowPos; m_partialWindows.pop_front(); if (pw->m_partialWindows == nullptr) - throw std::runtime_error( - "error: the next partial window for aggregation is 
null"); + throw std::runtime_error("error: the next partial window for aggregation is null"); if (i == 0) { // memcpy the first hashtable for simplicity - aggrOperator->aggregateSinglePartial( - m_completeWindows, m_completeWindow, m_completeWindowsStartPos, - pw->m_partialWindows, window, startPos, endPos, tupleSize, false); + aggrOperator->aggregateSinglePartial(m_completeWindows, + m_completeWindow, + m_completeWindowsStartPos, + pw->m_partialWindows, + window, + startPos, + endPos, + tupleSize, + false); } else { - aggrOperator->aggregateSinglePartial( - m_completeWindows, m_completeWindow, m_completeWindowsStartPos, - pw->m_partialWindows, window, startPos, endPos, tupleSize, false); + aggrOperator->aggregateSinglePartial(m_completeWindows, m_completeWindow, m_completeWindowsStartPos, + pw->m_partialWindows, window, startPos, endPos, tupleSize, false); } pw->m_partialWindows.reset(); } // pack results in the end - aggrOperator->aggregateSinglePartial( - m_completeWindows, m_completeWindow, m_completeWindowsStartPos, nullptr, - window, startPos, endPos, tupleSize, true); + aggrOperator->aggregateSinglePartial(m_completeWindows, + m_completeWindow, + m_completeWindowsStartPos, + nullptr, + window, + startPos, + endPos, + tupleSize, + true); m_completeWindowsStartPos = startPos; m_length = endPos - startPos; - m_slot.store(3); + if (mark) { + m_slot.store(3); + } } - std::shared_ptr peekNextWindow(int &windowPos, - bool &isClosing) { + std::shared_ptr peekNextWindow(int &windowPos, bool &isClosing) { std::shared_ptr partial = nullptr; std::shared_ptr nextWindow = nullptr; - // partialWindows.try_pop(partial); - partial = nullptr; // partialWindows.front(); + //partialWindows.try_pop(partial); + partial = nullptr; //partialWindows.front(); m_partialWindows.pop_front(); if (partial != nullptr) { nextWindow = partial->m_partialWindows; @@ -718,12 +712,9 @@ struct alignas(64) PartialWindowResultSlot { return nextWindow; } - void tryReleaseWindow( - bool isPending, - std::shared_ptr partialWindowResult) { + void tryReleaseWindow(bool isPending, std::shared_ptr partialWindowResult) { if (!isPending || partialWindowResult.use_count() == 1) { - PartialWindowResultsFactory::getInstance().free( - partialWindowResult->getThreadId(), partialWindowResult); + PartialWindowResultsFactory::getInstance().free(partialWindowResult->getThreadId(), partialWindowResult); } } @@ -735,38 +726,44 @@ struct alignas(64) PartialWindowResultSlot { m_resSlot = slot; m_resSlot->m_slot.store(2);*/ m_resSlots.emplace_back(slot); - // slot->m_slot.store(3); - auto oldVal = 1; // slot->m_slot.load(); + //slot->m_slot.store(3); + auto oldVal = 1; //slot->m_slot.load(); if (!slot->m_slot.compare_exchange_weak(oldVal, 3)) { - // std::cout << "[DBG] warning: setting the previous slot value because - // the state is not equal to 3" << std::endl; + //std::cout << "[DBG] warning: setting the previous slot value because the state is not equal to 3" << std::endl; // todo: does this work?? 
slot->setPreviousSlot(3); - // throw std::runtime_error("error: failed to set the result slot as the - // previous value was " + std::to_string(oldVal)); + //throw std::runtime_error("error: failed to set the result slot as the previous value was " + std::to_string(oldVal)); } - // std::cout << "[DBG] setting slot " + std::to_string(slot->m_index) + " to - // 3" << std::endl; + //std::cout << "[DBG] setting slot " + std::to_string(slot->m_index) + " to 3" << std::endl; } - void setOpeningBatchId(int batchId) { m_openingBatchId = batchId; } + void setOpeningBatchId(int batchId) { + m_openingBatchId = batchId; + } - void setClosingBatchId(int batchId) { m_closingBatchId = batchId; } + void setClosingBatchId(int batchId) { + m_closingBatchId = batchId; + } - void setHasAllBatches() { m_finalize.store(true); } + void setHasAllBatches() { + m_finalize.store(true); + } - void setHasComplete() { m_hasComplete.store(true); } + void setHasComplete() { + m_hasComplete.store(true); + } bool hasAllBatches() { - // if (openingBatchId != -1 && closingBatchId != -1) + //if (openingBatchId != -1 && closingBatchId != -1) // return counter.load() == (closingBatchId-openingBatchId); - // return false; + //return false; return m_finalize.load(); } bool hasWork() { - // if (partialWindows.unsafe_size() == 0) - if (m_partialWindows.size() == 0) return false; + //if (partialWindows.unsafe_size() == 0) + if (m_partialWindows.size() == 0) + return false; return true; } @@ -774,12 +771,10 @@ struct alignas(64) PartialWindowResultSlot { size_t currentIndex; size_t indexToInsert = 0; auto completeWindow = m_completeWindows->getBufferPtrs()[0]; - for (currentIndex = 0; currentIndex < (size_t)m_hashTableSize * tupleSize; - currentIndex += tupleSize) { + for (currentIndex = 0; currentIndex < (size_t) m_hashTableSize * tupleSize; currentIndex += tupleSize) { if (completeWindow[currentIndex] == 1) { if (indexToInsert != currentIndex) { - std::memcpy(&completeWindow[indexToInsert], - &completeWindow[currentIndex], tupleSize); + std::memcpy(&completeWindow[indexToInsert], &completeWindow[currentIndex], tupleSize); } indexToInsert += tupleSize; } @@ -787,13 +782,15 @@ struct alignas(64) PartialWindowResultSlot { m_completeWindows->setPosition(indexToInsert); } - bool isReady() { return m_hasComplete.load(); } + bool isReady() { + return m_hasComplete.load(); + } std::string toString() { std::string s; s.append(std::to_string(m_index)); s.append(" ["); - // s.append(std::to_string(partialWindows.unsafe_size())).append(" "); + //s.append(std::to_string(partialWindows.unsafe_size())).append(" "); s.append(std::to_string(m_partialWindows.size())).append(" "); s.append(std::to_string(m_completeWindows->numberOfWindows())); s.append("] "); diff --git a/src/result/ResultHandler.cpp b/src/result/ResultHandler.cpp index 3a36bdd..02c51ad 100644 --- a/src/result/ResultHandler.cpp +++ b/src/result/ResultHandler.cpp @@ -1,43 +1,58 @@ #include "result/ResultHandler.h" +#if defined(TCP_OUTPUT) +#include +#endif #include #include +#include #include #include "buffers/PartialWindowResultsFactory.h" #include "buffers/QueryBuffer.h" +#include "buffers/UnboundedQueryBufferFactory.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "checkpoint/LineageGraphFactory.h" +#include "dispatcher/ITaskDispatcher.h" +#include "dispatcher/JoinTaskDispatcher.h" #include "dispatcher/TaskDispatcher.h" #include "monitors/LatencyMonitor.h" #include "result/PartialResultSlot.h" #include "tasks/WindowBatch.h" #include "utils/Query.h" +#include 
"utils/QueryApplication.h" +#include "utils/QueryConfig.h" +#include "utils/QueryOperator.h" #include "utils/Utils.h" -ResultHandler::ResultHandler(Query &query, QueryBuffer &freeBuffer, - bool hasWindowFragments, bool useParallelMerge) - : m_query(query), - m_freeBuffer(freeBuffer), - m_hasWindowFragments(hasWindowFragments), - m_useParallelMerge(useParallelMerge), - m_nextToForward(0), - m_nextWindowToForward(0), - m_nextToAggregate(0), - m_totalOutputBytes(0L), - m_numberOfSlots(SystemConf::getInstance().SLOTS), - m_reservedSlots(0), - m_currentWindowSlot(0), - m_nextToAggregateWindows(0), - m_nextToForwardPtrs(0), - m_mergeLocks(useParallelMerge ? m_numberOfWindowSlots : 0), - m_resultsWithoutFrags(hasWindowFragments ? 0 : m_numberOfSlots), - m_results(hasWindowFragments ? m_numberOfSlots : 0), - m_windowResults(useParallelMerge ? m_numberOfWindowSlots : 0), - m_openingWindowsList(m_windowResults.size()) { +ResultHandler::ResultHandler(Query &query, QueryBuffer &freeBuffer1, QueryBuffer &freeBuffer2, bool hasWindowFragments, bool useParallelMerge) : + m_query(query), + m_freeBuffer1(freeBuffer1), + m_freeBuffer2(freeBuffer2), + m_hasWindowFragments(hasWindowFragments), + m_useParallelMerge(useParallelMerge), + m_maxTaskId(0), + m_nextToForward(0), + m_nextWindowToForward(0), + m_nextToAggregate(0), + m_totalOutputBytes(0L), + m_numberOfSlots(!query.getConfig() ? SystemConf::getInstance().SLOTS : query.getConfig()->getNumberOfSlots()), + m_reservedSlots(0), + m_currentWindowSlot(0), + m_nextToAggregateWindows(0), + m_nextToForwardPtrs(0), + m_mergeLocks(useParallelMerge ? m_numberOfWindowSlots : 0), + m_resultsWithoutFrags(hasWindowFragments ? 0 : m_numberOfSlots), + m_results(hasWindowFragments ? m_numberOfSlots : 0), + m_windowResults(useParallelMerge ? 
m_numberOfWindowSlots : 0), + m_openingWindowsList(m_windowResults.size()) { + if (hasWindowFragments) { for (int i = 0, j = i - 1; i < m_numberOfSlots; i++, j++) { m_results[i].m_index = i; - if (j >= 0) m_results[j].connectTo(&m_results[i]); + if (j >= 0) + m_results[j].connectTo(&m_results[i]); } m_results[m_numberOfSlots - 1].connectTo(&m_results[0]); } else { @@ -47,10 +62,58 @@ ResultHandler::ResultHandler(Query &query, QueryBuffer &freeBuffer, } } -long ResultHandler::getTotalOutputBytes() { return m_totalOutputBytes; } +long ResultHandler::getTotalOutputBytes() { + return m_totalOutputBytes; +} void ResultHandler::incTotalOutputBytes(int bytes) { - m_totalOutputBytes += ((long)bytes); + m_totalOutputBytes += ((long) bytes); +} + +void ResultHandler::setupSocket() { +#if defined(TCP_OUTPUT) + if (m_compressOutput) { + //m_compressBuffer = UnboundedQueryBufferFactory::getInstance().newInstance(0); + m_compressBuffers.resize(SystemConf::getInstance().WORKER_THREADS); + for (auto &b: m_compressBuffers) { + b = ByteBuffer(SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE); + } + } + struct sockaddr_in serv_addr {}; + if ((m_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + throw std::runtime_error("error: Socket creation error"); + } + + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(PORT); + + std::cout << "Setting up output socket: " << SystemConf::getInstance().REMOTE_CLIENT << std::endl; + // Convert IPv4 and IPv6 addresses from text to binary form + if (inet_pton(AF_INET, SystemConf::getInstance().REMOTE_CLIENT.c_str(), + &serv_addr.sin_addr) <= 0) { + throw std::runtime_error("error: Invalid address/ Address not supported"); + } + + if (connect(m_sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) { + throw std::runtime_error("error: Connection Failed"); + } + +#elif defined(RDMA_OUTPUT) + // Create new context + m_context = new infinity::core::Context(); + // Create a queue pair + m_qpFactory = new infinity::queues::QueuePairFactory(m_context); + std::cout << "Connecting to remote node " << SystemConf::getInstance().REMOTE_CLIENT << "..." 
<< std::endl; + m_qp = m_qpFactory->connectToRemoteHost(SystemConf::getInstance().REMOTE_CLIENT.c_str(), PORT); + std::cout << "Connected to remote node " << SystemConf::getInstance().REMOTE_CLIENT << std::endl; + m_sendBuffer = new infinity::memory::Buffer(m_context, SystemConf::getInstance().OUTPUT_BUFFER_SIZE * sizeof(char)); + m_receiveBuffer = new infinity::memory::Buffer(m_context, sizeof(char)); + m_context->postReceiveBuffer(m_receiveBuffer); + + std::cout <<"Sending first message" << std::endl; + m_qp->send(m_sendBuffer, sizeof(char), m_context->defaultRequestToken); + m_context->defaultRequestToken->waitUntilCompleted(); +#endif } void ResultHandler::forwardAndFree(WindowBatch *batch) { @@ -69,7 +132,10 @@ void ResultHandler::forwardAndFreeWithoutFrags(WindowBatch *batch) { int taskId = batch->getTaskId(); auto currentQuery = batch->getQuery(); auto result = batch->getOutputBuffer(); - int freePtr = batch->getFreePointer(); + auto freePtr1 = batch->getFreePointer(); + auto freePtr2 = batch->getSecondFreePointer(); + auto prevFreePtr1 = batch->getPrevFreePointer(); + auto prevFreePtr2 = batch->getPrevSecondFreePointer(); auto latencyMark = batch->getLatencyMark(); auto taskType = batch->getTaskType(); @@ -80,7 +146,7 @@ void ResultHandler::forwardAndFreeWithoutFrags(WindowBatch *batch) { int idx = ((taskId - 1) % m_numberOfSlots); try { auto oldVal = -1; - if (taskType == TaskType::PROCESS) { + if (taskType == TaskType::PROCESS || taskType == TaskType::ONLY_PROCESS) { while ( !m_resultsWithoutFrags[idx].m_slot.compare_exchange_weak(oldVal, 0)) { std::cout << "warning: result collector (" << std::this_thread::get_id() @@ -91,110 +157,238 @@ void ResultHandler::forwardAndFreeWithoutFrags(WindowBatch *batch) { _mm_pause(); } - m_resultsWithoutFrags[idx].m_freePointer = freePtr; + m_resultsWithoutFrags[idx].m_freePointer1 = freePtr1; + m_resultsWithoutFrags[idx].m_freePointer2 = freePtr2; + m_resultsWithoutFrags[idx].m_prevFreePointer1 = prevFreePtr1; + m_resultsWithoutFrags[idx].m_prevFreePointer2 = prevFreePtr2; m_resultsWithoutFrags[idx].m_result = result; m_resultsWithoutFrags[idx].m_latch = 0; m_resultsWithoutFrags[idx].m_latencyMark = latencyMark; + if (SystemConf::getInstance().LINEAGE_ON) { + m_resultsWithoutFrags[idx].m_graph = std::move(batch->getLineageGraph()); + m_resultsWithoutFrags[idx].m_freeOffset1 = batch->getStreamEndPointer(); + // todo: fix second offset here + batch->getLineageGraph().reset(); + } m_resultsWithoutFrags[idx].m_taskId = taskId; /* No other thread can modify this slot. */ m_resultsWithoutFrags[idx].m_slot.store(1); + updateMaximumTaskId(taskId); } - /* Forward and free */ - if (!m_forwardLock.try_lock()) return; + if (taskType == TaskType::PROCESS || taskType == TaskType::FORWARD || taskType == TaskType::MERGE_FORWARD) { + /* Forward and free */ + if (!m_forwardLock.try_lock()) return; - /* No other thread can enter this section */ - /* Is slot `next` occupied? */ - oldVal = 1; - if (!m_resultsWithoutFrags[m_nextToForward].m_slot.compare_exchange_strong( - oldVal, 2)) { - m_forwardLock.unlock(); - return; - } + /* No other thread can enter this section */ + /* Is slot `next` occupied? 
*/ + oldVal = 1; + if (!m_resultsWithoutFrags[m_nextToForward] + .m_slot.compare_exchange_strong(oldVal, 2)) { + m_forwardLock.unlock(); + return; + } - bool busy = true; - while (busy) { - auto buffer = m_resultsWithoutFrags[m_nextToForward].m_result; - int length = buffer->getPosition(); + bool busy = true; + while (busy) { + auto buffer = m_resultsWithoutFrags[m_nextToForward].m_result; + int length = buffer->getPosition(); + + /* Forward results */ + if (length > 0 && currentQuery->getNumberOfDownstreamQueries() > 0) { + if (SystemConf::getInstance().LINEAGE_ON && length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) { + std::cout << "warning: dropping duplicate results for query " + std::to_string(m_query.getId()) + + " with offset lower than " + std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl; + } else { + /* Forward the latency mark downstream... */ + if (SystemConf::getInstance().LATENCY_ON && + (m_resultsWithoutFrags[m_nextToForward].m_latencyMark != -1)) { + long t1 = + m_resultsWithoutFrags[m_nextToForward] + .m_latencyMark; //(long) Utils::getSystemTimestamp (freeBuffer.getLong (resultsWithoutFrags[m_nextToForward].latencyMark)); + long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); + buffer->putLong(0, Utils::pack(t1, t2)); + } - /* Forward results */ - if (length > 0 && currentQuery->getNumberOfDownstreamQueries() > 0) { - /* Forward the latency mark downstream... */ - if (SystemConf::getInstance().LATENCY_ON && - (m_resultsWithoutFrags[m_nextToForward].m_latencyMark != -1)) { - long t1 = - m_resultsWithoutFrags[m_nextToForward] - .m_latencyMark; //(long) Utils::getSystemTimestamp - //(freeBuffer.getLong - //(resultsWithoutFrags[m_nextToForward].latencyMark)); - long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); - buffer->putLong(0, Utils::pack(t1, t2)); - } + if (SystemConf::getInstance().LINEAGE_ON && (m_resultsWithoutFrags[m_nextToForward].m_graph || m_graph)) { + if (m_graph && m_resultsWithoutFrags[m_nextToForward].m_graph) { + m_resultsWithoutFrags[m_nextToForward].m_graph->mergeGraphs(m_graph); + m_graph.reset(); + } else if (m_graph) { + m_resultsWithoutFrags[m_nextToForward].m_graph = m_graph; + m_graph.reset(); + } + m_resultsWithoutFrags[m_nextToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length); + } - int nextQuery = m_resultsWithoutFrags[m_nextToForward].m_latch; - for (int q = nextQuery; - q < currentQuery->getNumberOfDownstreamQueries(); ++q) { - if (currentQuery->getDownstreamQuery(q) != nullptr) { - bool success = - currentQuery->getDownstreamQuery(q) - ->getTaskDispatcher() - ->tryDispatch( - buffer->getBuffer().data(), length, - m_resultsWithoutFrags[m_nextToForward].m_latencyMark); - if (!success) { - std::cout << "[DBG] failed to forward results to next query..." - << std::endl; - m_resultsWithoutFrags[m_nextToForward].m_latch = q; - m_resultsWithoutFrags[m_nextToForward].m_slot.store(1); - m_forwardLock.unlock(); - return; + int nextQuery = m_resultsWithoutFrags[m_nextToForward].m_latch; + for (int q = nextQuery; + q < currentQuery->getNumberOfDownstreamQueries(); ++q) { + if (currentQuery->getDownstreamQuery(q) != nullptr) { + bool success = false; + auto dispatcher = + currentQuery->getDownstreamQuery(q)->getTaskDispatcher(); + if (m_query.getIsLeft()) { + auto upstream = currentQuery->getDownstreamQuery(q) + ->getNumberOfUpstreamQueries(); + success = + (upstream == 1) + ? 
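The `Utils::pack(t1, t2)` call above folds the latency mark and the tuple timestamp into the single 8-byte field at the head of the outgoing buffer. A sketch of the scheme this implies, assuming each component fits in 32 bits (the actual `Utils` encoding may differ):

```cpp
#include <cstdint>

// Assumed layout: system timestamp in the high 32 bits, tuple timestamp in the low 32.
inline int64_t pack(int64_t systemTs, int64_t tupleTs) {
  return (systemTs << 32) | (tupleTs & 0xFFFFFFFFLL);
}
inline int64_t unpackSystemTs(int64_t v) { return v >> 32; }
inline int64_t unpackTupleTs(int64_t v) { return v & 0xFFFFFFFFLL; }
```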
dispatcher->tryDispatchToFirstStream(
+                          buffer->getBufferRaw(), length,
+                          m_resultsWithoutFrags[m_nextToForward]
+                              .m_latencyMark,
+                          m_resultsWithoutFrags[m_nextToForward].m_graph)
+                    : dispatcher->tryDispatchSerialToFirstStream(
+                          buffer->getBufferRaw(), length, m_forwardId,
+                          m_resultsWithoutFrags[m_nextToForward]
+                              .m_latencyMark,
+                          m_resultsWithoutFrags[m_nextToForward].m_graph);
+          } else {
+            success = currentQuery->getDownstreamQuery(q)
+                          ->getTaskDispatcher()
+                          ->tryDispatchSerialToSecondStream(
+                              buffer->getBufferRaw(), length, m_forwardId,
+                              m_resultsWithoutFrags[m_nextToForward]
+                                  .m_latencyMark, m_resultsWithoutFrags[m_nextToForward].m_graph);
+          }
+
+          if (!success) {
+            std::cout << "[DBG] failed to forward results from query "
+                      << std::to_string(currentQuery->getId())
+                      << " to next query "
+                      << std::to_string(
+                             currentQuery->getDownstreamQuery(q)->getId())
+                      << "..." << std::endl;
+            m_resultsWithoutFrags[m_nextToForward].m_latch = q;
+            m_resultsWithoutFrags[m_nextToForward].m_slot.store(1);
+            m_forwardLock.unlock();
+            return;
+          } else {
+            m_resultsWithoutFrags[m_nextToForward].m_graph.reset();
+            m_forwardId++;
+          }
+        }
+      }
+    }
+  } else if (length > 0 && currentQuery->isMostDownstream()) {
+#if defined(TCP_OUTPUT)
+    // send data over tcp to a remote sink
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      m_graph->serialize();
+      auto vecSize = m_graph->m_clockVector.size();
+      send(m_sock, m_graph->m_clockVector.data(), vecSize, 0);
+    }
+    //if (!m_compressOutput) {
+      send(m_sock, buffer->getBufferRaw(), length, 0);
+    //} else {
+    //  size_t output_length;
+    //  snappy::RawCompress(buffer->getBufferRaw(), length,
+    //  m_compressBuffer->getBuffer().data(), &output_length);
+    //  send(m_sock, m_compressBuffer->getBuffer().data(), output_length, 0);
+    //}
+    m_forwardId++;
+#elif defined(RDMA_OUTPUT)
+    infinity::requests::RequestToken requestToken(m_context);
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      m_graph->serialize();
+      auto vecSize = m_graph->m_clockVector.size();
+      std::memcpy(m_sendBuffer->getData(), m_graph->m_clockVector.data(), vecSize);
+      m_qp->send(m_sendBuffer, vecSize, &requestToken);
+    }
+    if (length < m_sendBuffer->getSizeInBytes()) {
+      std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw(), length);
+      m_qp->send(m_sendBuffer, length, &requestToken);
+      requestToken.waitUntilCompleted();
+    } else {
+      std::cout << "[DBG] sending with RDMA " + std::to_string(length) + " > " + std::to_string(m_sendBuffer->getSizeInBytes()) << std::endl;
+      auto curLength = std::min(length, (int)m_sendBuffer->getSizeInBytes());
+      auto maxLength = 0;  // start at zero; otherwise the loop below never runs
+      while (maxLength < length) {
+        // advance the source offset by maxLength so each chunk sends new data
+        std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw() + maxLength, curLength);
+        m_qp->send(m_sendBuffer, curLength, &requestToken);
+        requestToken.waitUntilCompleted();
+        maxLength += curLength;
+        curLength = std::min(length - maxLength, (int)m_sendBuffer->getSizeInBytes());
+      }
+    }
+    m_forwardId++;
+#endif
+    if (SystemConf::getInstance().LINEAGE_ON) {
+      if (m_resultsWithoutFrags[m_nextToForward].m_graph) {
+        m_resultsWithoutFrags[m_nextToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length);
+      }
+      if (length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) {
+        std::cout << "warning: dropping duplicate results for query " +
+            std::to_string(m_query.getId()) + " with offset lower than " +
+            std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl;
+      }
+    }
+  }
-      }
-      /* Forward to the distributed API */
-      /* Measure latency */
-      if (currentQuery->isMostDownstream()) {
-        if 
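The RDMA branch above stages oversized results through `m_sendBuffer` in fixed-size pieces; the source offset must advance together with `maxLength` (fixed above), otherwise the same first chunk would be resent. The same pattern in isolation, with `sendChunk` standing in for the queue-pair send plus completion wait:

```cpp
#include <algorithm>
#include <cstring>

// Copy `length` bytes through a bounded staging buffer, one chunk at a time.
void sendAll(const char *src, int length, char *staging, int stagingSize,
             void (*sendChunk)(const char *, int)) {
  int sent = 0;
  int cur = std::min(length, stagingSize);
  while (sent < length) {
    std::memcpy(staging, src + sent, cur);  // advance the source by `sent`
    sendChunk(staging, cur);
    sent += cur;
    cur = std::min(length - sent, stagingSize);
  }
}
```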
(SystemConf::getInstance().LATENCY_ON && - (m_resultsWithoutFrags[m_nextToForward].m_latencyMark != -1)) { - m_query.getLatencyMonitor().monitor( - m_freeBuffer, - m_resultsWithoutFrags[m_nextToForward].m_latencyMark); + /* Forward to the distributed API */ + /* Measure latency */ + if (currentQuery->isMostDownstream()) { + if (SystemConf::getInstance().LATENCY_ON && + (m_resultsWithoutFrags[m_nextToForward].m_latencyMark != -1 + && m_resultsWithoutFrags[m_nextToForward].m_latencyMark != 0)) { // when we have pipelined operators many 0 marks arrive + m_query.getLatencyMonitor().monitor( + m_freeBuffer1, + m_resultsWithoutFrags[m_nextToForward].m_latencyMark); + } } - } - /* - * Before releasing the result buffer, increment bytes generated. It is - * important all operators set the position of the buffer accordingly. - * Assume that the start position is 0. - */ - incTotalOutputBytes(length); - buffer->clear(); // reset position - PartialWindowResultsFactory::getInstance().free(buffer->getThreadId(), - buffer); + /* + * Before releasing the result buffer, increment bytes generated. It is important all operators set the position of the buffer accordingly. Assume that the start position is 0. + */ + incTotalOutputBytes(length); + buffer->clear(); // reset position + PartialWindowResultsFactory::getInstance().free(buffer->getThreadId(),buffer); + + /* Free input buffer */ + auto fPointer1 = m_resultsWithoutFrags[m_nextToForward].m_freePointer1; + if (fPointer1 != INT_MIN) m_freeBuffer1.free(fPointer1); + auto fPointer2 = m_resultsWithoutFrags[m_nextToForward].m_freePointer2; + if (fPointer2 != INT_MIN) m_freeBuffer2.free(fPointer2); + + if (SystemConf::getInstance().LINEAGE_ON && m_resultsWithoutFrags[m_nextToForward].m_graph) { + if (currentQuery->isMostDownstream()) { + if (SystemConf::getInstance().CHECKPOINT_ON) { + if (!m_checkpointGraph) { + m_checkpointGraph = LineageGraphFactory::getInstance().newInstance(); + } + m_checkpointGraph->advanceOffsets(m_resultsWithoutFrags[m_nextToForward].m_graph); + m_query.getParent()->getCheckpointCoordinator()->tryToPurgeCheckpoint(m_checkpointGraph); + } + m_resultsWithoutFrags[m_nextToForward].m_graph->freePersistentState(m_query.getId()); + m_resultsWithoutFrags[m_nextToForward].freeGraph(); + } else if (m_resultsWithoutFrags[m_nextToForward].m_graph.use_count() == 1) { + if (m_graph) { + m_graph->mergeGraphs(m_resultsWithoutFrags[m_nextToForward].m_graph); + } else { + m_graph = m_resultsWithoutFrags[m_nextToForward].m_graph; + } + m_resultsWithoutFrags[m_nextToForward].m_graph.reset(); + } + } - /* Free input buffer */ - auto fPointer = m_resultsWithoutFrags[m_nextToForward].m_freePointer; - if (fPointer != INT_MIN) m_freeBuffer.free(fPointer); + /* Release the current slot */ + m_resultsWithoutFrags[m_nextToForward].m_slot.store(-1); - /* Release the current slot */ - m_resultsWithoutFrags[m_nextToForward].m_slot.store(-1); + /* Increment next */ + m_nextToForward += 1; + if (m_nextToForward == m_numberOfSlots) m_nextToForward = 0; - /* Increment next */ - m_nextToForward += 1; - if (m_nextToForward == m_numberOfSlots) m_nextToForward = 0; - - /* Check if next is ready to be pushed */ - oldVal = 1; - if (!m_resultsWithoutFrags[m_nextToForward] - .m_slot.compare_exchange_strong(oldVal, 2)) { - busy = false; + /* Check if next is ready to be pushed */ + oldVal = 1; + if (!m_resultsWithoutFrags[m_nextToForward] + .m_slot.compare_exchange_strong(oldVal, 2)) { + busy = false; + } } + /* Thread exit critical section */ + m_forwardLock.unlock(); } - /* 
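The concurrency protocol in `forwardAndFreeWithoutFrags` condenses to: any worker claims its slot with a CAS (-1 -> 0) and publishes it (-> 1); whichever thread wins `m_forwardLock` drains ready slots in task order, marking each busy (1 -> 2) and then free (-> -1). A condensed sketch of that state machine (names are illustrative, not the engine's API):

```cpp
#include <atomic>
#include <mutex>
#include <vector>

struct Slot { std::atomic<int> state{-1}; /* -1 free, 0 filling, 1 ready, 2 forwarding */ };

std::vector<Slot> slots(8);
std::mutex forwardLock;
int nextToForward = 0;

void publish(int taskId) {
  auto &s = slots[(taskId - 1) % slots.size()];
  int expected = -1;
  while (!s.state.compare_exchange_weak(expected, 0)) expected = -1;  // claim slot
  /* ...write results into the slot... */
  s.state.store(1);  // publish: visible to the forwarder
}

void forwardInOrder() {
  if (!forwardLock.try_lock()) return;  // at most one forwarder at a time
  int expected = 1;
  while (slots[nextToForward].state.compare_exchange_strong(expected, 2)) {
    /* ...emit slot contents downstream, free input buffers... */
    slots[nextToForward].state.store(-1);  // release the slot
    nextToForward = (nextToForward + 1) % slots.size();
    expected = 1;
  }
  forwardLock.unlock();
}
```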
Thread exit critical section */ - m_forwardLock.unlock(); } catch (std::exception &e) { std::cout << e.what() << std::endl; @@ -212,10 +406,10 @@ void ResultHandler::aggregateAndForwardAndFree(WindowBatch *batch) { int idx = ((taskId - 1) % m_numberOfSlots); try { - // First try to merge partial results and then if we have complete result - // try to forward them + auto oldVal = -1; - if (taskType == TaskType::PROCESS) { + // First try to merge partial results and then if we have complete result try to forward them + if (taskType == TaskType::PROCESS || taskType == TaskType::ONLY_PROCESS) { while (!m_results[idx].m_slot.compare_exchange_weak(oldVal, 0)) { std::cout << "warning: result collector (" << std::this_thread::get_id() << ") blocked: query " + @@ -227,151 +421,309 @@ void ResultHandler::aggregateAndForwardAndFree(WindowBatch *batch) { } /* Slot `idx` has been reserved for this task id */ + if (SystemConf::getInstance().LINEAGE_ON) { + m_results[idx].m_graph = std::move(batch->getLineageGraph()); + m_results[idx].m_freeOffset = batch->getStreamEndPointer(); + batch->getLineageGraph().reset(); + } m_results[idx].init(batch); /* No other thread can modify this slot. */ m_results[idx].m_slot.store(1); + updateMaximumTaskId(taskId); } - /* Try aggregate slots pair-wise */ - if (m_mergeLock.try_lock()) { - PartialResultSlot *currentSlot; - PartialResultSlot *nextSlot; - while (true) { - currentSlot = &m_results[m_nextToAggregate]; - nextSlot = currentSlot->m_next; + int mergeCounter = 0; + if (taskType == TaskType::PROCESS || taskType == TaskType::MERGE || taskType == TaskType::MERGE_FORWARD) { + /* Try aggregate slots pair-wise */ + if (m_mergeLock.try_lock()) { + PartialResultSlot *currentSlot; + PartialResultSlot *nextSlot; + while (true) { + currentSlot = &m_results[m_nextToAggregate]; + nextSlot = currentSlot->m_next; + + int currentSlotFlag = currentSlot->m_slot.load(); + if (currentSlotFlag > 1 && currentSlotFlag != 5) + throw std::runtime_error( + "error: inconsistent state in next result slot to aggregate with currentSlotFlag " + + std::to_string(currentSlotFlag)); + if (currentSlotFlag < 1 || currentSlotFlag == 5) break; + int nextSlotFlag = nextSlot->m_slot.load(); + if (nextSlotFlag > 1 && nextSlotFlag != 5) { + if (nextSlotFlag == 4) break; + debugAggregateAndForwardAndFree(); + throw std::runtime_error( + "error: inconsistent state in next result slot to aggregate with nextSlotFlag " + + std::to_string(nextSlotFlag)); + } + if (nextSlotFlag < 1 || nextSlotFlag == 5) break; - int currentSlotFlag = currentSlot->m_slot.load(); - if (currentSlotFlag > 1) - throw std::runtime_error( - "error: inconsistent state in next result slot to aggregate"); - if (currentSlotFlag < 1) break; - int nextSlotFlag = nextSlot->m_slot.load(); - if (nextSlotFlag > 1) - throw std::runtime_error( - "error: inconsistent state in next result slot to aggregate"); - if (nextSlotFlag < 1) break; - - /* Both currentSlot and nextSlot nodes are ready to aggregate. */ - currentSlot->aggregate(nextSlot, m_aggrOperator); - if (!currentSlot->isReady()) - throw std::runtime_error( - "error: result slot aggregated but is not ready"); + /* Both currentSlot and nextSlot nodes are ready to aggregate. 
*/ + currentSlot->aggregate(nextSlot, m_aggrOperator); + if (!currentSlot->isReady()) + throw std::runtime_error( + "error: result slot aggregated but is not ready"); - m_nextToAggregate = nextSlot->m_index; - currentSlot->m_slot.store(3); // READY for forwarding - if (nextSlot->isReady()) { - m_nextToAggregate = nextSlot->m_next->m_index; - nextSlot->m_slot.store(3); // READY for forwarding + m_nextToAggregate = nextSlot->m_index; + currentSlot->m_slot.store(3); // READY for forwarding + if (nextSlot->isReady()) { + m_nextToAggregate = nextSlot->m_next->m_index; + nextSlot->m_slot.store(3); // READY for forwarding + } + //if (SystemConf::getInstance().CHECKPOINT_ON && m_stopMerging && mergeCounter++ == 3) { + // break; + //} } + m_mergeLock.unlock(); } - m_mergeLock.unlock(); } - /* Forward and free */ - if (!m_forwardLock.try_lock()) return; + if (taskType == TaskType::PROCESS || taskType == TaskType::FORWARD || taskType == TaskType::MERGE_FORWARD) { + //if (SystemConf::getInstance().CHECKPOINT_ON && m_stopMerging) + // return; + /* Forward and free */ + if (!m_forwardLock.try_lock()) return; - /* No other thread can enter this section */ - /* Is slot `next` occupied? */ - oldVal = 3; - if (!m_results[m_nextToForward].m_slot.compare_exchange_strong(oldVal, 4)) { - m_forwardLock.unlock(); - return; - } + /* No other thread can enter this section */ + /* Is slot `next` occupied? */ + oldVal = 3; + if (!m_results[m_nextToForward].m_slot.compare_exchange_strong(oldVal, + 4)) { + m_forwardLock.unlock(); + return; + } - bool busy = true; - while (busy) { - auto buffer = m_results[m_nextToForward].m_completeWindows; - int length = buffer->getPosition(); + bool busy = true; + while (busy) { + auto buffer = m_results[m_nextToForward].m_completeWindows; + int length = buffer->getPosition(); + + //auto buff = ((long*)buffer->getBufferRaw()); + //for (int ii = 0; ii < length/16; ++ii) { + // std::cout << std::to_string(buff[ii*2]) << std::endl; + //} + + /* Forward results */ + if (length > 0 && currentQuery->getNumberOfDownstreamQueries() > 0) { + if (SystemConf::getInstance().LINEAGE_ON && length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) { + std::cout << "warning: dropping duplicate results for query " + std::to_string(m_query.getId()) + + " with offset lower than " + std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl; + + } else { + /* Forward the latency mark downstream... */ + if (SystemConf::getInstance().LATENCY_ON && + (m_results[m_nextToForward].m_latencyMark != -1)) { + long t1 = + m_results[m_nextToForward] + .m_latencyMark; //(long) Utils::getSystemTimestamp (freeBuffer.getLong (results[m_nextToForward].latencyMark)); + long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); + buffer->putLong(0, Utils::pack(t1, t2)); + } - /* Forward results */ - if (length > 0 && currentQuery->getNumberOfDownstreamQueries() > 0) { - /* Forward the latency mark downstream... 
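The pair-wise merge above relies on the LightSaber observation that the window fragments left open at the end of slot i are completed by the closing fragments at the start of slot i+1. A toy illustration for a sum aggregate, assuming exactly one opening and one closing fragment per slot:

```cpp
#include <vector>

// Toy model: each result slot carries a closing (head) and an opening (tail) partial sum.
struct Partial { long closing; long opening; };

// Merging slot i with slot i+1 completes the window spanning their boundary.
std::vector<long> mergeAdjacent(const std::vector<Partial> &slots) {
  std::vector<long> complete;
  for (size_t i = 0; i + 1 < slots.size(); ++i)
    complete.push_back(slots[i].opening + slots[i + 1].closing);
  return complete;
}
```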
*/ - if (SystemConf::getInstance().LATENCY_ON && - (m_results[m_nextToForward].m_latencyMark != -1)) { - long t1 = - m_results[m_nextToForward] - .m_latencyMark; //(long) Utils::getSystemTimestamp - //(freeBuffer.getLong - //(results[m_nextToForward].latencyMark)); - long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); - buffer->putLong(0, Utils::pack(t1, t2)); - } + if (SystemConf::getInstance().LINEAGE_ON && (m_results[m_nextToForward].m_graph || m_graph)) { + if (m_graph && m_results[m_nextToForward].m_graph) { + m_results[m_nextToForward].m_graph->mergeGraphs(m_graph); + m_graph.reset(); + } else if (m_graph) { + m_results[m_nextToForward].m_graph = m_graph; + m_graph.reset(); + } + m_results[m_nextToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length); + } - int nextQuery = m_results[m_nextToForward].m_latch; - for (int q = nextQuery; - q < currentQuery->getNumberOfDownstreamQueries(); ++q) { - if (currentQuery->getDownstreamQuery(q) != nullptr) { - bool success = - currentQuery->getDownstreamQuery(q) - ->getTaskDispatcher() - ->tryDispatch(buffer->getBuffer().data(), length, - m_results[m_nextToForward].m_latencyMark); - if (!success) { - std::cout << "[DBG] failed to forward results to next query..." - << std::endl; - m_results[m_nextToForward].m_latch = q; - m_results[m_nextToForward].m_slot.store(3); - m_forwardLock.unlock(); - return; + int nextQuery = m_results[m_nextToForward].m_latch; + for (int q = nextQuery; + q < currentQuery->getNumberOfDownstreamQueries(); ++q) { + if (currentQuery->getDownstreamQuery(q) != nullptr) { + bool success = false; + auto dispatcher = + currentQuery->getDownstreamQuery(q)->getTaskDispatcher(); + if (m_query.getIsLeft()) { + auto upstream = currentQuery->getDownstreamQuery(q) + ->getNumberOfUpstreamQueries(); + success = + (upstream == 1) + ? dispatcher->tryDispatchToFirstStream( + buffer->getBufferRaw(), length, + m_results[m_nextToForward].m_latencyMark, + m_results[m_nextToForward].m_graph) + : dispatcher->tryDispatchSerialToFirstStream( + buffer->getBufferRaw(), length, m_forwardId, + m_results[m_nextToForward].m_latencyMark, + m_results[m_nextToForward].m_graph); + } else { + success = dispatcher->tryDispatchSerialToSecondStream( + buffer->getBufferRaw(), length, m_forwardId, + m_results[m_nextToForward].m_latencyMark, + m_results[m_nextToForward].m_graph); + } + + if (!success) { + std::cout << "[DBG] ForwardAndFree: failed to forward results from query " + << std::to_string(currentQuery->getId()) + << " to next query " + << std::to_string( + currentQuery->getDownstreamQuery(q)->getId()) + << "..." 
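When a downstream dispatch fails, the code above records the index q in `m_latch` and restores the slot state, so the next forwarding attempt resumes from the failed downstream query instead of re-dispatching to earlier ones. A compact sketch of that resume pattern (`downstream[q]()` is a placeholder for the dispatcher call):

```cpp
#include <functional>
#include <vector>

// Returns true when all downstream queries accepted the buffer; otherwise
// stores the failing index in `latch` so the caller can retry from there.
bool dispatchAll(const std::vector<std::function<bool()>> &downstream, int &latch) {
  for (int q = latch; q < (int)downstream.size(); ++q) {
    if (!downstream[q]()) { latch = q; return false; }  // resume point
  }
  latch = 0;
  return true;
}
```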
<< std::endl;
+                  m_results[m_nextToForward].m_latch = q;
+                  m_results[m_nextToForward].m_slot.store(3);
+                  m_forwardLock.unlock();
+                  return;
+                } else {
+                  m_results[m_nextToForward].m_graph.reset();
+                  m_forwardId++;
+                }
+              }
+            }
+          }
+        } else if (length > 0 && currentQuery->isMostDownstream()) {
+#if defined(TCP_OUTPUT)
+          // send data over tcp to a remote sink
+          if (SystemConf::getInstance().LINEAGE_ON) {
+            m_graph->serialize();
+            auto vecSize = m_graph->m_clockVector.size();
+            send(m_sock, m_graph->m_clockVector.data(), vecSize, 0);
+          }
+          //if (!m_compressOutput) {
+            send(m_sock, buffer->getBufferRaw(), length, 0);
+          //} else {
+          //  size_t output_length;
+          //  snappy::RawCompress(buffer->getBufferRaw(), length,
+          //  m_compressBuffer->getBuffer().data(), &output_length);
+          //  send(m_sock, m_compressBuffer->getBuffer().data(), output_length, 0);
+          //}
+          m_forwardId++;
+#elif defined(RDMA_OUTPUT)
+          infinity::requests::RequestToken requestToken(m_context);
+          //std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw(), length);
+          if (SystemConf::getInstance().LINEAGE_ON) {
+            m_graph->serialize();
+            auto vecSize = m_graph->m_clockVector.size();
+            std::memcpy(m_sendBuffer->getData(), m_graph->m_clockVector.data(), vecSize);
+            m_qp->send(m_sendBuffer, vecSize, &requestToken);
+          }
+          if (length < m_sendBuffer->getSizeInBytes()) {
+            std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw(), length);
+            m_qp->send(m_sendBuffer, length, &requestToken);
+            requestToken.waitUntilCompleted();
+          } else {
+            std::cout << "[DBG] sending with RDMA " + std::to_string(length) + " > " + std::to_string(m_sendBuffer->getSizeInBytes()) << std::endl;
+            auto curLength = std::min(length, (int)m_sendBuffer->getSizeInBytes());
+            auto maxLength = 0;
+            while (maxLength < length) {
+              // advance the source offset by maxLength so each chunk sends new data
+              std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw() + maxLength, curLength);
+              m_qp->send(m_sendBuffer, curLength, &requestToken);
+              requestToken.waitUntilCompleted();
+              maxLength += curLength;
+              curLength = std::min(length - maxLength, (int)m_sendBuffer->getSizeInBytes());
+            }
+          }
+          m_forwardId++;
+#endif
+          if (SystemConf::getInstance().LINEAGE_ON) {
+            if (m_results[m_nextToForward].m_graph) {
+              m_results[m_nextToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length);
+            }
+            // drop duplicates
+            if (length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) {
+              std::cout << "warning: dropping duplicate results for query " + std::to_string(m_query.getId()) +
+                  " with offset lower than " + std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl;
+            }
+          }
+        }
-      /* Forward to the distributed API */
-      /* Measure latency */
-      if (currentQuery->isMostDownstream()) {
-        if (SystemConf::getInstance().LATENCY_ON &&
-            (m_results[m_nextToForward].m_latencyMark != -1)) {
-          m_query.getLatencyMonitor().monitor(
-              m_freeBuffer, m_results[m_nextToForward].m_latencyMark);
+        /* Forward to the distributed API */
+        /* Measure latency */
+        if (currentQuery->isMostDownstream()) {
+          if (SystemConf::getInstance().LATENCY_ON &&
+              (m_results[m_nextToForward].m_latencyMark != -1
+               && m_results[m_nextToForward].m_latencyMark != 0)) { // when we have pipelined operators many 0 marks arrive
+            //std::cout << "The latency mark arrived for monitoring is " + std::to_string(m_results[m_nextToForward].m_latencyMark) << std::endl;
+            m_query.getLatencyMonitor().monitor(
+                m_freeBuffer1, m_results[m_nextToForward].m_latencyMark);
+          }
         }
-      }
-      /*
-       * Before releasing the result buffer, increment bytes generated. 
It is - * important all operators set the position of the buffer accordingly. - * Assume that the start position is 0. - */ - incTotalOutputBytes(length); + /* + * Before releasing the result buffer, increment bytes generated. It is important all operators set the position of the buffer accordingly. Assume that the start position is 0. + */ + incTotalOutputBytes(length); - /* Free input buffer */ - auto fPointer = m_results[m_nextToForward].m_freePointer; - if (fPointer != INT_MIN) m_freeBuffer.free(fPointer); + /* Free input buffer */ + auto fPointer = m_results[m_nextToForward].m_freePointer; + if (fPointer != INT_MIN) m_freeBuffer1.free(fPointer); - m_results[m_nextToForward].release(); - /* Release the current slot */ - m_results[m_nextToForward].m_slot.store(-1); + if (SystemConf::getInstance().LINEAGE_ON && m_results[m_nextToForward].m_graph) { + if (currentQuery->isMostDownstream()) { + if (SystemConf::getInstance().CHECKPOINT_ON) { + if (!m_checkpointGraph) { + m_checkpointGraph = LineageGraphFactory::getInstance().newInstance(); + } + m_checkpointGraph->advanceOffsets(m_results[m_nextToForward].m_graph); + m_query.getParent()->getCheckpointCoordinator()->tryToPurgeCheckpoint(m_checkpointGraph); + } + m_results[m_nextToForward].m_graph->freePersistentState(m_query.getId()); + m_results[m_nextToForward].freeGraph(); + } else if (m_results[m_nextToForward].m_graph.use_count() == 1) { + if (m_graph) { + m_graph->mergeGraphs(m_results[m_nextToForward].m_graph); + } else { + m_graph = m_results[m_nextToForward].m_graph; + } + m_results[m_nextToForward].m_graph.reset(); + } + } - /* Increment next */ - m_nextToForward += 1; - if (m_nextToForward == m_numberOfSlots) m_nextToForward = 0; + m_results[m_nextToForward].release(); + /* Release the current slot */ + m_results[m_nextToForward].m_slot.store(-1); - /* Check if next is ready to be pushed */ - oldVal = 3; - if (!m_results[m_nextToForward].m_slot.compare_exchange_strong(oldVal, - 4)) { - busy = false; + /* Increment next */ + m_nextToForward += 1; + if (m_nextToForward == m_numberOfSlots) m_nextToForward = 0; + + if (SystemConf::getInstance().CHECKPOINT_ON && m_stopMerging && mergeCounter++ == 5) { + break; + } + + /* Check if next is ready to be pushed */ + oldVal = 3; + if (!m_results[m_nextToForward].m_slot.compare_exchange_strong(oldVal, + 4)) { + busy = false; + } } + /* Thread exit critical section */ + m_forwardLock.unlock(); } - /* Thread exit critical section */ - m_forwardLock.unlock(); - } catch (std::exception &e) { std::cout << e.what() << std::endl; } } +void ResultHandler::debugAggregateAndForwardAndFree() { + std::string str; + size_t idx = 0; + for (auto &slot : m_results) { + str.append(std::to_string(idx) + ": slot " + + std::to_string(slot.m_slot.load()) + " taskId " + + std::to_string(slot.m_taskId) + " windowFrags " + + std::to_string(slot.getNumberOfWindowFragments(true)) + "\n"); + idx++; + } + std::cout << "[DBG] warning: \n" << str << std::endl; +} + /* * Flags: * -1: slot is free * 0: slot is being populated by a thread - * 1: slot is occupied, but "unlocked", thus it can be aggregated with its - * next one" 2: slot is occupied, but "locked", thus it is being processed Extra - * Aggregation Flags: 3: slot is occupied with BufferPtrs result, but - * "unlocked", thus it is ready to be forwarded 4: slot is occupied with Buffer - * result, but "unlocked", thus it is ready to be forwarded 5: slot is occupied, - * but "locked", thus the result are being forwarded + * 1: slot is occupied, but "unlocked", thus it 
can be aggregated with its next one
+ * 2: slot is occupied, but "locked", thus it is being processed
+ * Extra Aggregation Flags:
+ * 3: slot is occupied with BufferPtrs result, but "unlocked", thus it is ready to be forwarded
+ * 4: slot is occupied with Buffer result, but "unlocked", thus it is ready to be forwarded
+ * 5: slot is occupied, but "locked", thus the results are being forwarded
  */
 void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
   int pid = batch->getPid();
@@ -385,10 +737,10 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
   int taskIdx = ((taskId - 1) % m_numberOfSlots);
   try {
-    // First try to merge partial results and then if we have complete result
-    // try to forward them
+
     auto oldVal = -1;
-    if (taskType == TaskType::PROCESS) {
+    // First try to merge partial results and then if we have complete result try to forward them
+    if (taskType == TaskType::PROCESS || taskType == TaskType::ONLY_PROCESS) {
       while (!m_results[taskIdx].m_slot.compare_exchange_weak(oldVal, 0)) {
         std::cout << "warning: result collector (" << std::this_thread::get_id() << ") blocked: query " +
@@ -405,11 +757,23 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
         throw std::runtime_error("error: a partial window is nullptr");
 
       /* Slot `idx` has been reserved for this task id */
+      if (SystemConf::getInstance().LINEAGE_ON) {
+        m_results[taskIdx].m_graph = std::move(batch->getLineageGraph());
+        m_results[taskIdx].m_freeOffset = batch->getStreamEndPointer();
+        batch->getLineageGraph().reset();
+      }
       m_results[taskIdx].init(batch);
       /* No other thread can modify this slot. */
       m_results[taskIdx].m_slot.store(1);
+      //std::cout << "[DBG] setting slot " + std::to_string(m_results[taskIdx].m_index) + " to 2" << std::endl;
+      updateMaximumTaskId(taskId);
     }
 
+    if (taskType == TaskType::ONLY_PROCESS)
+      return;
+
+    int mergeCounter = 0;
+
     /* Try assign window ids to partials in slots pair-wise */
     if (m_prepareMergeLock.try_lock()) {
       PartialResultSlot *currentSlot;
@@ -420,16 +784,10 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
         int currentSlotFlag = currentSlot->m_slot.load();
         int windowsToStore = 0;
-        windowsToStore += (currentSlot->m_openingWindows)
-                              ? currentSlot->m_openingWindows->numberOfWindows()
-                              : 0;
-        windowsToStore +=
-            (currentSlot->m_completeWindows)
-                ? currentSlot->m_completeWindows->numberOfWindows()
-                : 0;
-        // if (currentSlotFlag > 1)
-        //   throw std::runtime_error("error: invalid slot state equal to " +
-        //   std::to_string(currentSlotFlag));
+        windowsToStore += (currentSlot->m_openingWindows) ? currentSlot->m_openingWindows->numberOfWindows() : 0;
+        windowsToStore += (currentSlot->m_completeWindows) ? 
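The flag values documented in the comment above would arguably read better as named constants; a hypothetical refactoring sketch (the patch itself keeps raw ints):

```cpp
// Hypothetical named states for PartialResultSlot::m_slot (the code uses raw ints).
enum SlotState : int {
  FREE = -1,        // slot is free
  FILLING = 0,      // being populated by a worker
  READY_MERGE = 1,  // occupied, unlocked: may be aggregated with its successor
  MERGING = 2,      // occupied, locked: being processed
  READY_PTRS = 3,   // holds a BufferPtrs result, ready to be forwarded
  READY_BUF = 4,    // holds a Buffer result, ready to be forwarded
  FORWARDING = 5,   // occupied, locked: results are being forwarded
};
```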
currentSlot->m_completeWindows->numberOfWindows() : 0; + //if (currentSlotFlag > 1) + // throw std::runtime_error("error: invalid slot state equal to " + std::to_string(currentSlotFlag)); if ((currentSlotFlag != 1 && currentSlotFlag != 5) || (windowsToStore > m_numberOfWindowSlots - m_reservedSlots.load())) { /*std::cout << "[DBG] waiting for empty window slots " + @@ -441,21 +799,16 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { } if (debug) { - std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() - << " starts the window assignment with: " - << m_numberOfWindowSlots << " numberOfWindowSlots " - << m_nextToAggregate << " nextToAggregate " - << m_openingWindowsList.size() - << " remaining opening windows " << m_currentWindowSlot - << " currentWindowSlot " - << currentSlot->m_closingWindows->numberOfWindows() - << " closingWindows " - << currentSlot->m_pendingWindows->numberOfWindows() - << " pendingWindows " - << currentSlot->m_completeWindows->numberOfWindows() - << " completeWindows " - << currentSlot->m_openingWindows->numberOfWindows() - << " openingWindows " << std::endl; + std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " starts the window assignment with: " + << + m_numberOfWindowSlots << " numberOfWindowSlots " << + m_nextToAggregate << " nextToAggregate " << + m_openingWindowsList.size() << " remaining opening windows " << + m_currentWindowSlot << " currentWindowSlot " << + currentSlot->m_closingWindows->numberOfWindows() << " closingWindows " << + currentSlot->m_pendingWindows->numberOfWindows() << " pendingWindows " << + currentSlot->m_completeWindows->numberOfWindows() << " completeWindows " << + currentSlot->m_openingWindows->numberOfWindows() << " openingWindows " << std::endl; } #if defined(PREFETCH) @@ -467,12 +820,10 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { int window, newWindows = 0, latestWindowSlot = -1; { // first insert closing windows if any - // when we reach a closing window, the window slot is ready to output - // a full result + // when we reach a closing window, the window slot is ready to output a full result auto &closingWindows = currentSlot->m_closingWindows; if (closingWindows->numberOfWindows() > 0) { - for (window = 0; window < closingWindows->numberOfWindows(); - ++window) { + for (window = 0; window < closingWindows->numberOfWindows(); ++window) { if (m_openingWindowsList.empty()) { std::cout << "[DBG] ForwardAndFree: warning found additional " << "closing window with current window slot " @@ -481,28 +832,24 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { } auto firstOpeningWindow = m_openingWindowsList.front(); latestWindowSlot = firstOpeningWindow; - // while (!(m_windowResults[firstOpeningWindow].slot.load() == 0 - // || + //while (!(m_windowResults[firstOpeningWindow].slot.load() == 0 || // m_windowResults[firstOpeningWindow].slot.load() == 1)) while (m_windowResults[firstOpeningWindow].m_slot.load() != 0) - std::cout << "[DBG] ForwardAndFree [Warning]: closingWindow " - "waiting for slot " - << m_currentWindowSlot - << std::endl; // partial results are already in + std::cout << "[DBG] ForwardAndFree [Warning]: closingWindow waiting for slot " << m_currentWindowSlot + << std::endl; // partial results are already in m_openingWindowsList.pop_front(); m_windowResults[firstOpeningWindow].m_partialWindows.push_back( - // push( - // std::make_shared(closingWindows, - // window, true)); + //push( + 
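The parallel-merge bookkeeping above hinges on `m_openingWindowsList`: window slots whose opening fragment has arrived wait in FIFO order, and each arriving closing fragment completes the oldest waiting slot. A sketch of that matching with a plain `std::deque` (illustrative types, not the engine's):

```cpp
#include <deque>
#include <vector>

// Each opening window waits in FIFO order for the closing fragment that
// completes it; a closing window always matches the oldest open slot.
std::deque<int> openingList;   // indices into windowSlots, oldest first
std::vector<int> windowSlots;  // toy payload: per-slot fragment counts

void onOpeningWindow(int slotIdx) { openingList.push_back(slotIdx); }

bool onClosingWindow() {
  if (openingList.empty()) return false;  // nothing to complete
  int slotIdx = openingList.front();
  openingList.pop_front();
  windowSlots[slotIdx]++;  // the slot now has all of its fragments
  return true;
}
```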
//std::make_shared(closingWindows, window, true)); closingWindows, window, true); - // m_windowResults[firstOpeningWindow].slot.store(2); - // m_windowResults[firstOpeningWindow].setHasAllBatches(); + //m_windowResults[firstOpeningWindow].slot.store(2); + //m_windowResults[firstOpeningWindow].setHasAllBatches(); // let workers know that there is work m_availableSlots.push(firstOpeningWindow); } - // closingWindows.reset(); + //closingWindows.reset(); } // insert complete windows if any, which are ready for forwarding @@ -510,18 +857,14 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { if (completeWindows->numberOfWindows() > 0) { latestWindowSlot = m_currentWindowSlot; while (m_windowResults[m_currentWindowSlot].m_slot.load() != -1) - std::cout << "[DBG] ForwardAndFree [Warning]: completeWindow " - "waiting for slot " - << m_currentWindowSlot - << std::endl; // the slot must be empty - m_windowResults[m_currentWindowSlot].m_completeWindows = - completeWindows; + std::cout << "[DBG] ForwardAndFree [Warning]: completeWindow waiting for slot " << m_currentWindowSlot + << std::endl; // the slot must be empty + m_windowResults[m_currentWindowSlot].m_completeWindows = completeWindows; // set that that the slot is ready to forward with Buffer result m_windowResults[m_currentWindowSlot].m_slot.store(3); m_windowResults[m_currentWindowSlot].m_completeWindowsStartPos = 0; - m_windowResults[m_currentWindowSlot].m_length = - completeWindows->getPosition(); - // m_windowResults[m_currentWindowSlot].setHasComplete(); + m_windowResults[m_currentWindowSlot].m_length = completeWindows->getPosition(); + //m_windowResults[m_currentWindowSlot].setHasComplete(); m_currentWindowSlot++; if (m_currentWindowSlot == m_numberOfWindowSlots) m_currentWindowSlot = 0; @@ -531,62 +874,49 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { // insert pending windows if any auto &pendingWindows = currentSlot->m_pendingWindows; if (pendingWindows->numberOfWindows() > 0) { - // auto pendingWindow = - // std::make_shared(pendingWindows, 0, - // false); for (window = 0; window < - // pendingWindows->numberOfWindows(); ++window) { + //auto pendingWindow = std::make_shared(pendingWindows, 0, false); + //for (window = 0; window < pendingWindows->numberOfWindows(); ++window) { for (auto &firstOpeningWindow : m_openingWindowsList) { latestWindowSlot = firstOpeningWindow; while (m_windowResults[firstOpeningWindow].m_slot.load() != 0) - std::cout << "[DBG] ForwardAndFree [Warning]: pendingWindow " - "waiting for slot " - << m_currentWindowSlot - << std::endl; // partial results are already in + std::cout << "[DBG] ForwardAndFree [Warning]: pendingWindow waiting for slot " << m_currentWindowSlot + << std::endl; // partial results are already in m_windowResults[firstOpeningWindow].m_partialWindows.push_back( - // push(pendingWindow); + //push(pendingWindow); pendingWindows, 0, false); - // m_windowResults[firstOpeningWindow].slot.store(1); + //m_windowResults[firstOpeningWindow].slot.store(1); } - // pendingWindows.reset(); + //pendingWindows.reset(); } // finally insert opening windows if they exist // and keep the earliest window auto &openingWindows = currentSlot->m_openingWindows; if (openingWindows->numberOfWindows() > 0) { - for (window = 0; window < openingWindows->numberOfWindows(); - ++window) { + for (window = 0; window < openingWindows->numberOfWindows(); ++window) { latestWindowSlot = m_currentWindowSlot; while (m_windowResults[m_currentWindowSlot].m_slot.load() != -1) 
- std::cout << "[DBG] ForwardAndFree [Warning]: openingWindow " - "waiting for slot " - << m_currentWindowSlot - << std::endl; // the slot must be empty + std::cout << "[DBG] ForwardAndFree [Warning]: openingWindow waiting for slot " << m_currentWindowSlot + << std::endl; // the slot must be empty m_openingWindowsList.push_back(m_currentWindowSlot); - if (m_windowResults[m_currentWindowSlot] - .m_partialWindows.size() == 0) - m_windowResults[m_currentWindowSlot] - .m_partialWindows.set_capacity(4); + if (m_windowResults[m_currentWindowSlot].m_partialWindows.size() == 0) + m_windowResults[m_currentWindowSlot].m_partialWindows.set_capacity(4); m_windowResults[m_currentWindowSlot].m_partialWindows.push_back( - // push( - // std::make_shared(openingWindows, - // window, false)); + //push( + //std::make_shared(openingWindows, window, false)); openingWindows, window, false); // reuse the completeWindowsBuffer here - m_windowResults[m_currentWindowSlot].m_completeWindows = - currentSlot->m_completeWindows; + m_windowResults[m_currentWindowSlot].m_completeWindows = currentSlot->m_completeWindows; m_windowResults[m_currentWindowSlot].m_completeWindowsStartPos = currentSlot->m_completeWindows->getPosition(); m_windowResults[m_currentWindowSlot].m_completeWindow = - window; // currentSlot->completeWindows->numberOfWindows() + - // window; + window; //currentSlot->completeWindows->numberOfWindows() + window; // set latency mark - m_windowResults[m_currentWindowSlot].m_latencyMark = - currentSlot->m_latencyMark; + m_windowResults[m_currentWindowSlot].m_latencyMark = currentSlot->m_latencyMark; m_windowResults[m_currentWindowSlot].m_slot.store(0); m_currentWindowSlot++; @@ -594,7 +924,7 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { m_currentWindowSlot = 0; newWindows++; } - // openingWindows.reset(); + //openingWindows.reset(); } m_insertedWindows += newWindows; @@ -602,64 +932,58 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { // sanity check /*if (currentSlot->completeWindows->numberOfWindows() > 0 && - currentSlot->openingWindows->numberOfWindows() + 2 != - currentSlot->completeWindows.use_count()) { std::string errorMsg = - "error: wrong ref counters after assigning window ids for - completeWindows " + - std::to_string(currentSlot->openingWindows->numberOfWindows()) - + " " + std::to_string(currentSlot->completeWindows.use_count()); + currentSlot->openingWindows->numberOfWindows() + 2 != currentSlot->completeWindows.use_count()) { + std::string errorMsg = "error: wrong ref counters after assigning window ids for completeWindows " + + std::to_string(currentSlot->openingWindows->numberOfWindows()) + " " + + std::to_string(currentSlot->completeWindows.use_count()); throw std::runtime_error(errorMsg); } if (currentSlot->openingWindows->numberOfWindows() > 0 && - currentSlot->openingWindows->numberOfWindows() + 1 != - currentSlot->openingWindows.use_count()) { std::string errorMsg = - "error: wrong ref counters after assigning window ids for - openingWindows " + - std::to_string(currentSlot->openingWindows->numberOfWindows()) - + " " + std::to_string(currentSlot->openingWindows.use_count()); throw - std::runtime_error(errorMsg); + currentSlot->openingWindows->numberOfWindows() + 1 != currentSlot->openingWindows.use_count()) { + std::string errorMsg = "error: wrong ref counters after assigning window ids for openingWindows " + + std::to_string(currentSlot->openingWindows->numberOfWindows()) + " " + + 
std::to_string(currentSlot->openingWindows.use_count()); + throw std::runtime_error(errorMsg); } if (currentSlot->closingWindows->numberOfWindows() > 0 && - currentSlot->closingWindows->numberOfWindows() + 1 != - currentSlot->closingWindows.use_count()) { std::string errorMsg = - "error: wrong ref counters after assigning window ids for - closingWindows " + - std::to_string(currentSlot->closingWindows->numberOfWindows()) - + " " + std::to_string(currentSlot->closingWindows.use_count()); throw - std::runtime_error(errorMsg); + currentSlot->closingWindows->numberOfWindows() + 1 != currentSlot->closingWindows.use_count()) { + std::string errorMsg = "error: wrong ref counters after assigning window ids for closingWindows " + + std::to_string(currentSlot->closingWindows->numberOfWindows()) + " " + + std::to_string(currentSlot->closingWindows.use_count()); + throw std::runtime_error(errorMsg); } if (currentSlot->pendingWindows->numberOfWindows() > 0 && - currentSlot->pendingWindows->numberOfWindows() + 1 != - currentSlot->pendingWindows.use_count()) { std::string errorMsg = - "error: wrong ref counters after assigning window ids for - pendingWindows " + - std::to_string(currentSlot->pendingWindows->numberOfWindows()) - + " " + std::to_string(currentSlot->pendingWindows.use_count()); throw - std::runtime_error(errorMsg); + currentSlot->pendingWindows->numberOfWindows() + 1 != currentSlot->pendingWindows.use_count()) { + std::string errorMsg = "error: wrong ref counters after assigning window ids for pendingWindows " + + std::to_string(currentSlot->pendingWindows->numberOfWindows()) + " " + + std::to_string(currentSlot->pendingWindows.use_count()); + throw std::runtime_error(errorMsg); }*/ // set the end pointer on the result slot in order to free them if (latestWindowSlot == -1) throw std::runtime_error("error: invalid latestWindowSlot"); m_windowResults[latestWindowSlot].setResultSlot(currentSlot); + if (SystemConf::getInstance().LINEAGE_ON && !currentQuery->isMostDownstream()) { + m_windowResults[latestWindowSlot].m_graph = std::move(currentSlot->m_graph); + } // Free complete windows here!!! 
currentSlot->m_completeWindows.reset(); /* Free input buffer */ - // int fPointer = currentSlot->freePointer; - // if (fPointer != INT_MIN) + //int fPointer = currentSlot->freePointer; + //if (fPointer != INT_MIN) // freeBuffer.free(fPointer); } m_nextToAggregate = nextSlot->m_index; if (debug) { - std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() - << " ends the window assignment with: " << m_insertedWindows - << " insertedWindows " << m_nextToAggregate - << " nextToAggregate " << m_openingWindowsList.size() - << " remaining opening windows " << m_currentWindowSlot - << " currentWindowSlot " << std::endl; + std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " ends the window assignment with: " << + m_insertedWindows << " insertedWindows " << + m_nextToAggregate << " nextToAggregate " << + m_openingWindowsList.size() << " remaining opening windows " << + m_currentWindowSlot << " currentWindowSlot " << std::endl; } } m_prepareMergeLock.unlock(); @@ -671,55 +995,87 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { while (hasWork) { int idx = nextWindowSlotForAggregation; if (debug) { - std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() - << " start aggregating: " << nextWindowSlotForAggregation - << " nextToAggregateWindows " << idx - << " m_currentWindowSlot " << std::endl; + std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " start aggregating: " << + nextWindowSlotForAggregation << " nextToAggregateWindows " << + idx << " m_currentWindowSlot " << std::endl; } #if defined(PREFETCH) // prefetch data here m_windowResults[idx].prefetch(); #endif +#if defined(TCP_OUTPUT) + // compress output if needed + if (m_query.isMostDownstream() && m_compressOutput) { + m_windowResults[idx].aggregateAll(m_aggrOperator, pid, false); + int length = m_windowResults[idx].m_length; + if (length > 0) { + size_t output_length; + snappy::RawCompress(m_windowResults[idx].m_completeWindows->getBufferRaw() + m_windowResults[idx].m_completeWindowsStartPos, length,m_compressBuffers[pid].data(), &output_length); + std::memcpy(m_windowResults[idx].m_completeWindows->getBufferRaw() + m_windowResults[idx].m_completeWindowsStartPos,m_compressBuffers[pid].data(), output_length); + m_windowResults[idx].m_length = output_length; + } + m_windowResults[idx].m_slot.store(3); + } else { + m_windowResults[idx].aggregateAll(m_aggrOperator, pid); + } +#else m_windowResults[idx].aggregateAll(m_aggrOperator, pid); - +#endif // set that that the slot is ready to forward with BufferPtrs - // if (m_windowResults[idx].isReady()) { + //if (m_windowResults[idx].isReady()) { // m_windowResults[idx].slot.store(3); //} if (debug) { - std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() - << " finished aggregating: " << nextWindowSlotForAggregation - << " nextToAggregateWindows " << idx - << " m_currentWindowSlot " << std::endl; + std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " finished aggregating: " << + nextWindowSlotForAggregation << " nextToAggregateWindows " << + idx << " m_currentWindowSlot " << std::endl; } hasWork = m_availableSlots.try_pop(nextWindowSlotForAggregation); } - // try to forward from m_nextToForward: there are two types to forward + // try to forward from m_nextWindowToForward: there are two types to forward + //if (SystemConf::getInstance().CHECKPOINT_ON && m_stopMerging) + // return; + /* Forward and free */ - if (!m_forwardLock.try_lock()) return; + if (!m_forwardLock.try_lock()) + 
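The `TCP_OUTPUT` branch above compresses each complete-window buffer in place with Snappy before forwarding. A standalone sketch of that `RawCompress` step, assuming libsnappy is linked and the slot buffer has headroom for the rare case where Snappy expands incompressible data:

```cpp
#include <snappy.h>
#include <cstring>
#include <vector>

// Compress `length` bytes in place via a per-thread scratch buffer.
size_t compressInPlace(char *data, size_t length, std::vector<char> &scratch) {
  scratch.resize(snappy::MaxCompressedLength(length));
  size_t compressedLength = 0;
  snappy::RawCompress(data, length, scratch.data(), &compressedLength);
  std::memcpy(data, scratch.data(), compressedLength);  // overwrite the original payload
  return compressedLength;
}
```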
return; /* Release any previous kept slots */ auto iter = m_slotsToRelease.begin(); while (iter != m_slotsToRelease.end()) { - // std::cout << "[DBG] warning: releasing slots after checkpoint" << - // std::endl; + //std::cout << "[DBG] warning: releasing slots after checkpoint" << std::endl; auto slot = *iter; - if (slot->m_slot.load() == 3) { - // std::cout << "[DBG] warning: slot " + std::to_string(slot->m_index) + - // " released " << std::endl; - + if(slot->m_slot.load() == 3) { + //std::cout << "[DBG] warning: slot " + std::to_string(slot->m_index) + " released " << std::endl; + if (SystemConf::getInstance().LINEAGE_ON && currentQuery->isMostDownstream()) { + // todo: fix lineage merge for the parallel window merge approach + if (m_results[m_nextToForward].m_graph) { + if (SystemConf::getInstance().CHECKPOINT_ON) { + if (!m_checkpointGraph) { + m_checkpointGraph = LineageGraphFactory::getInstance().newInstance(); + } + m_checkpointGraph->advanceOffsets(m_results[m_nextToForward].m_graph); + m_query.getParent()->getCheckpointCoordinator()->tryToPurgeCheckpoint(m_checkpointGraph); + } + m_results[m_nextToForward].m_graph->freePersistentState(m_query.getId()); + m_results[m_nextToForward].freeGraph(); + } + if (slot->m_graph) { + slot->m_graph->freePersistentState(m_query.getId()); + slot->freeGraph(); + } + } slot->release(); // Free input buffer auto fPointer = slot->m_freePointer; - if (fPointer != INT_MIN) m_freeBuffer.free(fPointer); + if (fPointer != INT_MIN) m_freeBuffer1.free(fPointer); - // std::cout << "[DBG] setting slot " + std::to_string(slot->m_index) + - // " to -1" << std::endl; + //std::cout << "[DBG] setting slot " + std::to_string(slot->m_index) + " to -1" << std::endl; slot->m_slot.store(-1); iter = m_slotsToRelease.erase(iter); - m_nextToForward = (m_nextToForward + 1) % m_numberOfSlots; + m_nextToForward = (m_nextToForward+1) % m_numberOfSlots; } else { break; } @@ -736,93 +1092,184 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) { bool busy = true; while (busy) { + if (debug) { - std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() - << " start forwarding: " << m_nextWindowToForward - << " nextWindowToForward " << std::endl; + std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " start forwarding: " << + m_nextWindowToForward << " nextWindowToForward " << std::endl; } auto &buffer = m_windowResults[m_nextWindowToForward].m_completeWindows; - int startPos = - m_windowResults[m_nextWindowToForward].m_completeWindowsStartPos; + int startPos = m_windowResults[m_nextWindowToForward].m_completeWindowsStartPos; int length = m_windowResults[m_nextWindowToForward].m_length; int type = buffer->getType(); + //if (length) + // std::cout << length << std::endl; + //auto buff = ((long *)(buffer->getBufferRaw() + startPos)); + //for (int ii = 0; ii < length / 32; ++ii) { + // std::cout << std::to_string(buff[ii * 4]) << std::endl; + //} + // Forward m_windowResults if (length > 0 && currentQuery->getNumberOfDownstreamQueries() > 0) { - // Forward the latency mark downstream... 
- if (SystemConf::getInstance().LATENCY_ON && - (m_windowResults[m_nextWindowToForward].m_latencyMark != -1)) { - if (type == 1) - throw std::runtime_error( - "Latency is not supported for type 1 buffer yet."); - long t1 = - m_windowResults[m_nextWindowToForward] - .m_latencyMark; //(long) Utils::getSystemTimestamp - //(freeBuffer.getLong - //(m_windowResults[m_nextWindowToForward].latencyMark)); - long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); - buffer->putLong(0, Utils::pack(t1, t2)); - } - - int nextQuery = m_windowResults[m_nextWindowToForward].m_latch; - for (int q = nextQuery; - q < currentQuery->getNumberOfDownstreamQueries(); ++q) { - if (currentQuery->getDownstreamQuery(q) != nullptr) { - bool success; - if (type == 0) { - success = - currentQuery->getDownstreamQuery(q) - ->getTaskDispatcher() - ->tryDispatch( - buffer->getBufferRaw() + startPos, length, - m_windowResults[m_nextWindowToForward].m_latencyMark); - } else { - // success = - // currentQuery->getDownstreamQuery(q)->getTaskDispatcher()->tryDispatch(buffer->getBufferPtrs()[0], - // length); + if (SystemConf::getInstance().LINEAGE_ON && length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) { + std::cout << "warning: dropping duplicate results for query " + std::to_string(m_query.getId()) + + " with offset lower than " + std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl; + } else { + // Forward the latency mark downstream... + if (SystemConf::getInstance().LATENCY_ON && + (m_windowResults[m_nextWindowToForward].m_latencyMark != -1)) { + if (type == 1) throw std::runtime_error( - "Forwarding for type 1 buffer is not supported yet."); - } - if (!success) { - std::cout << "[DBG] WindowsForwardAndFree: failed to forward " - "results from query " - << std::to_string(currentQuery->getId()) - << " to next query " - << std::to_string( - currentQuery->getDownstreamQuery(q)->getId()) - << "..." << std::endl; - m_windowResults[m_nextWindowToForward].m_latch = q; - m_windowResults[m_nextWindowToForward].m_slot.store(oldVal); - m_forwardLock.unlock(); - return; + "Latency is not supported for type 1 buffer yet."); + long t1 = + m_windowResults[m_nextWindowToForward] + .m_latencyMark; //(long) Utils::getSystemTimestamp (freeBuffer.getLong (m_windowResults[m_nextWindowToForward].latencyMark)); + long t2 = (long)Utils::getTupleTimestamp(buffer->getLong(0)); + // std::cout << "Forwarding latency mark " + std::to_string(t1) + " merged as " + std::to_string(Utils::pack(t1, t2)) + " timestamp." << std::endl; + buffer->putLong(0, Utils::pack(t1, t2)); + } + + if (SystemConf::getInstance().LINEAGE_ON && m_windowResults[m_nextWindowToForward].m_graph) { + m_windowResults[m_nextWindowToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length); + } + + int nextQuery = m_windowResults[m_nextWindowToForward].m_latch; + for (int q = nextQuery; + q < currentQuery->getNumberOfDownstreamQueries(); ++q) { + if (currentQuery->getDownstreamQuery(q) != nullptr) { + bool success = false; + if (type == 0) { + auto dispatcher = + currentQuery->getDownstreamQuery(q)->getTaskDispatcher(); + if (m_query.getIsLeft()) { + auto upstream = currentQuery->getDownstreamQuery(q) + ->getNumberOfUpstreamQueries(); + success = + (upstream == 1) + ? 
dispatcher->tryDispatchToFirstStream( + buffer->getBufferRaw() + startPos, length, + m_windowResults[m_nextWindowToForward] + .m_latencyMark, + m_windowResults[m_nextWindowToForward].m_graph) + : dispatcher->tryDispatchSerialToFirstStream( + buffer->getBufferRaw() + startPos, length, + m_forwardId, + m_windowResults[m_nextWindowToForward] + .m_latencyMark, + m_windowResults[m_nextWindowToForward].m_graph); + } else { + //if (m_windowResults[m_nextWindowToForward].m_graph) { + // std::cout << "a graph reached this point" << std::endl; + //} + success = dispatcher->tryDispatchSerialToSecondStream( + buffer->getBufferRaw() + startPos, length, m_forwardId, + m_windowResults[m_nextWindowToForward].m_latencyMark, m_windowResults[m_nextWindowToForward].m_graph); + } + } else { + // success = currentQuery->getDownstreamQuery(q)->getTaskDispatcher()->tryDispatch(buffer->getBufferPtrs()[0], length); + throw std::runtime_error( + "Forwarding for type 1 buffer is not supported yet."); + } + if (!success) { + std::cout << "[DBG] WindowsForwardAndFree: failed to forward results from query " + << std::to_string(currentQuery->getId()) + << " to next query " + << std::to_string( + currentQuery->getDownstreamQuery(q)->getId()) + << "..." << std::endl; + m_windowResults[m_nextWindowToForward].m_latch = q; + m_windowResults[m_nextWindowToForward].m_slot.store(oldVal); + m_forwardLock.unlock(); + return; + } else { + m_windowResults[m_nextWindowToForward].m_graph.reset(); + if (!m_windowResults[m_nextWindowToForward].m_resSlots.empty()) + m_forwardId++; + } } } } + } else if (length > 0 && currentQuery->isMostDownstream()) { +#if defined(TCP_OUTPUT) + // send data over tcp to a remote sink + if (SystemConf::getInstance().LINEAGE_ON) { + m_graph->serialize(); + auto vecSize = m_graph->m_clockVector.size(); + send(m_sock, m_graph->m_clockVector.data(), vecSize, 0); + } + //if (!m_compressOutput) { + send(m_sock, buffer->getBufferRaw() + startPos, length, 0); + //} else { + // size_t output_length; + // snappy::RawCompress(buffer->getBufferRaw() + startPos, length, + // m_compressBuffer->getBuffer().data(), &output_length); + // send(m_sock, m_compressBuffer->getBuffer().data(), output_length, 0); + //} + m_forwardId++; + +#elif defined(RDMA_OUTPUT) + infinity::requests::RequestToken requestToken(m_context); + //std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw() + startPos, length); + //m_qp->send(m_sendBuffer, m_sendBuffer->getSizeInBytes(), &requestToken); + //requestToken.waitUntilCompleted(); + if (SystemConf::getInstance().LINEAGE_ON) { + m_graph->serialize(); + auto vecSize = m_graph->m_clockVector.size(); + std::memcpy(m_sendBuffer->getData(), m_graph->m_clockVector.data(), vecSize); + m_qp->send(m_sendBuffer, vecSize, &requestToken); + } + if (length < m_sendBuffer->getSizeInBytes()) { + std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw() + startPos, length); + m_qp->send(m_sendBuffer, length, &requestToken); + requestToken.waitUntilCompleted(); + } else { + std::cout << "[DBG] sending with RDMA " + std::to_string(length) + " > " + std::to_string(m_sendBuffer->getSizeInBytes())<< std::endl; + auto curLength = std::min(length, (int)m_sendBuffer->getSizeInBytes()); + auto maxLength = 0; + auto cnter = 0; + while (maxLength < length) { + std::memcpy(m_sendBuffer->getData(), buffer->getBufferRaw() + startPos + maxLength, curLength); + m_qp->send(m_sendBuffer, curLength, &requestToken); + requestToken.waitUntilCompleted(); + maxLength += curLength; + curLength = std::min(length-maxLength, 
(int)m_sendBuffer->getSizeInBytes()); + cnter++; + } + //std::cout << "[DBG] entered the loop " + std::to_string(cnter) << std::endl; + } + m_forwardId++; +#endif + if (SystemConf::getInstance().LINEAGE_ON) { + if (m_windowResults[m_nextWindowToForward].m_graph) { + m_windowResults[m_nextWindowToForward].m_graph->setOutputPtr(m_query.getId(), m_totalOutputBytes + length); + } + if (length + m_totalOutputBytes <= m_query.getOperator()->getOutputPtr()) { + std::cout << "warning: dropping duplicate results for query " + std::to_string(m_query.getId()) + + " with offset lower than " + std::to_string(m_query.getOperator()->getOutputPtr()) << std::endl; + } + } } /* Forward to the distributed API */ /* Measure latency */ if (currentQuery->isMostDownstream()) { - if (SystemConf::getInstance().LATENCY_ON && - (m_windowResults[m_nextWindowToForward].m_latencyMark != -1)) { - m_query.getLatencyMonitor().monitor( - m_freeBuffer, - m_windowResults[m_nextWindowToForward].m_latencyMark); + if (SystemConf::getInstance().LATENCY_ON && (m_windowResults[m_nextWindowToForward].m_latencyMark != -1 + && m_windowResults[m_nextWindowToForward].m_latencyMark != 0)) { // when we have pipelined operators many 0 marks arrive + //std::cout << "The latency mark arrived for monitoring is " + std::to_string(m_windowResults[m_nextWindowToForward].m_latencyMark) << std::endl; + m_query.getLatencyMonitor().monitor(m_freeBuffer1, m_windowResults[m_nextWindowToForward].m_latencyMark); } } /* - * Before releasing the result buffer, increment bytes generated. It is - * important all operators set the position of the buffer accordingly. - * Assume that the start position is 0. + * Before releasing the result buffer, increment bytes generated. It is important + * all operators set the position of the buffer accordingly. Assume that the start + * position is 0. 
 */
  incTotalOutputBytes(length);

  /* Release the current slot */
-  m_windowResults[m_nextWindowToForward].release(
-      m_query.getId(), currentQuery->isMostDownstream(), m_freeBuffer,
-      m_nextToForward, m_numberOfSlots, m_slotsToRelease);
+  m_windowResults[m_nextWindowToForward].release(m_query.getId(), currentQuery->isMostDownstream(), m_freeBuffer1, m_nextToForward, m_numberOfSlots, m_slotsToRelease);
  m_reservedSlots--;

  /* Increment next */
@@ -836,16 +1283,18 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
      if (oldVal < 3) {
        busy = false;
      }
+      if (SystemConf::getInstance().CHECKPOINT_ON && m_stopMerging && mergeCounter++ == 5) {
+        break;
+      }
    }

    if (debug) {
-      std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id()
-                << " end forwarding: " << m_nextToAggregate
-                << " m_nextToAggregate " << m_nextToForward
-                << " m_nextToForward " << m_forwardedWindows
-                << " forwardedWindows " << m_nextWindowToForward
-                << " nextWindowToForward " << m_nextToAggregateWindows
-                << " nextToAggregateWindows " << std::endl;
+      std::cout << "[DBG] ForwardAndFree " << std::this_thread::get_id() << " end forwarding: " <<
+          m_nextToAggregate << " m_nextToAggregate " <<
+          m_nextToForward << " m_nextToForward " <<
+          m_forwardedWindows << " forwardedWindows " <<
+          m_nextWindowToForward << " nextWindowToForward " <<
+          m_nextToAggregateWindows << " nextToAggregateWindows " << std::endl;
    }

    /* Thread exit critical section */
@@ -856,11 +1305,44 @@ void ResultHandler::aggregateWindowsAndForwardAndFree(WindowBatch *batch) {
  }
}

+void ResultHandler::updateMaximumTaskId(int value) {
+  auto prev_value = m_maxTaskId.load();
+  while (prev_value < value &&
+         !m_maxTaskId.compare_exchange_weak(prev_value, value)) {
+  }
+}
+
 void ResultHandler::setAggregateOperator(AggregateOperatorCode *aggrOperator) {
  std::cout << "[DBG] set aggregate operator" << std::endl;
  m_aggrOperator = aggrOperator;
}

-bool ResultHandler::containsFragmentedWindows() { return m_hasWindowFragments; }
+bool ResultHandler::containsFragmentedWindows() {
+  return m_hasWindowFragments;
+}
+
+void ResultHandler::restorePtrs(int taskId) {
+  if (!m_hasRestored) {
+    int taskIdx = ((taskId - 1) % m_numberOfSlots);
+    m_nextToForward.store(taskIdx);
+    m_nextToAggregate.store(taskIdx);
+    m_hasRestored = true;
+  }
+}
+
+ResultHandler::~ResultHandler() = default;
+
+std::vector<PartialResultSlotWithoutFragments> &ResultHandler::getPartialsWithoutFrags() {
+  std::cout << "warning: use this function only for testing" << std::endl;
+  return m_resultsWithoutFrags;
+}
+
+std::vector<PartialResultSlot> &ResultHandler::getPartials() {
+  std::cout << "warning: use this function only for testing" << std::endl;
+  return m_results;
+}
-ResultHandler::~ResultHandler() = default;
\ No newline at end of file
+std::vector<PartialWindowResultSlot> &ResultHandler::getWindowPartials() {
+  std::cout << "warning: use this function only for testing" << std::endl;
+  return m_windowResults;
+}
\ No newline at end of file
diff --git a/src/result/ResultHandler.h b/src/result/ResultHandler.h
index b29add7..0e45c89 100644
--- a/src/result/ResultHandler.h
+++ b/src/result/ResultHandler.h
@@ -1,15 +1,22 @@
 #pragma once

-#include
-#include
-#include
-
-#include
+#include
+#include
 #include
+#include
+
 #include
+#include
+#include
+#include

-#include "utils/SystemConf.h"
 #include "utils/PaddedInt.h"
+#include "utils/SystemConf.h"
+
+#if defined(RDMA_OUTPUT)
+#include "RDMA/infinity/infinity.h"
+#include "buffers/RDMABufferPool.h"
+#endif

 class QueryBuffer;
 class PartialWindowResults;
@@ -20,6 +27,8 @@ class AggregateOperatorCode;
 struct PartialResultSlotWithoutFragments;
 struct PartialResultSlot;
 struct PartialWindowResultSlot;
+class FileBackedCheckpointCoordinator;
+struct LineageGraph;

 /*
  * \brief This class handles the result phase of both stateless and stateful
@@ -42,14 +51,15 @@ struct PartialWindowResultSlot;
 class ResultHandler {
  private:
  Query &m_query;
-  QueryBuffer &m_freeBuffer;
+  QueryBuffer &m_freeBuffer1, &m_freeBuffer2;
  bool m_hasWindowFragments;
  bool m_useParallelMerge;
+  std::atomic<int> m_maxTaskId;
  std::mutex m_forwardLock; /* Protects nextToForward */
  std::atomic<int> m_nextToForward;
  std::atomic<int> m_nextWindowToForward;
  std::mutex m_mergeLock; /* Protects nextToAggregate */
  std::mutex m_prepareMergeLock; /* Protects merge preparation */
  std::atomic<int> m_nextToAggregate;
  AggregateOperatorCode *m_aggrOperator;
  long m_totalOutputBytes;
@@ -69,6 +79,15 @@ class ResultHandler {
  int m_insertedWindows = 0;
  int m_forwardedWindows = 0;

+  size_t m_forwardId = 0;
+
+  bool m_stopMerging = false;
+
+  bool m_hasRestored = false;
+
+  std::shared_ptr<LineageGraph> m_graph = nullptr;
+  std::shared_ptr<LineageGraph> m_checkpointGraph = nullptr;
+
  /*
   * Flags:
   * -1: slot is free
@@ -85,16 +104,40 @@ class ResultHandler {
  std::vector<PartialWindowResultSlot> m_windowResults;
  boost::circular_buffer m_openingWindowsList;

+  // Variables for sending data to a sink over TCP
+  int m_sock = 0;
+  const bool m_compressOutput = false;
+  std::vector m_compressBuffers;
+
+#if defined(RDMA_OUTPUT)
+  infinity::core::Context *m_context;
+  infinity::queues::QueuePairFactory *m_qpFactory;
+  infinity::queues::QueuePair *m_qp;
+  infinity::memory::Buffer *m_sendBuffer, *m_receiveBuffer;
+#endif
+
  inline void forwardAndFreeWithoutFrags(WindowBatch *batch);
  inline void aggregateAndForwardAndFree(WindowBatch *batch);
  inline void aggregateWindowsAndForwardAndFree(WindowBatch *batch);
+  inline void debugAggregateAndForwardAndFree();
+  inline void updateMaximumTaskId(int value);
+
+  friend class FileBackedCheckpointCoordinator;
+
 public:
-  ResultHandler(Query &query, QueryBuffer &freeBuffer, bool hasWindowFragments, bool useParallelMerge = false);
+  ResultHandler(Query &query, QueryBuffer &freeBuffer1, QueryBuffer &freeBuffer2, bool hasWindowFragments, bool useParallelMerge = false);
+  void setupSocket();
  long getTotalOutputBytes();
  void incTotalOutputBytes(int bytes);
  void forwardAndFree(WindowBatch *batch);
  void setAggregateOperator(AggregateOperatorCode *aggrOperator);
  bool containsFragmentedWindows();
+  void restorePtrs(int taskId);
  virtual ~ResultHandler();
+
+  // Used only for testing
+  std::vector<PartialResultSlotWithoutFragments> &getPartialsWithoutFrags();
+  std::vector<PartialResultSlot> &getPartials();
+  std::vector<PartialWindowResultSlot> &getWindowPartials();
};
\ No newline at end of file
diff --git a/src/tasks/Task.cpp b/src/tasks/Task.cpp
index d33883c..d93164a 100644
--- a/src/tasks/Task.cpp
+++ b/src/tasks/Task.cpp
@@ -1,53 +1,106 @@
 #include "tasks/Task.h"
+
+#include "buffers/UnboundedQueryBuffer.h"
+#include "checkpoint/FileBackedCheckpointCoordinator.h"
+#include "result/ResultHandler.h"
 #include "tasks/WindowBatch.h"
 #include "tasks/WindowBatchFactory.h"
 #include "utils/Query.h"
+#include "utils/QueryApplication.h"
 #include "utils/QueryOperator.h"
-#include "result/ResultHandler.h"
-#include "dispatcher/TaskDispatcher.h"

-Task::Task() : m_batch(nullptr), m_taskId(-1) {}
+Task::Task() : m_leftBatch(nullptr), m_rightBatch(nullptr), m_taskId(-1) {}

-Task::Task(int taskId, std::shared_ptr<WindowBatch> batch, TaskType type) :
-    m_batch(batch), m_taskId(taskId), m_queryId(batch->getQuery()->getId()), m_type(type) {}
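For orientation amid the two-batch refactor that follows: tasks now carry a left and an optional right `WindowBatch`, so the same `Task::run()` path serves both single-stream queries and joins. A minimal sketch of the new calling convention (hypothetical helper, not code from this commit; it assumes `TaskFactory` exposes the same `getInstance()` singleton pattern that `WindowBatchFactory` uses elsewhere in this diff):

```cpp
// Sketch only: shows the left/right batch convention. Single-stream tasks
// pass nullptr for the right batch; Task::set() throws if the left batch is
// missing and derives the query id and NUMA placement from the left batch.
#include <memory>
#include "tasks/Task.h"
#include "tasks/TaskFactory.h"

std::shared_ptr<Task> makeProcessTask(
    int taskId, const std::shared_ptr<WindowBatch> &left,
    const std::shared_ptr<WindowBatch> &right /* nullptr for single-stream */) {
  return TaskFactory::getInstance().newInstance(taskId, left, right,
                                                TaskType::PROCESS);
}
```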
+Task::Task(int taskId, const std::shared_ptr<WindowBatch>& lBatch, const std::shared_ptr<WindowBatch>& rBatch, TaskType type) :
+    m_leftBatch(lBatch), m_rightBatch(rBatch), m_taskId(taskId), m_queryId(lBatch->getQuery()->getId()), m_type(type) {}

-void Task::set(int taskId, std::shared_ptr<WindowBatch> batch, TaskType type) {
+void Task::set(int taskId, const std::shared_ptr<WindowBatch>& lBatch, const std::shared_ptr<WindowBatch>& rBatch, TaskType type) {
  m_taskId = taskId;
-  m_batch = batch;
-  m_queryId = batch->getQuery()->getId();
-  m_numaNodeId = batch->getNumaNodeId();
+  m_leftBatch = lBatch;
+  m_rightBatch = rBatch;
+  if (!m_leftBatch)
+    throw std::runtime_error("error: the left batch of the task has to be set");
+  m_queryId = lBatch->getQuery()->getId();
+  m_numaNodeId = lBatch->getNumaNodeId(); // for now pick numa locality based on the first batch only
  m_type = type;
}

int Task::run(int pid) {
-  Query *query = m_batch->getQuery();
+  Query *query = m_leftBatch->getQuery();
  QueryOperator *next = query->getMostUpstreamOperator();

  if (next->getDownstream() != nullptr)
    throw std::runtime_error("error: execution of chained query operators is not yet tested");

-  m_batch->setPid(pid);
-
-  if (m_type == TaskType::PROCESS) {
+  m_leftBatch->setPid(pid);
+  //std::cout << "[DBG] running a "<< taskTypeToString(m_type) << " task with id " << m_taskId << std::endl;
+  if (m_type == TaskType::RECOVER) {
+    query->getParent()->getCheckpointCoordinator()->recover(pid, query->getId());
+    WindowBatchFactory::getInstance().free(m_leftBatch);
+    return 0;
+  }
+  if (m_type == TaskType::INSERT) {
+    auto buffer = query->getBuffer();
+#if defined(TCP_INPUT)
+    buffer->putRows(pid, m_qBuffer, m_bytes, m_slot, m_latencyMark, m_retainMark, m_wraps);
+#elif defined(RDMA_INPUT)
+    buffer->putRows(pid, m_rdmaBuffer, m_bytes, m_slot, m_latencyMark, m_retainMark, m_wraps);
+#else
+    buffer->putRows(pid, m_buffer, m_bytes, m_slot, m_latencyMark, m_retainMark, m_wraps);
+#endif
+    WindowBatchFactory::getInstance().free(m_leftBatch);
+    return 0;
+  }
+  if (m_type == TaskType::PROCESS || m_type == TaskType::ONLY_PROCESS) {
+    //std::cout << "[DBG] running a "<< taskTypeToString(m_type) << " task with id " << m_taskId << std::endl;
    // update here timestamps in the case that data is replayed from memory
-    if (m_batch->hasTimestampOffset())
-      m_batch->updateTimestamps();
-
-    next->getCode().processData(m_batch, *this, pid);
+    if (m_leftBatch->hasTimestampOffset())
+      m_leftBatch->updateTimestamps();
+
+    if (query->isTaskDropped(m_taskId)) {
+      std::cout << "[DBG] dropping "<< taskTypeToString(m_type) << " task with id " << m_taskId << std::endl;
+      if (auto &lGraph = m_leftBatch->getLineageGraph()) {
+        lGraph->freePersistentState(m_queryId);
+        if (lGraph.use_count() == 1)
+          LineageGraphFactory::getInstance().free(lGraph);
+        lGraph.reset();
+      }
+      // guard: single-stream tasks have no right batch
+      if (m_rightBatch) {
+        if (auto &rGraph = m_rightBatch->getLineageGraph()) {
+          rGraph->freePersistentState(m_queryId);
+          if (rGraph.use_count() == 1)
+            LineageGraphFactory::getInstance().free(rGraph);
+          rGraph.reset();
+        }
+      }
+      WindowBatchFactory::getInstance().free(m_leftBatch);
+      if (m_rightBatch) {
+        WindowBatchFactory::getInstance().free(m_rightBatch);
+      }
+      return 0;
+    }
+    if (!m_rightBatch) {
+      next->getCode().processData(m_leftBatch, *this, pid);
+    } else {
+      next->getCode().processData(m_leftBatch, m_rightBatch, *this, pid);
+      // Operator `next` calls `outputWindowBatchResult()` and updates `m_leftBatch`; `m_rightBatch`, if not null, is no longer needed
+      WindowBatchFactory::getInstance().free(m_rightBatch);
+    }
+
+    if (m_leftBatch == nullptr) return 0;
  }

-  if (m_batch == nullptr)
-    return 0;
+  if (m_type == TaskType::CHECKPOINT) {
+    query->getParent()->getCheckpointCoordinator()->checkpoint(pid, query->getId());
+  }

  auto handler = query->getResultHandler();
-  handler->forwardAndFree(m_batch.get());
+  handler->forwardAndFree(m_leftBatch.get());

-  WindowBatchFactory::getInstance().free(m_batch);
+  WindowBatchFactory::getInstance().free(m_leftBatch);
  return 0;
}

-void Task::outputWindowBatchResult(std::shared_ptr<WindowBatch> result) {
-  m_batch = result;
+void Task::outputWindowBatchResult(const std::shared_ptr<WindowBatch>& result) {
+  m_leftBatch = result;
}

int Task::getTaskId() {
@@ -58,8 +111,51 @@ int Task::getQueryId() {
  return m_queryId;
}

+void Task::setNumaNodeId(int node) {
+  m_numaNodeId = node;
+}
+
int Task::getNumaNodeId() {
  return m_numaNodeId;
}

+void Task::setLineageGraph(std::shared_ptr<LineageGraph> &graph) {
+  /*if (m_graph && m_graph.use_count() == 1) {
+    LineageGraphFactory::getInstance().free(m_graph);
+  }*/
+  m_graph = graph;
+}
+
+std::shared_ptr<LineageGraph> &Task::getLineageGraph() {
+  return m_graph;
+}
+
+void Task::setInsertion(char *buffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps) {
+  m_buffer = buffer;
+  m_bytes = bytes;
+  m_slot = slot;
+  m_latencyMark = latencyMark;
+  m_retainMark = retainMark;
+  m_wraps = wraps;
+}
+
+void Task::setInsertion(std::shared_ptr<UnboundedQueryBuffer> &buffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps) {
+  m_buffer = buffer->getBuffer().data();
+  m_qBuffer = buffer;
+  m_bytes = bytes;
+  m_slot = slot;
+  m_latencyMark = latencyMark;
+  m_retainMark = retainMark;
+  m_wraps = wraps;
+}
+
+void Task::setInsertion(void *rdmaBuffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps) {
+  m_rdmaBuffer = rdmaBuffer;
+  m_bytes = bytes;
+  m_slot = slot;
+  m_latencyMark = latencyMark;
+  m_retainMark = retainMark;
+  m_wraps = wraps;
+}
+
Task::~Task() = default;
diff --git a/src/tasks/Task.h b/src/tasks/Task.h
index 80c2def..02cc258 100644
--- a/src/tasks/Task.h
+++ b/src/tasks/Task.h
@@ -1,22 +1,30 @@
 #pragma once

-#include
 #include
+#include

 class WindowBatch;
+struct LineageGraph;
+class UnboundedQueryBuffer;

-enum TaskType : uint8_t { PROCESS, MERGE};
+enum TaskType : uint8_t { PROCESS, ONLY_PROCESS, MERGE, FORWARD, MERGE_FORWARD, CHECKPOINT, RECOVER, INSERT };

static inline const std::string taskTypeToString(TaskType v) {
  switch (v) {
    case PROCESS: return "PROCESS";
+    case ONLY_PROCESS: return "ONLY_PROCESS";
    case MERGE: return "MERGE";
+    case FORWARD: return "FORWARD";
+    case MERGE_FORWARD: return "MERGE_FORWARD";
+    case CHECKPOINT: return "CHECKPOINT";
+    case RECOVER: return "RECOVER";
+    case INSERT: return "INSERT";
    default:throw std::runtime_error("error: unknown task type");
  }
}

/*
- * \brief This class represent a task in LightSaber and has a pointer to @WindowBatch of data.
+ * \brief This class represents a task in LightSaber and has a pointer to @WindowBatch of data.
 *
 * The @m_numaNodeId indicates a preference for NUMA-aware scheduling.
* @@ -24,20 +32,38 @@ static inline const std::string taskTypeToString(TaskType v) { class Task { private: - std::shared_ptr m_batch; + std::shared_ptr m_leftBatch, m_rightBatch; int m_taskId; int m_queryId; int m_numaNodeId; TaskType m_type; + // used for insertion + std::shared_ptr m_qBuffer; + char *m_buffer; + void *m_rdmaBuffer; + size_t m_bytes; + size_t m_slot; + long m_latencyMark; + long m_retainMark; + int m_wraps; + + std::shared_ptr m_graph; + public: Task(); - Task(int taskId, std::shared_ptr batch, TaskType type = TaskType::PROCESS); - void set(int taskId, std::shared_ptr batch, TaskType type = TaskType::PROCESS); + Task(int taskId, const std::shared_ptr& lBatch, const std::shared_ptr& rBatch = nullptr, TaskType type = TaskType::PROCESS); + void set(int taskId, const std::shared_ptr& batch, const std::shared_ptr& rBatch = nullptr, TaskType type = TaskType::PROCESS); int run(int pid); - void outputWindowBatchResult(std::shared_ptr result); + void outputWindowBatchResult(const std::shared_ptr& result); int getTaskId(); int getQueryId(); + void setNumaNodeId(int node); int getNumaNodeId(); + void setLineageGraph(std::shared_ptr &graph); + std::shared_ptr &getLineageGraph(); + void setInsertion(char *buffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps); + void setInsertion(std::shared_ptr &buffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps); + void setInsertion(void *rdmaBuffer, size_t bytes, size_t slot, long latencyMark, long retainMark, int wraps); ~Task(); }; \ No newline at end of file diff --git a/src/tasks/TaskFactory.h b/src/tasks/TaskFactory.h index af06f6d..731bfe2 100644 --- a/src/tasks/TaskFactory.h +++ b/src/tasks/TaskFactory.h @@ -29,19 +29,22 @@ class TaskFactory { TaskFactory(TaskFactory const &) = delete; void operator=(TaskFactory const &) = delete; - std::shared_ptr newInstance(int taskId, std::shared_ptr batch, TaskType type = TaskType::PROCESS) { + std::shared_ptr newInstance(int taskId, const std::shared_ptr& lBatch, const std::shared_ptr& rBatch, TaskType type = TaskType::PROCESS) { std::shared_ptr task; bool hasRemaining = m_pool.try_pop(task); if (!hasRemaining) { m_count.fetch_add(1); - task = std::make_shared(taskId, batch, type); + task = std::make_shared(taskId, lBatch, rBatch, type); } - task->set(taskId, batch, type); + task->set(taskId, lBatch, rBatch, type); return task; } void free(std::shared_ptr &task) { //std::cout << "[DBG] free task "+std::to_string(task->getTaskId())+" task-refs "+std::to_string(task.use_count()) << std::endl; + if (task->getLineageGraph()) { + throw std::runtime_error("error: invalid place for a graph"); + } m_pool.push(task); task.reset(); } diff --git a/src/tasks/WindowBatch.cpp b/src/tasks/WindowBatch.cpp index fde4594..4e1b45b 100644 --- a/src/tasks/WindowBatch.cpp +++ b/src/tasks/WindowBatch.cpp @@ -1,6 +1,6 @@ #include "tasks/WindowBatch.h" #include "utils/Utils.h" -#include "buffers/NUMACircularQueryBuffer.h" +#include "buffers/NumaBuffer.h" #include "buffers/PartialWindowResults.h" #include "utils/Query.h" #include "utils/TupleSchema.h" @@ -14,24 +14,31 @@ * * */ -WindowBatch::WindowBatch(size_t batchSize, int taskId, int freePointer, - Query *query, QueryBuffer *buffer, - WindowDefinition *windowDefinition, TupleSchema *schema, long mark) : - m_batchSize(batchSize), m_taskId(taskId), m_freePointer(freePointer), m_query(query), m_inputBuffer(buffer), +WindowBatch::WindowBatch(size_t batchSize, int taskId, long freePointer1, + long freePointer2, Query *query, 
QueryBuffer *buffer, + WindowDefinition *windowDefinition, TupleSchema *schema, long mark, + long prevFreePointer1, long prevFreePointer2) : + m_batchSize(batchSize), m_taskId(taskId), m_pid(0), m_freePointer1(freePointer1), + m_freePointer2(freePointer2), m_prevFreePointer1(prevFreePointer1), + m_prevFreePointer2(prevFreePointer2), m_query(query), m_inputBuffer(buffer), m_openingWindows(nullptr), m_closingWindows(nullptr), m_pendingWindows(nullptr), m_completeWindows(nullptr), m_windowDefinition(windowDefinition), m_schema(schema), m_latencyMark(mark), m_startPointer(-1), m_endPointer(-1), m_streamStartPointer(-1), m_streamEndPointer(-1), m_startTimestamp(-1), m_endTimestamp(-1), m_windowStartPointers(SystemConf::getInstance().PARTIAL_WINDOWS), m_windowEndPointers(SystemConf::getInstance().PARTIAL_WINDOWS), m_lastWindowIndex(0), - m_fragmentedWindows(false), m_hasPendingWindows(false), m_initialised(false) {} + m_fragmentedWindows(false), m_hasPendingWindows(false), m_initialised(false), m_type(TaskType::PROCESS) {} -void WindowBatch::set(size_t batchSize, int taskId, int freePointer, - Query *query, QueryBuffer *buffer, - WindowDefinition *windowDefinition, TupleSchema *schema, long mark) { +void WindowBatch::set(size_t batchSize, int taskId, long freePointer1, + long freePointer2, Query *query, QueryBuffer *buffer, + WindowDefinition *windowDefinition, TupleSchema *schema, long mark, + long prevFreePointer1, long prevFreePointer2) { m_batchSize = batchSize; m_taskId = taskId; - m_freePointer = freePointer; + m_freePointer1 = freePointer1; + m_freePointer2 = freePointer2; + m_prevFreePointer1 = prevFreePointer1; + m_prevFreePointer2 = prevFreePointer2; m_query = query; m_inputBuffer = buffer; m_windowDefinition = windowDefinition; @@ -53,8 +60,11 @@ void WindowBatch::set(size_t batchSize, int taskId, int freePointer, m_replayTimestamps = false; m_offset = 0; + m_type = TaskType::PROCESS; + #if defined(HAVE_NUMA) - m_numaNodeId = ((NUMACircularQueryBuffer *) m_inputBuffer)->geNumaNodeWithPtr(m_freePointer); + NumaBuffer *b = dynamic_cast(m_inputBuffer); + m_numaNodeId = (m_inputBuffer) ? 
b->geNumaNodeWithPtr(m_freePointer1) : 0; #else m_numaNodeId = 0; #endif @@ -106,7 +116,9 @@ QueryBuffer *WindowBatch::getInputQueryBuffer() { ByteBuffer &WindowBatch::getBuffer() { #if defined(HAVE_NUMA) - return ((NUMACircularQueryBuffer *)m_inputBuffer)->getBuffer(m_numaNodeId); + NumaBuffer *b = dynamic_cast(m_inputBuffer); + assert(b != nullptr && "error: invalid buffer pointer"); + return b->getBuffer(m_numaNodeId); #else return m_inputBuffer->getBuffer(); #endif @@ -114,7 +126,7 @@ ByteBuffer &WindowBatch::getBuffer() { char *WindowBatch::getBufferRaw() { #if defined(HAVE_NUMA) - NUMACircularQueryBuffer *b = dynamic_cast(m_inputBuffer); + NumaBuffer *b = dynamic_cast(m_inputBuffer); assert(b != nullptr && "error: invalid buffer pointer"); return b->getBufferRaw(m_numaNodeId); #else @@ -142,25 +154,53 @@ WindowDefinition *WindowBatch::getWindowDefinition() { return m_windowDefinition; } -int WindowBatch::getFreePointer() { - return m_freePointer; +void WindowBatch::setLineageGraph(std::shared_ptr &graph) { + /*if (m_graph && m_graph.use_count() == 1) { + LineageGraphFactory::getInstance().free(m_graph); + }*/ + m_graph.reset(); + m_graph = std::move(graph); + if (graph) { + graph.reset(); + } +} + +std::shared_ptr &WindowBatch::getLineageGraph() { + return m_graph; } -int WindowBatch::getBufferStartPointer() { - return m_startPointer; +long WindowBatch::getFreePointer() { + return m_freePointer1; +} + +long WindowBatch::getSecondFreePointer() { + return m_freePointer2; +} + +long WindowBatch::getPrevFreePointer() { + return m_prevFreePointer1; +} + +long WindowBatch::getPrevSecondFreePointer() { + return m_prevFreePointer2; +} +long WindowBatch::getBufferStartPointer() { + return m_startPointer; } -int WindowBatch::getBufferEndPointer() { +long WindowBatch::getBufferEndPointer() { return m_endPointer; } -void WindowBatch::setBufferPointers(int startP, int endP) { +void WindowBatch::setBufferPointers(long startP, long endP) { #if defined(HAVE_NUMA) - m_startPointer = startP % ((NUMACircularQueryBuffer*)m_inputBuffer)->getBuffer(m_numaNodeId).size(); - m_endPointer = endP % ((NUMACircularQueryBuffer*)m_inputBuffer)->getBuffer(m_numaNodeId).size(); + NumaBuffer *b = dynamic_cast(m_inputBuffer); + assert(b != nullptr && "error: invalid buffer pointer"); + m_startPointer = startP % b->getBuffer(m_numaNodeId).size(); + m_endPointer = endP % b->getBuffer(m_numaNodeId).size(); if (m_endPointer == 0) - m_endPointer = ((NUMACircularQueryBuffer*)m_inputBuffer)->getBuffer(m_numaNodeId).size(); + m_endPointer = b->getBuffer(m_numaNodeId).size(); #else m_startPointer = startP; m_endPointer = endP; @@ -211,9 +251,11 @@ bool WindowBatch::hasTimestampOffset() { } void WindowBatch::updateTimestamps() { #if defined(HAVE_NUMA) - auto buf = (long *) ((NUMACircularQueryBuffer*)m_inputBuffer)->getBuffer(m_numaNodeId).data(); + NumaBuffer *b = dynamic_cast(m_inputBuffer); + assert(b != nullptr && "error: invalid buffer pointer"); + auto buf = (long *) b->getBuffer(m_numaNodeId).data(); #else - auto buf = (long *) m_inputBuffer->getBuffer().data(); + auto buf = (long *) m_inputBuffer->getBufferRaw(); #endif auto tupleSize = m_schema->getTupleSize(); @@ -281,6 +323,7 @@ int WindowBatch::getLastWindowIndex() { } void WindowBatch::clear() { + m_graph.reset(); m_initialised = false; m_openingWindows.reset(); m_closingWindows.reset(); @@ -294,11 +337,11 @@ void WindowBatch::resetWindowPointers() { std::fill(m_windowEndPointers.begin(), m_windowEndPointers.end(), -1); } -int WindowBatch::normalise(int pointer) { - 
return (int) m_inputBuffer->normalise((long) pointer); +long WindowBatch::normalise(long pointer) { + return m_inputBuffer->normalise(pointer); } -long WindowBatch::getTimestamp(int index) { +long WindowBatch::getTimestamp(long index) { long value = m_inputBuffer->getLong((size_t) index); if (SystemConf::getInstance().LATENCY_ON) return (long) Utils::getTupleTimestamp(value); @@ -332,6 +375,22 @@ long WindowBatch::getEmptyEndWindowId() { return m_emptyEndWindowId; } +void WindowBatch::setWatermark (long watermark) { + m_watermark = watermark; +} + +long WindowBatch::getWatermark () { + return m_watermark; +} + +void WindowBatch::setPartialBuffer(char *partial) { + m_partialBuffer = partial; +} + +char *WindowBatch::getPartialBuffer() { + return m_partialBuffer; +} + void WindowBatch::initPartialWindowPointers() { if (m_initialised) throw std::runtime_error("error: batch window pointers already initialised"); diff --git a/src/tasks/WindowBatch.h b/src/tasks/WindowBatch.h index b902f65..ee37ca8 100644 --- a/src/tasks/WindowBatch.h +++ b/src/tasks/WindowBatch.h @@ -1,8 +1,8 @@ #pragma once -#include -#include #include +#include +#include #include "buffers/QueryBuffer.h" @@ -10,6 +10,7 @@ class PartialWindowResults; class WindowDefinition; class TupleSchema; class Query; +struct LineageGraph; enum TaskType : uint8_t; /* @@ -26,14 +27,14 @@ class WindowBatch { int m_taskId; int m_pid; int m_numaNodeId; - int m_freePointer; + long m_freePointer1, m_freePointer2; + long m_prevFreePointer1, m_prevFreePointer2; Query *m_query; QueryBuffer *m_inputBuffer; /* buffer holding the results when no window semantics are required */ std::shared_ptr m_outputBuffer; /* buffers holding the results of window fragments */ - std::shared_ptr m_openingWindows, m_closingWindows, - m_pendingWindows, m_completeWindows; + std::shared_ptr m_openingWindows, m_closingWindows, m_pendingWindows, m_completeWindows; WindowDefinition *m_windowDefinition; TupleSchema *m_schema; @@ -60,14 +61,22 @@ class WindowBatch { TaskType m_type; + std::shared_ptr m_graph; + + // variables added for watermarks + long m_watermark = LONG_MIN; + char * m_partialBuffer; + public: - WindowBatch(size_t batchSize = 0, int taskId = 0, int freePointer = INT_MIN, + WindowBatch(size_t batchSize = 0, int taskId = 0, + long freePointer1 = INT_MIN, long freePointer2 = INT_MIN, Query *query = nullptr, QueryBuffer *buffer = nullptr, - WindowDefinition *windowDefinition = nullptr, - TupleSchema *schema = nullptr, long mark = 0); - void set(size_t batchSize, int taskId, int freePointer, Query *query, - QueryBuffer *buffer, WindowDefinition *windowDefinition, - TupleSchema *schema, long mark); + WindowDefinition *windowDefinition = nullptr, TupleSchema *schema = nullptr, long mark = 0, + long prevFreePointer1 = -1, long prevFreePointer2 = -1); + void set(size_t batchSize, int taskId, long freePointer1, + long freePointer2, Query *query, QueryBuffer *buffer, + WindowDefinition *windowDefinition, TupleSchema *schema, long mark, + long prevFreePointer1 = -1, long prevFreePointer2 = -1); int getBatchSize(); int getTaskId(); void setTaskId(int taskId); @@ -94,10 +103,15 @@ class WindowBatch { TupleSchema *getSchema(); void setSchema(TupleSchema *schema); WindowDefinition *getWindowDefinition(); - int getFreePointer(); - int getBufferStartPointer(); - int getBufferEndPointer(); - void setBufferPointers(int startP, int endP); + void setLineageGraph(std::shared_ptr &graph); + std::shared_ptr &getLineageGraph(); + long getFreePointer(); + long getSecondFreePointer(); 
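+  // The prev* getters below return the free pointers captured before the
+  // latest update; presumably they let lineage-based recovery rewind a batch
+  // to its previous slot boundaries.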
+  long getPrevFreePointer();
+  long getPrevSecondFreePointer();
+  long getBufferStartPointer();
+  long getBufferEndPointer();
+  void setBufferPointers(long startP, long endP);
  long getStreamStartPointer();
  long getStreamEndPointer();
  void setStreamPointers(long startP, long endP);
@@ -116,14 +130,21 @@ class WindowBatch {
  int getLastWindowIndex();
  void clear();
  void resetWindowPointers();
-  int normalise(int pointer);
-  long getTimestamp(int index);
+  long normalise(long pointer);
+  long getTimestamp(long index);
  void setPrevTimestamps(long startTime, long endTime);
  long getPrevStartTimestamp();
  long getPrevEndTimestamp();
  void setEmptyWindowIds(long emptyStartWindow, long emptyEndWindow);
  long getEmptyStartWindowId();
  long getEmptyEndWindowId();
+
+  // functions for watermarks
+  void setWatermark (long watermark);
+  long getWatermark ();
+  void setPartialBuffer (char *partial);
+  char *getPartialBuffer ();
+
  void initPartialWindowPointers();
  void initPartialRangeBasedWindowPointers();
  void initPartialCountBasedWindowPointers();
diff --git a/src/tasks/WindowBatchFactory.h b/src/tasks/WindowBatchFactory.h
index f5409aa..fda6e7c 100644
--- a/src/tasks/WindowBatchFactory.h
+++ b/src/tasks/WindowBatchFactory.h
@@ -29,22 +29,38 @@ class WindowBatchFactory {
  void operator=(WindowBatchFactory const &) = delete;

  std::shared_ptr<WindowBatch> newInstance(
-      size_t batchSize, int taskId, int freePointer,
-      Query *query, QueryBuffer *buffer,
-      WindowDefinition *window, TupleSchema *schema, long latencyMark) {
+      size_t batchSize, int taskId, long freePointer1,
+      long freePointer2, Query *query, QueryBuffer *buffer,
+      WindowDefinition *window, TupleSchema *schema, long latencyMark,
+      long prevFreePointer1 = -1, long prevFreePointer2 = -1) {
    std::shared_ptr<WindowBatch> windowBatch;
    bool hasRemaining = pool.try_pop(windowBatch);
    if (!hasRemaining) {
      count.fetch_add(1, std::memory_order_seq_cst);
-      windowBatch = std::make_shared<WindowBatch>(batchSize, taskId, freePointer,
-                                                  query, buffer, window, schema, latencyMark);
+      windowBatch = std::make_shared<WindowBatch>(batchSize, taskId, freePointer1, freePointer2,
+                                                  query, buffer, window, schema, latencyMark,
+                                                  prevFreePointer1, prevFreePointer2);
+    } else if (SystemConf::getInstance().LINEAGE_ON) {
+      while (windowBatch.use_count() != 1) {
+        hasRemaining = pool.try_pop(windowBatch);
+        if (!hasRemaining) {
+          count.fetch_add(1, std::memory_order_seq_cst);
+          windowBatch = std::make_shared<WindowBatch>(batchSize, taskId, freePointer1, freePointer2,
+                                                      query, buffer, window, schema, latencyMark,
+                                                      prevFreePointer1, prevFreePointer2);
+        }
+      }
    }
-    windowBatch->set(batchSize, taskId, freePointer,
-                     query, buffer, window, schema, latencyMark);
+    windowBatch->set(batchSize, taskId, freePointer1, freePointer2,
+                     query, buffer, window, schema, latencyMark,
+                     prevFreePointer1, prevFreePointer2);
    return windowBatch;
  }

-  void free(std::shared_ptr<WindowBatch> &windowBatch) {
+  void free(const std::shared_ptr<WindowBatch>& windowBatch) {
+    if (windowBatch->getLineageGraph()) {
+      throw std::runtime_error("error: invalid place for a graph");
+    }
    windowBatch->clear();
    pool.push(windowBatch);
  }
diff --git a/src/utils/Async.h b/src/utils/Async.h
new file mode 100644
index 0000000..aaab636
--- /dev/null
+++ b/src/utils/Async.h
@@ -0,0 +1,138 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "utils/Status.h"
+
+#define RETURN_NOT_OK(s) \
+  do { \
+    Status _s = (s); \
+    if (_s != Status::Ok) return _s; \
+  } while (0)
+
+class IAsyncContext;
+
+/*
+ * \brief Signature of the async callback for I/Os.
+ *
+ * */
+
+typedef void (*AsyncIOCallback)(IAsyncContext* context, Status result,
+                                size_t bytesTransferred);
+
+/*
+ * \brief Standard interface for contexts used by async callbacks.
+ *
+ * */
+
+class IAsyncContext {
+ private:
+  // Whether the internal state for the async context has been copied to a
+  // heap-allocated memory block.
+  bool m_fromDeepCopy;
+
+ public:
+  IAsyncContext() : m_fromDeepCopy{false} {}
+
+  virtual ~IAsyncContext() {}
+
+  // Contexts are initially allocated (as local variables) on the stack. When an
+  // operation goes async, it deep copies its context to a new heap allocation;
+  // this context must also deep copy its parent context, if any. Once a context
+  // has been deep copied, subsequent DeepCopy() calls just return the original,
+  // heap-allocated copy.
+  Status deepCopy(IAsyncContext*& contextCopy) {
+    if (m_fromDeepCopy) {
+      // Already on the heap: nothing to do.
+      contextCopy = this;
+      return Status::Ok;
+    } else {
+      RETURN_NOT_OK(deepCopyInternal(contextCopy));
+      contextCopy->m_fromDeepCopy = true;
+      return Status::Ok;
+    }
+  }
+
+  // Whether the internal state for the async context has been copied to a
+  // heap-allocated memory block.
+  bool getFromDeepCopy() const { return m_fromDeepCopy; }
+
+  void setFromDeepCopy() { m_fromDeepCopy = true; }
+
+ protected:
+  // Override this method to make a deep, persistent copy of your context. A
+  // context should:
+  // 1. Allocate memory for its copy. If the allocation fails, return
+  //    Status::OutOfMemory.
+  // 2. If it has a parent/caller context, call DeepCopy() on that context. If
+  //    the call fails, free the memory it just allocated and return the call's error code.
+  // 3. Initialize its copy and return Status::Ok.
+  virtual Status deepCopyInternal(IAsyncContext*& contextCopy) = 0;
+
+  // A common pattern: deep copy, when context has no parent/caller context.
+  template <class C>
+  inline static Status deepCopyInternal(C& context,
+                                        IAsyncContext*& contextCopy) {
+    contextCopy = nullptr;
+    auto ctxt = std::unique_ptr<C>(new C(context));
+    if (!ctxt.get()) return Status::OutOfMemory;
+    contextCopy = ctxt.release();
+    return Status::Ok;
+  }
+  // Another common pattern: deep copy, when context has a parent/caller
+  // context.
+  template <class C>
+  inline static Status deepCopyInternal(C& context,
+                                        IAsyncContext* callerContext,
+                                        IAsyncContext*& contextCopy) {
+    contextCopy = nullptr;
+    IAsyncContext* callerContextCopy;
+    RETURN_NOT_OK(callerContext->deepCopy(callerContextCopy));
+    //auto ctxt = std::make_unique<C>(context, callerContextCopy);
+    auto ctxt = std::unique_ptr<C>(new C(context, callerContextCopy));
+    if (!ctxt.get()) return Status::OutOfMemory;
+    contextCopy = ctxt.release();
+    return Status::Ok;
+  }
+};
+
+/*
+ * \brief User-defined callbacks for async operations (the interface follows
+ * FishStore). Async callback equivalent of: Status some_function(context* arg).
+ *
+ * */
+
+typedef void (*AsyncCallback)(IAsyncContext* ctxt, Status result);
+
+/*
+ * \brief Helper class, for use inside a continuation callback, that ensures the
+ * context will be freed when the callback exits.
+ *
+ * */
+
+template <class C>
+class CallbackContext {
+ public:
+  bool m_async;
+ protected:
+  std::unique_ptr<C> m_context;
+
+ public:
+  CallbackContext(IAsyncContext* context) : m_async{false} {
+    m_context = std::unique_ptr<C>(static_cast<C*>(context));
+  }
+  C* get() const { return m_context.get(); }
+  C* operator->() const { return m_context.get(); }
+  ~CallbackContext() {
+    if (m_async || !m_context->getFromDeepCopy()) {
+      // The callback went async again, or it never went async. The next
+      // callback or the caller is responsible for freeing the context.
+      m_context.release();
+    }
+  }
+};
\ No newline at end of file
diff --git a/src/utils/AttributeType.cpp b/src/utils/AttributeType.cpp
index 202adf5..6c115fd 100644
--- a/src/utils/AttributeType.cpp
+++ b/src/utils/AttributeType.cpp
@@ -6,7 +6,7 @@ const std::map<BasicType, std::string>
 AttributeType::m_typeNames({{BasicType::Integer, "int"},
                             {BasicType::Float, "float"},
                             {BasicType::Long, "long"},
-                            {BasicType::LongLong, "longlong"},
+                            {BasicType::LongLong, "__uint128_t"},
                             {BasicType::Double, "double"},
                             {BasicType::Char, "char"},
                             {BasicType::String, "string"}});
@@ -15,4 +15,4 @@ const std::map<std::string, BasicType> AttributeType::m_namesToTypes
     {"int", BasicType::Integer}, {"float", BasicType::Float},
     {"long", BasicType::Long},   {"double", BasicType::Double},
     {"char", BasicType::Char},   {"string", BasicType::String},
-    {"longlong", BasicType::LongLong}};
+    {"__uint128_t", BasicType::LongLong}};
diff --git a/src/utils/Channel.h b/src/utils/Channel.h
new file mode 100644
index 0000000..e9ab9a2
--- /dev/null
+++ b/src/utils/Channel.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <condition_variable>
+#include <list>
+#include <mutex>
+#include <stdexcept>
+
+/*
+ * \brief A simple communication channel implementation.
+ *
+ * */
+
+template <class item>
+class Channel {
+ private:
+  std::list<item> m_queue;
+  std::mutex m_mutex;
+  std::condition_variable m_cv;
+  bool m_closed;
+
+ public:
+  Channel() : m_closed(false) {}
+  void close() {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    m_closed = true;
+    m_cv.notify_all();
+  }
+  bool is_closed() {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    return m_closed;
+  }
+
+  void put(const item &i) {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    if (m_closed) throw std::logic_error("error: put to closed channel");
+    m_queue.push_back(i);
+    m_cv.notify_one();
+  }
+
+  bool get(item &out, bool wait = true) {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    if (wait) m_cv.wait(lock, [&]() { return m_closed || !m_queue.empty(); });
+    if (m_queue.empty()) return false;
+    out = m_queue.front();
+    m_queue.pop_front();
+    return true;
+  }
+};
\ No newline at end of file
diff --git a/src/utils/Guid.h b/src/utils/Guid.h
new file mode 100644
index 0000000..08d2f1d
--- /dev/null
+++ b/src/utils/Guid.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <uuid/uuid.h>
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <string>
+
+class Guid {
+ public:
+  Guid() { uuid_clear(m_uuid); }
+
+ private:
+  Guid(const uuid_t uuid) { uuid_copy(m_uuid, uuid); }
+
+ public:
+  static Guid Create() {
+    uuid_t uuid;
+    uuid_generate(uuid);
+    return uuid;
+  }
+
+  static Guid Parse(const std::string str) {
+    uuid_t uuid;
+    int result = uuid_parse(const_cast<char *>(str.c_str()), uuid);
+    assert(result == 0);
+    return uuid;
+  }
+
+  std::string ToString() const {
+    char buffer[37];
+    uuid_unparse(m_uuid, buffer);
+    return std::string{buffer};
+  }
+
+  bool operator==(const Guid& other) const {
+    return uuid_compare(m_uuid, other.m_uuid) == 0;
+  }
+
+  uint32_t GetHashCode() const {
+    uint32_t Data1;
+    uint16_t Data2;
+    uint16_t Data3;
+    std::memcpy(&Data1, m_uuid, sizeof(Data1));
+    std::memcpy(&Data2, m_uuid + 4, sizeof(Data2));
+
std::memcpy(&Data3, m_uuid + 6, sizeof(Data3)); + return Data1 ^ + ((static_cast(Data2) << 16) | + static_cast(Data3)) ^ + ((static_cast(m_uuid[10]) << 24) | m_uuid[15]); + } + + private: + uuid_t m_uuid; +}; + +// Implement std::hash<> for GUIDs. +namespace std { +template <> +struct hash { + size_t operator()(const Guid& val) const { return val.GetHashCode(); } +}; +} // namespace std \ No newline at end of file diff --git a/src/utils/NumaAllocator.h b/src/utils/NumaAllocator.h index 7ca3765..580f14a 100644 --- a/src/utils/NumaAllocator.h +++ b/src/utils/NumaAllocator.h @@ -1,8 +1,18 @@ -#include +#include #include +#include +#include namespace NumaAlloc { +#define ALIGNMENT 512 + +static inline void memset_16aligned(void *space, char byte, size_t nbytes) { + assert((nbytes & 0x0F) == 0); + assert(((uintptr_t)space & 0x0F) == 0); + memset(space, byte, nbytes); +} + /** * \brief An STL allocator that uses memory of a specific NUMA node only * based on The library KASKADE 7 @@ -98,9 +108,17 @@ return a.max_size() / sizeof(T); numa_set_preferred(m_numaNode); numa_set_bind_policy(m_numaNode); numa_set_strict(m_numaNode); - auto ret = numa_alloc_onnode(n * sizeof(T), m_numaNode); - if (!ret) - throw std::bad_alloc(); + //auto ret = numa_alloc_onnode(n * sizeof(T), m_numaNode); + //if (!ret) throw std::bad_alloc(); + + uintptr_t mask = ~(uintptr_t)(ALIGNMENT - 1); + auto mem = numa_alloc_onnode(n * sizeof(T) + ALIGNMENT - 1, m_numaNode); + if (!mem) throw std::bad_alloc(); + void *ret = (void *)(((uintptr_t)mem + ALIGNMENT - 1) & mask); + assert((ALIGNMENT & (ALIGNMENT - 1)) == 0); + printf("0x%08" PRIXPTR ", 0x%08" PRIXPTR "\n", (uintptr_t)mem, + (uintptr_t)ret); + memset_16aligned(ret, 0, std::min((size_t)1024, n * sizeof(T))); ((char *) ret)[0] = 0; //memset(ret, 0, n); @@ -108,8 +126,11 @@ return a.max_size() / sizeof(T); } void deallocate(m_pointer p, m_size_type n) { - if (p) + if (p) { numa_free(static_cast(p), n * sizeof(T)); + // todo: fix leaking memory here + // numa_free(((void**)p)[-1], n * sizeof(T)); + } } template diff --git a/src/utils/Query.cpp b/src/utils/Query.cpp index 5777d7c..e1cd3ec 100644 --- a/src/utils/Query.cpp +++ b/src/utils/Query.cpp @@ -1,60 +1,112 @@ #include "utils/Query.h" -#include "utils/QueryApplication.h" -#include "utils/TupleSchema.h" -#include "utils/QueryOperator.h" -#include "dispatcher/TaskDispatcher.h" -#include "tasks/Task.h" -#include "result/ResultHandler.h" -#include "monitors/LatencyMonitor.h" + +#include + #include "cql/operators/AggregateOperatorCode.h" #include "cql/operators/Selection.h" +#include "dispatcher/JoinTaskDispatcher.h" +#include "dispatcher/TaskDispatcher.h" +#include "monitors/LatencyMonitor.h" +#include "result/ResultHandler.h" +#include "utils/QueryApplication.h" +#include "utils/QueryConfig.h" +#include "utils/QueryOperator.h" +#include "utils/TupleSchema.h" #if defined(HAVE_NUMA) -#include "buffers/NUMACircularQueryBuffer.h" +#include "buffers/NumaCircularQueryBuffer.h" +#include "buffers/PersistentNumaCircularQueryBuffer.h" #else #include "buffers/CircularQueryBuffer.h" +#include "buffers/PersistentCircularQueryBuffer.h" #endif -#include - Query::Query(int id, std::vector &operators, WindowDefinition window, TupleSchema *schema, long timestampReference, bool hasWindowFragments, bool replayTimestamps, - bool copyDataOnInsert, bool useParallelMerge, int multipleQueries) - : m_window(window), - m_schema(schema), + bool copyDataOnInsert, bool useParallelMerge, int multipleQueries, bool persistInput, QueryConfig *config, bool 
clearFiles) + : Query(id, operators, window, schema, WindowDefinition(), nullptr, + timestampReference, hasWindowFragments, replayTimestamps, + copyDataOnInsert, useParallelMerge, multipleQueries, persistInput, config, clearFiles) {} + +Query::Query(int id, std::vector &operators, WindowDefinition firstWindow, + TupleSchema *firstSchema, WindowDefinition secondWindow, + TupleSchema *secondSchema, long timestampReference, bool hasWindowFragments, bool replayTimestamps, + bool copyDataOnInsert, bool useParallelMerge, int multipleQueries, bool persistInput, QueryConfig *config, bool clearFiles) + : m_firstWindow(firstWindow), m_secondWindow(secondWindow), m_firstSchema(firstSchema), m_secondSchema(secondSchema), m_config(config), #if defined(HAVE_NUMA) - m_circularBuffer(std::make_shared(id, SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, schema->getTupleSize(), copyDataOnInsert)), + m_firstCircularBuffer(!persistInput ? std::shared_ptr(std::make_shared(id * 10, + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, m_firstSchema->getTupleSize(), copyDataOnInsert, + m_config ? m_config->getBatchSize() : SystemConf::getInstance().BATCH_SIZE, clearFiles)) : + std::make_shared(id * 10, + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, m_firstSchema->getTupleSize(), copyDataOnInsert, + m_config ? m_config->getBatchSize() : SystemConf::getInstance().BATCH_SIZE, + nullptr, clearFiles)), + m_secondCircularBuffer(!persistInput || !secondSchema ? std::shared_ptr(std::make_shared(id * 10 + 1,(secondSchema == nullptr) ? + 1 : m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, 1, copyDataOnInsert, + m_config ? m_config->getBatchSize() : SystemConf::getInstance().BATCH_SIZE, clearFiles)) : + std::make_shared(id * 10 + 1, + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, m_firstSchema->getTupleSize(), copyDataOnInsert, + m_config ? m_config->getBatchSize() : SystemConf::getInstance().BATCH_SIZE, + nullptr, clearFiles)), #else - m_circularBuffer(std::make_shared(id, + m_firstCircularBuffer(!persistInput ? std::shared_ptr(std::make_shared(id * 10, + m_config ? m_config->getCircularBufferSize() : SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, - schema->getTupleSize(), - copyDataOnInsert)), + m_firstSchema->getTupleSize(), + copyDataOnInsert, + m_config ? m_config->getBatchSize() : + SystemConf::getInstance().BATCH_SIZE, clearFiles)) : + std::make_shared(id * 10, + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, + m_firstSchema->getTupleSize(), + copyDataOnInsert, m_config ? m_config->getBatchSize() : + SystemConf::getInstance().BATCH_SIZE, + nullptr, clearFiles)), + m_secondCircularBuffer(!persistInput || !secondSchema ? std::shared_ptr(std::make_shared(id * 10 + 1, (secondSchema == nullptr) ? 0 : + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, + (secondSchema == nullptr) ? 1 : m_secondSchema->getTupleSize(), + copyDataOnInsert, + m_config ? m_config->getBatchSize() : + SystemConf::getInstance().BATCH_SIZE, clearFiles)) : + std::make_shared(id * 10 + 1, (secondSchema == nullptr) ? 0 : + m_config ? m_config->getCircularBufferSize() : + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, + (secondSchema == nullptr) ? 1 : m_secondSchema->getTupleSize(), + copyDataOnInsert, m_config ? 
m_config->getBatchSize() : + SystemConf::getInstance().BATCH_SIZE, nullptr, clearFiles)), #endif - m_dispatcher(std::make_shared(*this, - *m_circularBuffer, - replayTimestamps)), + m_dispatcher((secondSchema == nullptr) ? + std::shared_ptr(std::make_shared(*this, *m_firstCircularBuffer, replayTimestamps)) : + std::make_shared(*this, *m_firstCircularBuffer, *m_secondCircularBuffer, replayTimestamps)), m_resultHandler(std::make_shared(*this, - *m_circularBuffer, - hasWindowFragments, - useParallelMerge)), + *m_firstCircularBuffer, + *m_secondCircularBuffer, + (secondSchema == nullptr) && hasWindowFragments, + (secondSchema == nullptr) && useParallelMerge)), m_timestampReference(timestampReference), - m_latencyMonitor(std::make_unique(m_timestampReference)), + m_latencyMonitor(std::make_unique(m_timestampReference, clearFiles)), m_operators(operators), m_id(id), m_numberOfUpstreamQueries(0), m_numberOfDownstreamQueries(0), - m_upstreamQueries(2), - m_downstreamQueries(2), - m_numOfWindowDefinitions(multipleQueries) { - + m_upstreamQueries(2, nullptr), + m_downstreamQueries(2, nullptr), + m_numOfWindowDefinitions(multipleQueries), + m_markedForCheckpoint(true) { // Merge and re-order operators into a single unit. Set it up accordingly for the pipeline. for (auto op: m_operators) { - op->setParent(this); + op->setParent(this, m_id, m_firstCircularBuffer->getCapacity()/m_firstCircularBuffer->getBatchSize()); if (op->isMostUpstream()) m_mostUpstreamOperator = op; if (op->isMostDownstream()) { m_mostDownstreamOperator = op; if (Selection *code = dynamic_cast(&op->getCode())) { - setOutputSchema(schema); + setOutputSchema(m_firstSchema); } else { setOutputSchema(&op->getCode().getOutputSchema()); } @@ -66,12 +118,23 @@ Query::Query(int id, std::vector &operators, WindowDefinition w for (int i = 0; i < 2; ++i) m_upstreamQueries[i] = m_downstreamQueries[i] = nullptr; + // set up a reference from the buffer to the query + // that is used for persistence + m_firstCircularBuffer->setQuery( + this, (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON) + ? m_mostDownstreamOperator->getCode().getInputCols() + : nullptr); + if (secondSchema) + m_secondCircularBuffer->setQuery(this, (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON) + ? 
m_mostDownstreamOperator->getCode().getSecondInputCols() + : nullptr); + if (!SystemConf::getInstance().LATENCY_ON) m_latencyMonitor->disable(); } int Query::getSchemaTupleSize() { - return m_schema->getTupleSize(); + return m_firstSchema->getTupleSize(); } void Query::setName(std::string name) { @@ -105,6 +168,10 @@ bool Query::isMostDownstream() { return (m_numberOfDownstreamQueries == 0); } +QueryConfig *Query::getConfig() { + return m_config; +} + QueryOperator *Query::getMostUpstreamOperator() { return m_mostUpstreamOperator; } @@ -113,6 +180,12 @@ QueryOperator *Query::getMostDownstreamOperator() { return m_mostDownstreamOperator; } +QueryOperator *Query::getOperator() { + if (m_operators.size() != 1) + throw std::runtime_error("error: the query can have only one operator"); + return m_operators[0]; +} + QueryApplication *Query::getParent() { return m_parent; } @@ -125,13 +198,21 @@ void Query::setParent(QueryApplication *parent) { } QueryBuffer *Query::getBuffer() { - return m_circularBuffer.get(); + return m_firstCircularBuffer.get(); +} + +QueryBuffer *Query::getSecondBuffer() { + return m_secondCircularBuffer.get(); } -std::shared_ptr Query::getTaskDispatcher() { +std::shared_ptr Query::getTaskDispatcher() { return m_dispatcher; } +void Query::setTaskQueue(std::shared_ptr &queue) { + m_taskQueue = queue; +} + std::shared_ptr Query::getTaskQueue() { return m_taskQueue; } @@ -152,12 +233,48 @@ long Query::getBytesGenerated() { return m_resultHandler->getTotalOutputBytes(); } +void Query::startDroppingTasks(int task) { + m_dropTasks = true; + m_taskToDrop = task; +} + +bool Query::isTaskDropped(int task) { + if (m_dropTasks) { + return task <= m_taskToDrop; + } + return false; +} + +long Query::getTimestampReference() { + return m_latencyMonitor->getTimestampReference(); +} + +long Query::getLastTimestamp() { + return m_latencyMonitor->getLastTimestamp(); +} + WindowDefinition &Query::getWindowDefinition() { - return m_window; + return m_firstWindow; } TupleSchema *Query::getSchema() { - return m_schema; + return m_firstSchema; +} + +WindowDefinition &Query::getFirstWindowDefinition() { + return m_firstWindow; +} + +TupleSchema *Query::getFirstSchema() { + return m_firstSchema; +} + +WindowDefinition &Query::getSecondWindowDefinition() { + return m_secondWindow; +} + +TupleSchema *Query::getSecondSchema() { + return m_secondSchema; } TupleSchema *Query::getOutputSchema() { @@ -189,6 +306,14 @@ bool Query::getIsLeft() { return m_isLeft; } +void Query::markForCheckpoint(bool mark) { + m_markedForCheckpoint = mark; +} + +bool Query::isMarkedForCheckpoint() { + return m_markedForCheckpoint; +} + Query *Query::getUpstreamQuery() { return m_upstreamQueries[0]; } diff --git a/src/utils/Query.h b/src/utils/Query.h index 6979e7c..204b2e4 100644 --- a/src/utils/Query.h +++ b/src/utils/Query.h @@ -6,7 +6,7 @@ #include #include -class TaskDispatcher; +class ITaskDispatcher; class WindowDefinition; class QueryApplication; class TupleSchema; @@ -15,6 +15,7 @@ class ResultHandler; class AggregateOperatorCode; class QueryBuffer; class LatencyMonitor; +class QueryConfig; /* * \brief This class represents a sequence of pipelineable operators @@ -28,13 +29,14 @@ class LatencyMonitor; class Query { private: - WindowDefinition m_window; - TupleSchema *m_schema; + WindowDefinition m_firstWindow, m_secondWindow; + TupleSchema *m_firstSchema, *m_secondSchema; TupleSchema *m_outputSchema; - std::shared_ptr m_circularBuffer; + QueryConfig *m_config = nullptr; + std::shared_ptr m_firstCircularBuffer, 
m_secondCircularBuffer; size_t m_taskQueueCapacity; std::shared_ptr m_taskQueue; - std::shared_ptr m_dispatcher; + std::shared_ptr m_dispatcher; std::shared_ptr m_resultHandler; long m_timestampReference = 0L; std::unique_ptr m_latencyMonitor; @@ -54,12 +56,21 @@ class Query { bool m_isLeft = false; int m_numOfWindowDefinitions = 0; + bool m_dropTasks = false; + int m_taskToDrop = -1; + + bool m_markedForCheckpoint; public: Query(int id, std::vector &operators, WindowDefinition window, TupleSchema *schema = nullptr, long timestampReference = 0L, bool hasWindowFragments = false, bool replayTimestamps = false, bool copyDataOnInsert = true, bool useParallelMerge = false, - int multipleQueries = 0); + int multipleQueries = 0, bool persistInput = false, QueryConfig *config = nullptr, bool clearFiles = true); + Query(int id, std::vector &operators, WindowDefinition firstWindow, + TupleSchema *firstSchema, WindowDefinition secondWindow, + TupleSchema *secondSchema, long timestampReference = 0L, bool hasWindowFragments = false, + bool replayTimestamps = false, bool copyDataOnInsert = true, bool useParallelMerge = false, + int multipleQueries = 0, bool persistInput = false, QueryConfig *config = nullptr, bool clearFiles = true); int getSchemaTupleSize(); void setName(std::string name); void setSQLExpression(std::string sql); @@ -68,25 +79,39 @@ class Query { int getId(); bool isMostUpstream(); bool isMostDownstream(); + QueryConfig *getConfig(); QueryOperator *getMostUpstreamOperator(); QueryOperator *getMostDownstreamOperator(); + QueryOperator *getOperator(); QueryApplication *getParent(); void setParent(QueryApplication *parent); QueryBuffer *getBuffer(); - std::shared_ptr getTaskDispatcher(); + QueryBuffer *getSecondBuffer(); + std::shared_ptr getTaskDispatcher(); + void setTaskQueue(std::shared_ptr &queue); std::shared_ptr getTaskQueue(); size_t getTaskQueueCapacity(); std::shared_ptr getResultHandler(); void setAggregateOperator(AggregateOperatorCode *aggrOperator); long getBytesGenerated(); + void startDroppingTasks(int task); + bool isTaskDropped(int task); + long getTimestampReference(); + long getLastTimestamp(); WindowDefinition &getWindowDefinition(); TupleSchema *getSchema(); + WindowDefinition &getFirstWindowDefinition(); + TupleSchema *getFirstSchema(); + WindowDefinition &getSecondWindowDefinition(); + TupleSchema *getSecondSchema(); TupleSchema *getOutputSchema(); int getNumOfWindowDefinitions(); void setOutputSchema(TupleSchema *schema); LatencyMonitor &getLatencyMonitor(); void connectTo(Query *query); bool getIsLeft(); + void markForCheckpoint(bool mark); + bool isMarkedForCheckpoint(); Query *getUpstreamQuery(); Query *getUpstreamQuery(int idx); Query *getDownstreamQuery(); diff --git a/src/utils/QueryApplication.cpp b/src/utils/QueryApplication.cpp index 833e49d..ff520ae 100644 --- a/src/utils/QueryApplication.cpp +++ b/src/utils/QueryApplication.cpp @@ -1,54 +1,233 @@ #include "utils/QueryApplication.h" -#include "utils/Query.h" + +#include + +#include "buffers/QueryBuffer.h" +#include "checkpoint/BlockManager.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "checkpoint/LineageGraphFactory.h" +#include "dispatcher/ITaskDispatcher.h" +#include "dispatcher/TaskDispatcher.h" +#include "filesystem/File.h" +#include "filesystem/FileSystemDisk.h" +#include "monitors/CompressionMonitor.h" #include "monitors/PerformanceMonitor.h" -#include "tasks/Task.h" #include "processor/TaskProcessorPool.h" -#include "dispatcher/TaskDispatcher.h" +#include 
"result/ResultHandler.h" +#include "tasks/Task.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" #include "utils/Utils.h" -QueryApplication::QueryApplication(std::vector> &queries) +QueryApplication::QueryApplication(std::vector> &queries, + bool checkpointEnabled, bool clearFiles) : m_numOfThreads(SystemConf::getInstance().WORKER_THREADS), - m_queries(queries), m_numOfQueries((int) queries.size()), m_numberOfUpstreamQueries(0), - m_taskQueueCapacity(2 * queries.size() * + m_queries(queries), + m_numOfQueries((int)queries.size()), + m_numberOfUpstreamQueries(0), + m_taskQueueCapacity(8 * queries.size() * (SystemConf::getInstance().CIRCULAR_BUFFER_SIZE / SystemConf::getInstance().BATCH_SIZE)), m_queue(std::make_shared(m_taskQueueCapacity)), - //m_queue(std::make_shared(2 * queries.size() * (SystemConf::getInstance().CIRCULAR_BUFFER_SIZE - // / SystemConf::getInstance().BATCH_SIZE))), - m_workerPool(std::make_shared(m_numOfThreads, m_queue)) {} + m_workerPool(std::make_shared(m_numOfThreads, m_queue)), + m_checkpointEnabled(checkpointEnabled), + m_rates(m_numOfQueries, 1), m_clearFiles(clearFiles) { + + if (SystemConf::getInstance().LINEAGE_ON) { + LineageGraphFactory::getInstance().setGraph(m_queries); + m_fileStore = std::make_shared(m_queries, m_clearFiles); + } +} -void QueryApplication::processData(std::vector &values, long latencyMark) { +void QueryApplication::processData(std::vector &values, + long latencyMark, long retainMark) { for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { - m_dispatchers[i]->dispatch(values.data(), values.size(), latencyMark); + m_dispatchers[i]->dispatch(values.data(), values.size() / m_rates[i], latencyMark, retainMark); + } + + /*if (m_dispatchers.size() > 1) { + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + m_dispatchers[i]->tryToConsume(); + } + }*/ +} + +void QueryApplication::processData(std::shared_ptr &values, + long latencyMark, long retainMark) { + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + m_dispatchers[i]->dispatch(values, latencyMark, retainMark); + } +} + +void QueryApplication::processData(void *values, int length, long latencyMark, long retainMark) { + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + m_dispatchers[i]->dispatch(values, length, latencyMark, retainMark); + } +} + +void QueryApplication::processFirstStream(std::vector &values, long latencyMark) { + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + m_dispatchers[i]->dispatchToFirstStream(values.data(), values.size(), latencyMark); + } +} + +void QueryApplication::processSecondStream(std::vector &values, long latencyMark) { + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + m_dispatchers[i]->dispatchToSecondStream(values.data(), values.size(), latencyMark); + } +} + +void QueryApplication::recoverData() { + auto t1 = std::chrono::high_resolution_clock::now(); + + // wait until all the tasks for checkpoint recovery have been created + if (SystemConf::getInstance().CHECKPOINT_ON) { + while (!m_clearFiles) + ; + } + + std::cout << "[DBG] starting to recover the data for the input buffers" << std::endl; + + //perform a reversal topological order traversal for restoring offsets + std::vector> sortedQueries; + std::vector visited (m_numOfQueries, false); + std::stack stack; + for (int i = 0; i < m_numOfQueries; i++) { + if (visited[i] == false) { + topologicalSort(i, visited, stack); + } } + while (!stack.empty()) { + sortedQueries.push_back(m_queries[stack.top()]); + stack.pop(); + } + + size_t 
maxIterations = 0; + for (unsigned long idx = 0; idx < sortedQueries.size(); idx++) { + auto query = sortedQueries[idx]; + //auto buffer = m_dispatchers[i]->getBuffer(); + auto buffer = query->getTaskDispatcher()->getBuffer(); + + auto temp = buffer->getUnsafeRemainingBytes()/buffer->getBatchSize(); + maxIterations = std::max(temp, maxIterations); + buffer->setNumberOfSlotsToRecover(temp); + int lastTask = 0; + if (m_dispatchers.size() == 1 || query->isMostDownstream()) { + // todo: the most downstream operators would have to read this from an external sink + // todo: generalize this for pipelined queries + lastTask = (int)(buffer->getUnsafeStartPointer()/buffer->getBatchSize()) + 1; + } else { + // todo: fix joins + throw std::runtime_error("error: recovering joins is not supported yet"); + auto startPtr = query->getDownstreamQuery()->getOperator()->getInputPtr(true); + lastTask = (int)(startPtr/buffer->getBatchSize()) + 1; + } + std::cout << "[DBG] restarting from task " << lastTask << " for buffer " + << buffer->getBufferId() << " with " << temp + << " slots " << std::endl; + query->getTaskDispatcher()->setTaskNumber(lastTask); + + long step; long offset; + // todo: implement the following function to non p-stream buffers + buffer->getStepAndOffset(step, offset); + query->getTaskDispatcher()->setStepAndOffset(step, offset); + buffer->prepareRecovery(); + } + + // parallel recovery of p-streams + if (m_dispatchers.size() == 1) { + auto buffer = m_dispatchers[0]->getBuffer(); + if (buffer->isPersistent()) { + m_dispatchers[0]->recover(); + } + } else { + std::vector threads (m_dispatchers.size()); + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + auto buffer = m_dispatchers[i]->getBuffer(); + if (buffer->isPersistent()) { + threads[i] = std::thread([&]{ + m_dispatchers[i]->recover(); + }); + } else { + threads[i] = std::thread([&]{}); + } + } + for (unsigned long i = 0; i < m_dispatchers.size(); ++i) { + threads[i].join(); + } + } + + if (m_checkpointCoordinator) + m_checkpointCoordinator->setReady(); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast>(t2 - t1); + std::cout << "[DBG] recovery duration " << time_span.count() << std::endl; +} + +bool QueryApplication::tryProcessData(std::vector &values, + long latencyMark) { + throw std::runtime_error("error: this function is not supported yet"); + if (m_dispatchers.size() > 1) + throw std::runtime_error("error: unsupported number of dispatchers"); + return m_dispatchers[0]->tryDispatchOrCreateTask(values.data(), values.size(), latencyMark); } void QueryApplication::setup() { /* Bind main thread to CPU core 0 */ Utils::bindProcess(0); m_workerPool->start(); - for (auto &q: m_queries) { + std::unordered_set qIds; + for (auto &q : m_queries) { + if(!qIds.insert(q->getId()).second) { + throw std::runtime_error("error: duplicate query ids"); + } q->setParent(this); - if (q->isMostUpstream()) - setDispatcher(q->getTaskDispatcher()); + if (q->isMostUpstream()) setDispatcher(q->getTaskDispatcher()); + if (q->isMostDownstream()) { +#if defined(TCP_OUTPUT) + q->getResultHandler()->setupSocket(); +#elif defined(RDMA_OUTPUT) + q->getResultHandler()->setupSocket(); +#endif + } } m_performanceMonitor = std::make_unique(*this); m_performanceMonitorThread = std::thread(std::ref(*m_performanceMonitor)); m_performanceMonitorThread.detach(); + + if (SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON) { + m_compressionMonitor = std::make_unique(this); + m_compressionMonitorThread = 
std::thread(std::ref(*m_compressionMonitor)); + m_compressionMonitorThread.detach(); + } + + /* Setup the checkpoint coordinator */ + if (m_checkpointEnabled) { + m_filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH); + m_checkpointCoordinator = std::make_unique(0, m_queries, &m_clearFiles, m_filesystem, true); + m_checkpointCoordinatorThread = std::thread(std::ref(*m_checkpointCoordinator)); + m_checkpointCoordinatorThread.detach(); + } } -void QueryApplication::setDispatcher(std::shared_ptr dispatcher) { +void QueryApplication::setupRates(std::vector &rates) { + size_t i = 0; + for (auto &r: m_rates) { + r = rates[i++]; + } +} + +void QueryApplication::setDispatcher( + std::shared_ptr dispatcher) { m_numberOfUpstreamQueries++; m_dispatchers.push_back(dispatcher); } -std::shared_ptr QueryApplication::getTaskQueue() { - return m_queue; -} +std::shared_ptr QueryApplication::getTaskQueue() { return m_queue; } -int QueryApplication::getTaskQueueSize() { - return (int) m_queue->size_approx(); -} +size_t QueryApplication::getTaskQueueSize() { return m_queue->size_approx(); } size_t QueryApplication::getTaskQueueCapacity() { return m_taskQueueCapacity; } @@ -60,11 +239,49 @@ std::shared_ptr QueryApplication::getTaskProcessorPool() { return m_workerPool; } -int QueryApplication::numberOfQueries() { - return m_numOfQueries; +FileBackedCheckpointCoordinator *QueryApplication::getCheckpointCoordinator() { + return m_checkpointCoordinator.get(); } -int QueryApplication::numberOfUpStreamQueries() { +long QueryApplication::getTimestampReference() { + long timestamp = LONG_MAX; + for (auto &q: m_queries) { + timestamp = std::min(timestamp, q->getTimestampReference()); + } + return timestamp; +} + +long QueryApplication::getLastTimestamp() { + long timestamp = LONG_MAX; + for (auto &q: m_queries) { + timestamp = std::min(timestamp, q->getLastTimestamp()); + } + return timestamp; +} + +int QueryApplication::numberOfQueries() { return m_numOfQueries; } + +int QueryApplication::numberOfUpstreamQueries() { return m_numberOfUpstreamQueries; } +QueryApplication::~QueryApplication() { + for (auto &q : m_queries) { + q->getBuffer()->~QueryBuffer(); + q->getSecondBuffer()->~QueryBuffer(); + } + if (m_checkpointCoordinator) { + m_checkpointCoordinator->~FileBackedCheckpointCoordinator(); + } +} + +void QueryApplication::topologicalSort(int q, std::vector &visited, std::stack &stack) { + visited[q] = true; + for (int i = 0; i < m_queries[q]->getNumberOfUpstreamQueries(); i++) { + auto qId = m_queries[q]->getUpstreamQuery(i)->getId(); + if (!visited[qId]) { + topologicalSort(qId, visited, stack); + } + } + stack.push(q); +} \ No newline at end of file diff --git a/src/utils/QueryApplication.h b/src/utils/QueryApplication.h index db8072e..b2a53e9 100644 --- a/src/utils/QueryApplication.h +++ b/src/utils/QueryApplication.h @@ -1,13 +1,21 @@ #pragma once +#include #include #include "utils/SystemConf.h" class Query; class TaskProcessorPool; -class TaskDispatcher; +class ITaskDispatcher; +class CompressionMonitor; class PerformanceMonitor; +class QueueIoHandler; +template class FileSystemDisk; +template class FileSystemFile; +class FileBackedCheckpointCoordinator; +class BlockManager; +class UnboundedQueryBuffer; /* * \brief This is the query application that is going to be executed once @@ -29,21 +37,51 @@ class QueryApplication { size_t m_taskQueueCapacity; std::shared_ptr m_queue; std::shared_ptr m_workerPool; - std::vector> m_dispatchers; + std::vector> m_dispatchers; + std::unique_ptr m_compressionMonitor; 
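The recovery path in QueryApplication.cpp above orders queries with a DFS-based topological sort before restoring their task numbers and buffer offsets. The following is a minimal, self-contained sketch of that ordering, assuming only an adjacency list from query id to upstream query ids (all names here are illustrative, not from the repo):

```cpp
#include <cstddef>
#include <stack>
#include <vector>

// DFS that pushes a query id only after visiting all of its upstream
// queries, mirroring QueryApplication::topologicalSort above.
static void topoVisit(int q, const std::vector<std::vector<int>> &upstream,
                      std::vector<bool> &visited, std::stack<int> &stack) {
  visited[q] = true;
  for (int u : upstream[q])
    if (!visited[u]) topoVisit(u, upstream, visited, stack);
  stack.push(q);
}

// Popping the stack yields each query before its upstream queries
// (most-downstream first), the order recoverData() walks when it
// restores offsets.
static std::vector<int> restoreOrder(
    const std::vector<std::vector<int>> &upstream) {
  std::vector<bool> visited(upstream.size(), false);
  std::stack<int> stack;
  for (std::size_t q = 0; q < upstream.size(); ++q)
    if (!visited[q]) topoVisit(static_cast<int>(q), upstream, visited, stack);
  std::vector<int> order;
  while (!stack.empty()) {
    order.push_back(stack.top());
    stack.pop();
  }
  return order;
}
```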
std::unique_ptr m_performanceMonitor; - std::thread m_performanceMonitorThread; + std::thread m_compressionMonitorThread, m_performanceMonitorThread; - void setDispatcher(std::shared_ptr dispatcher); + /* + * Variables used for checkpointing + * */ + bool m_checkpointEnabled; + typedef QueueIoHandler adapter_t; + typedef FileSystemDisk disk_t; + typedef FileSystemFile file_t; + std::shared_ptr m_filesystem; + std::unique_ptr m_checkpointCoordinator; + std::thread m_checkpointCoordinatorThread; + + std::atomic m_clearFiles; + std::shared_ptr m_fileStore; + + std::vector m_rates; + + void setDispatcher(std::shared_ptr dispatcher); + + void topologicalSort(int q, std::vector &visited, std::stack &stack); public: - QueryApplication(std::vector> &queries); - void processData(std::vector &values, long latencyMark = -1); + explicit QueryApplication(std::vector> &queries, bool checkpointEnabled = false, bool clearFiles = true); + void processData(std::vector &values, long latencyMark = -1, long retainMark = -1); + void processData(std::shared_ptr &values, long latencyMark = -1, long retainMark = -1); + void processData(void *values, int length, long latencyMark = -1, long retainMark = -1); + void processFirstStream(std::vector &values, long latencyMark = -1); + void processSecondStream(std::vector &values, long latencyMark = -1); + bool tryProcessData(std::vector &values, long latencyMark); + void recoverData(); void setup(); + void setupRates(std::vector &rates); std::shared_ptr getTaskQueue(); - int getTaskQueueSize(); + size_t getTaskQueueSize(); size_t getTaskQueueCapacity(); std::vector> getQueries(); std::shared_ptr getTaskProcessorPool(); + FileBackedCheckpointCoordinator *getCheckpointCoordinator(); + long getTimestampReference(); + long getLastTimestamp(); int numberOfQueries(); - int numberOfUpStreamQueries(); + int numberOfUpstreamQueries(); + ~QueryApplication(); }; \ No newline at end of file diff --git a/src/utils/QueryOperator.h b/src/utils/QueryOperator.h index 3bb2429..54e569d 100644 --- a/src/utils/QueryOperator.h +++ b/src/utils/QueryOperator.h @@ -1,5 +1,18 @@ #pragma once +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include #include #include "cql/operators/OperatorCode.h" @@ -20,14 +33,37 @@ class QueryOperator { QueryOperator *m_downstream; QueryOperator *m_upstream; OperatorCode &m_code; + bool m_isFT; + + struct PMem; + typedef QueueIoHandler adapter_t; + typedef FileSystemDisk disk_t; + typedef typename FileSystemDisk::file_t file_t; + std::shared_ptr m_filesystem = nullptr; + // Variables for persisting the progress vector + const size_t m_poolSize; + pmem::obj::pool m_pop; + pmem::obj::persistent_ptr m_root; + std::string m_pmFileName; + + // used for logging non-deterministic operations + bool m_useLog = false; + std::string m_logFileName; + size_t m_logSize; + size_t m_logIdx = 0; + int m_logFD; + char *m_logMF; void setUpstream(QueryOperator *qOperator) { m_upstream = qOperator; } public: - QueryOperator(OperatorCode &code) : - m_downstream(nullptr), m_upstream(nullptr), m_code(code) {} + QueryOperator(OperatorCode &code, bool isFT = false) : + m_downstream(nullptr), m_upstream(nullptr), m_code(code), m_isFT(isFT), + m_poolSize(PMEMOBJ_MIN_POOL), + m_pmFileName("scabbard/operator_pm_"), + m_logFileName("scabbard/operator_log_pm_") {} OperatorCode &getCode() { return m_code; @@ -37,8 +73,66 @@ class QueryOperator { m_code = code; } - void setParent(Query *parent) { + void setParent(Query *parent, int 
id = 0, long logSize = 0) { m_parent = parent; + if (m_isFT && m_parent) { + try { + m_pmFileName += std::to_string(id); + if (!m_filesystem) { + m_filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH, SystemConf::getInstance().WORKER_THREADS); + } + + auto pmPath = m_filesystem->getRootPath() + m_pmFileName; + if (Utils::fileExists(pmPath.c_str()) != 0) { + m_pop = pmem::obj::pool::create(pmPath.c_str(), + "", m_poolSize, CREATE_MODE_RW); + m_root = m_pop.root(); + pmem::obj::make_persistent_atomic(m_pop, m_root->next); + pmem::obj::transaction::run(m_pop, [&] { m_root = m_root->next; }); + } else { + m_pop = pmem::obj::pool::open(pmPath, ""); + m_root = m_pop.root(); + m_root = m_root->next; + if (!SystemConf::getInstance().RECOVER) { + m_root->m_leftP.get_rw().store(0); + m_root->m_rightP.get_rw().store(0); + m_root->m_outputP.get_rw().store(0); + } + } + + if (m_code.getSecondInputCols() != nullptr) { + if (m_useLog) { + m_logFileName += std::to_string(id); + m_logSize = 2 * logSize * 4 * sizeof(long); + if ((m_logFD = + open((m_filesystem->getRootPath() + m_logFileName).c_str(), + O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { + throw std::runtime_error("error: failed to open fd"); + } + ftruncate(m_logFD, m_logSize); + fsync(m_logFD); + if ((m_logMF = + (char *)mmap64(nullptr, m_logSize, PROT_READ | PROT_WRITE, + MAP_SHARED, m_logFD, 0)) == MAP_FAILED) { + throw std::runtime_error("error: failed to mmap"); + } + } else { + if (!SystemConf::getInstance().RECOVER) { + pmem::obj::transaction::run(m_pop, [&] { + m_root->m_offsets = pmem::obj::make_persistent(2 * logSize); + }); + m_root->m_writePos.get_rw() = 0; + } + } + } + } catch (const pmem::pool_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } catch (const pmem::transaction_error &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return; + } + } } Query *getParent() { @@ -65,4 +159,114 @@ class QueryOperator { std::string toString() { return ""; } + + void updateInputPtr(long inputPtr, bool isLeft) { + if (!m_root) { + throw std::runtime_error("error: pmem is not initialized"); + } + if (isLeft) { + auto prevOffset = m_root->m_leftP.get_ro().load(); + /*if (inputPtr < prevOffset) + throw std::runtime_error("error: trying to set an invalid offset " + + std::to_string(inputPtr) + " < " + + std::to_string(prevOffset));*/ + m_root->m_leftP.get_rw().store(inputPtr); + } else { + auto prevOffset = m_root->m_rightP.get_ro().load(); + /*if (inputPtr < prevOffset) + throw std::runtime_error("error: trying to set an invalid offset " + + std::to_string(inputPtr) + " < " + + std::to_string(prevOffset));*/ + m_root->m_rightP.get_rw().store(inputPtr); + } + } + + long getInputPtr(bool isLeft) { + if (!m_root) { + throw std::runtime_error("error: pmem is not initialized"); + } + auto ptr = (isLeft) ? 
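setParent() above keeps each operator's progress (input and output offsets) in a persistent-memory root object so the pointers survive a restart. Below is a stripped-down sketch of the same create-or-open pattern with libpmemobj-cpp; the pool path and layout name are placeholders, and error handling is elided:

```cpp
#include <libpmemobj++/p.hpp>
#include <libpmemobj++/pool.hpp>
#include <atomic>
#include <unistd.h>

struct Root {
  pmem::obj::p<std::atomic<long>> inputOffset;   // last dispatched input byte
  pmem::obj::p<std::atomic<long>> outputOffset;  // last acknowledged output byte
};

int main() {
  const char *path = "/tmp/operator_pm";  // placeholder pool file
  pmem::obj::pool<Root> pop;
  if (access(path, F_OK) != 0)  // same exists-check idiom as Utils::fileExists
    pop = pmem::obj::pool<Root>::create(path, "offsets", PMEMOBJ_MIN_POOL, 0666);
  else
    pop = pmem::obj::pool<Root>::open(path, "offsets");

  auto root = pop.root();
  root->inputOffset.get_rw().store(4096);  // record progress...
  pop.persist(root->inputOffset);          // ...and flush it to pmem
  long restored = root->inputOffset.get_ro().load();  // survives a restart
  (void)restored;
  pop.close();
  return 0;
}
```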
m_root->m_leftP.get_ro().load() : m_root->m_rightP.get_ro().load(); + return ptr; + } + + void updateOutputPtr(long outputPtr) { + if (!m_root) { + throw std::runtime_error("error: pmem is not initialized"); + } + auto prevOffset = m_root->m_outputP.get_ro().load(); + + if (outputPtr < prevOffset) { + //throw std::runtime_error("error: trying to free an invalid offset in qoperator " + + // std::to_string(outputPtr) + " < " + + // std::to_string(prevOffset)); + //std::cout << "warning: trying to free an invalid offset in qoperator " + + // std::to_string(outputPtr) + " < " + std::to_string(prevOffset) << std::endl; + return; + } + m_root->m_outputP.get_rw().store(outputPtr); + } + + long getOutputPtr() { + if (!m_root) { + throw std::runtime_error("error: pmem is not initialized"); + } + auto ptr = m_root->m_outputP.get_ro().load(); + return ptr; + } + + void writeOffsets(long tid, long offset1, long offset2) { + if (m_useLog) { + if (m_logMF) { + auto buf = (offsetTuple *)m_logMF; + buf[m_logIdx] = {tid, offset1, offset2, 0}; + m_logIdx++; + if (m_logIdx >= m_logSize / sizeof(offsetTuple)) { + if (msync((void *)m_logMF, m_logSize, MS_SYNC) < 0) { + throw std::runtime_error("error: failed to msync"); + } + m_logIdx = 0; + } + } else { + std::cout << "warning: no file was initialized for logging" << std::endl; + } + } else { + vector_type &pvector = *(m_root->m_offsets); + pvector[m_root->m_writePos.get_rw()++] = {tid, offset1, offset2, 0}; + if (m_root->m_writePos.get_ro().load() == pvector.size()) { + m_root->m_writePos.get_rw() = 0; + } + } + } + + private: + + struct offsetTuple { + long _1; + long _2; + long _3; + long _4; + }; + using vector_type = pmem::obj::vector; + struct PMem { + pmem::obj::p> m_leftP{}; + pmem::obj::p> m_rightP{}; + pmem::obj::p> m_outputP{}; + pmem::obj::p> m_leftId{}; + pmem::obj::p> m_rightId{}; + pmem::obj::p> m_outputId{}; + pmem::obj::p> m_writePos{}; + pmem::obj::persistent_ptr m_offsets; + pmem::obj::persistent_ptr next; + PMem() { + m_leftP.get_rw() = 0L; + m_rightP.get_rw() = 0L; + m_outputP.get_rw() = 0L; + m_writePos.get_rw() = 0; + }; + + /** Copy constructor is deleted */ + PMem(const PMem &) = delete; + /** Assignment operator is deleted */ + PMem &operator=(const PMem &) = delete; + }; }; \ No newline at end of file diff --git a/src/utils/Status.h b/src/utils/Status.h new file mode 100644 index 0000000..779908c --- /dev/null +++ b/src/utils/Status.h @@ -0,0 +1,16 @@ +#pragma once + +/* + * \brief Return codes for file and memory allocation handling. + * + * */ + +enum class Status : uint8_t { + Ok = 0, + Pending = 1, + NotFound = 2, + OutOfMemory = 3, + IOError = 4, + Corruption = 5, + Aborted = 6, +}; \ No newline at end of file diff --git a/src/utils/SystemConf.cpp b/src/utils/SystemConf.cpp index 8f5638b..2b17b76 100644 --- a/src/utils/SystemConf.cpp +++ b/src/utils/SystemConf.cpp @@ -1,22 +1,79 @@ #include "SystemConf.h" #include +#include +#include +#include + +[[maybe_unused]] static const char *homedir = ((homedir = getenv("HOME")) == nullptr) ? 
+ getpwuid(getuid())->pw_dir : getenv("HOME"); unsigned int SystemConf::BATCH_SIZE = 2 * 64 * 1024; unsigned int SystemConf::BUNDLE_SIZE = 2 * 64 * 1024; long SystemConf::INPUT_SIZE = 2 * 64 * 1024; int SystemConf::PARTIAL_WINDOWS = 1024; size_t SystemConf::HASH_TABLE_SIZE = 512; //4*512; -unsigned long SystemConf::THROUGHPUT_MONITOR_INTERVAL = 1000L; +unsigned long SystemConf::COMPRESSION_MONITOR_INTERVAL = 4000L; unsigned long SystemConf::PERFORMANCE_MONITOR_INTERVAL = 1000L; +unsigned long SystemConf::MBs_INGESTED_PER_SEC = 0L; +bool SystemConf::BUFFERED_LATENCY = false; +unsigned long SystemConf::CHECKPOINT_INTERVAL = 1000L; +unsigned long SystemConf::BLOCK_SIZE = 16 * _KB; +unsigned long SystemConf::DISK_BUFFER = 8; int SystemConf::MOST_UPSTREAM_QUERIES = 2; int SystemConf::PIPELINE_DEPTH = 4; size_t SystemConf::CIRCULAR_BUFFER_SIZE = 4 * 1048576; size_t SystemConf::UNBOUNDED_BUFFER_SIZE = 2 * 4 * 64 * 1024; +size_t SystemConf::OUTPUT_BUFFER_SIZE = 1048576; int SystemConf::WORKER_THREADS = 1; int SystemConf::SLOTS = 256; bool SystemConf::LATENCY_ON = false; int SystemConf::THREADS = std::thread::hardware_concurrency(); long SystemConf::DURATION = 0; +long SystemConf::FAILURE_TIME = 0; +bool SystemConf::FAILURE_ON = false; int SystemConf::QUERY_NUM = 0; -bool SystemConf::PARALLEL_MERGE_ON = false; \ No newline at end of file +bool SystemConf::PARALLEL_MERGE_ON = false; +bool SystemConf::CHECKPOINT_ON = false; +bool SystemConf::ADAPTIVE_COMPRESSION_ON = false; +bool SystemConf::ADAPTIVE_FORCE_RLE = false; +bool SystemConf::ADAPTIVE_CHANGE_DATA = false; +size_t SystemConf::ADAPTIVE_COMPRESSION_INTERVAL = 1024; +bool SystemConf::CREATE_MERGE_WITH_CHECKPOINTS = false; +bool SystemConf::CHECKPOINT_COMPRESSION = false; +size_t SystemConf::OUT_OF_ORDER_SIZE = 4; +bool SystemConf::PERSIST_INPUT = false; +bool SystemConf::LINEAGE_ON = false; +bool SystemConf::RECOVER = false; +const std::string SystemConf::FILE_ROOT_PATH = std::string(homedir) + "/data"; +//const std::string SystemConf::FILE_ROOT_PATH = "/mnt/rdisk/data"; +//const std::string SystemConf::FILE_ROOT_PATH = "/mnt/LSDSDataShare/projects/21-scabbard/data"; +//const std::string SystemConf::FILE_ROOT_PATH = "/home/grt17/data"; +//const std::string SystemConf::FILE_ROOT_PATH = "/home/george"; +size_t SystemConf::CAMPAIGNS_NUM = 100; +bool SystemConf::USE_FLINK = true; +bool SystemConf::USE_KAFKA = false; + +const std::string SystemConf::LOCALHOST = "127.0.0.1"; +const std::string SystemConf::PLATYPUS1_1GB = "192.168.0.66"; +const std::string SystemConf::PLATYPUS1_10GB = "192.168.10.98"; +const std::string SystemConf::KEA03_ib0 = "10.0.0.30"; +const std::string SystemConf::KEA03_ib1 = "11.0.0.31"; +const std::string SystemConf::KEA04_ib0 = "10.0.0.40"; +const std::string SystemConf::KEA04_ib1 = "11.0.0.41"; +const std::string SystemConf::WALLABY_ib0 = "10.0.0.90"; +const std::string SystemConf::WALLABY_ib1 = "11.0.0.91"; + +bool SystemConf::HAS_TWO_SOURCES = false; +bool SystemConf::SEND_TO_SECOND_WORKER = false; +const std::string SystemConf::REMOTE_WORKER = SystemConf::WALLABY_ib1; // SystemConf::KEA04_ib0; +const std::string SystemConf::REMOTE_WORKER_2 = SystemConf::KEA04_ib1; +const std::string SystemConf::REMOTE_CLIENT = SystemConf::KEA04_ib1; + +// server: iperf -s +// client: iperf -c 10.0.0.40 + +// rdma +// ibdev2netdev : check status +// sudo ifconfig ib0 10.0.0.30/24 up +// sudo ifconfig ib1 11.0.0.31/24 up \ No newline at end of file diff --git a/src/utils/SystemConf.h b/src/utils/SystemConf.h index caa6dad..6e9f65b 100644 
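The homedir initializer at the top of SystemConf.cpp assigns to the variable inside its own initializer, which is hard to read and easy to get wrong. A clearer equivalent of the same $HOME-with-passwd-fallback lookup, as a sketch:

```cpp
#include <cstdlib>
#include <pwd.h>
#include <string>
#include <unistd.h>

// Prefer $HOME; fall back to the password-database entry for the current
// user (the same sources FILE_ROOT_PATH is derived from above).
static std::string homeDirectory() {
  if (const char *env = std::getenv("HOME")) return env;
  if (const struct passwd *pw = getpwuid(getuid())) return pw->pw_dir;
  return ".";  // last resort: current working directory
}
```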
--- a/src/utils/SystemConf.h +++ b/src/utils/SystemConf.h @@ -1,10 +1,13 @@ #pragma once -#include +#include + +#include +#include #include +#include #include -#include -#include +#include #include "tasks/Task.h" @@ -21,23 +24,49 @@ #include "tasks/NumaTaskQueueWrapper.h" typedef NumaAlloc::NumaAllocator numa_allocator; typedef NumaAlloc::NumaAllocator ptr_numa_allocator; -//using ByteBuffer = std::vector>; -//using ByteBufferPtr = std::vector>; -using ByteBuffer = std::vector>; -using ByteBufferPtr = std::vector>; +////using ByteBuffer = std::vector>; +////using ByteBufferPtr = std::vector>; +//using ByteBuffer = std::vector>; +//using ByteBufferPtr = std::vector>; +using ByteBuffer = std::vector>; +using ByteBufferPtr = std::vector>; //using ByteBuffer = std::vector; //using ByteBufferPtr = std::vector; using TaskQueue = NumaTaskQueueWrapper; #else #include #include "tasks/ConcurrentQueue.h" + +#if defined(HAVE_SHARED) +#include +#include +#include + +typedef boost::interprocess::allocator ShmemAllocator; +typedef boost::interprocess::vector SharedByteBuffer; +#endif + +//#if defined (HAVE_PMEM) +//#include "libpmemobj++/container/vector.hpp" +//using ByteBuffer = pmem::obj::vector; + +//#else using ByteBuffer = std::vector>;//boost::alignment::aligned_allocator>; + //tbb::cache_aligned_allocator>; + boost::alignment::aligned_allocator>; +//#endif + using ByteBufferPtr = std::vector>;//boost::alignment::aligned_allocator>; + //tbb::cache_aligned_allocator>; + boost::alignment::aligned_allocator>; using TaskQueue = moodycamel::ConcurrentQueue>; #endif +#define PORT 6667 +#define BUFFER_COUNT 16 //128 + class SystemConf { private: SystemConf() {}; @@ -48,20 +77,68 @@ class SystemConf { static long INPUT_SIZE; static int PARTIAL_WINDOWS; static size_t HASH_TABLE_SIZE; - static unsigned long THROUGHPUT_MONITOR_INTERVAL; + static unsigned long COMPRESSION_MONITOR_INTERVAL; static unsigned long PERFORMANCE_MONITOR_INTERVAL; + static unsigned long MBs_INGESTED_PER_SEC; + static bool BUFFERED_LATENCY; + static unsigned long CHECKPOINT_INTERVAL; + static unsigned long BLOCK_SIZE; + static unsigned long DISK_BUFFER; static int MOST_UPSTREAM_QUERIES; static int PIPELINE_DEPTH; static size_t CIRCULAR_BUFFER_SIZE; static size_t UNBOUNDED_BUFFER_SIZE; + static size_t OUTPUT_BUFFER_SIZE; static int WORKER_THREADS; static int SLOTS; static bool LATENCY_ON; static const int POOL_SIZE = 0; static int THREADS; static long DURATION; + static long FAILURE_TIME; + static bool FAILURE_ON; static int QUERY_NUM; static bool PARALLEL_MERGE_ON; + static bool CHECKPOINT_ON; + static bool ADAPTIVE_COMPRESSION_ON; + static bool ADAPTIVE_FORCE_RLE; + static bool ADAPTIVE_CHANGE_DATA; + static size_t ADAPTIVE_COMPRESSION_INTERVAL; + static bool CREATE_MERGE_WITH_CHECKPOINTS; + static bool CHECKPOINT_COMPRESSION; + static size_t OUT_OF_ORDER_SIZE; + static bool PERSIST_INPUT; + static bool LINEAGE_ON; + static bool RECOVER; + + static const std::string LOCALHOST; + static const std::string PLATYPUS1_1GB; + static const std::string PLATYPUS1_10GB; + static const std::string KEA03_ib0; + static const std::string KEA03_ib1; + static const std::string KEA04_ib0; + static const std::string KEA04_ib1; + static const std::string WALLABY_ib0; + static const std::string WALLABY_ib1; + + static bool HAS_TWO_SOURCES; + static bool SEND_TO_SECOND_WORKER; + static const std::string REMOTE_WORKER; + static const std::string REMOTE_WORKER_2; + static const std::string REMOTE_CLIENT; + + constexpr static size_t _KB = 1024; + constexpr 
static size_t _4KB = 4 * _KB; + constexpr static size_t _MB = 1024 * 1024; + constexpr static size_t _4MB = 4 * _MB; + constexpr static size_t _GB = 1024 * 1024 * 1024; + static const std::string FILE_ROOT_PATH; + + // Query specific variables + static size_t CAMPAIGNS_NUM; + + static bool USE_FLINK; + static bool USE_KAFKA; #if defined(HAVE_NUMA) void findMemoryNodeForCPU(int &numa_node) { @@ -101,8 +178,9 @@ class SystemConf { s.append("Circular buffer size : " + std::to_string(SystemConf::CIRCULAR_BUFFER_SIZE) + " bytes\n"); s.append("Intermediate buffer size : " + std::to_string(SystemConf::UNBOUNDED_BUFFER_SIZE) + " bytes\n"); s.append("Hash table size : " + std::to_string(SystemConf::HASH_TABLE_SIZE) + " bytes\n"); - s.append("Throughput monitor interval : " + std::to_string(SystemConf::THROUGHPUT_MONITOR_INTERVAL) + " msec\n"); + s.append("Compression monitor interval : " + std::to_string(SystemConf::COMPRESSION_MONITOR_INTERVAL) + " msec\n"); s.append("Performance monitor interval : " + std::to_string(SystemConf::PERFORMANCE_MONITOR_INTERVAL) + " msec\n"); + s.append("Checkpoint interval : " + std::to_string(SystemConf::CHECKPOINT_INTERVAL) + " msec\n"); s.append("Number of upstream queries : " + std::to_string(SystemConf::MOST_UPSTREAM_QUERIES) + "\n"); s.append("GPU pipeline depth : " + std::to_string(SystemConf::PIPELINE_DEPTH) + "\n"); std::string latency = (SystemConf::LATENCY_ON ? "On" : "Off"); diff --git a/src/utils/TupleSchema.h b/src/utils/TupleSchema.h index 700a30c..7d046c9 100644 --- a/src/utils/TupleSchema.h +++ b/src/utils/TupleSchema.h @@ -18,6 +18,7 @@ class TupleSchema { int m_size; bool m_hasTimestamp; int m_tupleSize; + int m_padLength = -1; public: @@ -81,4 +82,18 @@ class TupleSchema { } return m_tupleSize; } + + int getPadLength() { + if (m_padLength == -1) { + auto tupleSize = getTupleSize(); + m_padLength = 0; + // Expand size, if needed, to ensure that tuple size is a power of 2 + if ((tupleSize & (tupleSize - 1)) != 0) { + auto pow2Size = 1; + while (tupleSize > pow2Size) pow2Size *= 2; + m_padLength = pow2Size - tupleSize; + } + } + return m_padLength; + } }; \ No newline at end of file diff --git a/src/utils/Utils.cpp b/src/utils/Utils.cpp index 7adc3fc..02ec533 100644 --- a/src/utils/Utils.cpp +++ b/src/utils/Utils.cpp @@ -1,16 +1,21 @@ +#include "Utils.h" + #include -#include -#include -#include #include +#include +#include #include #include +#include +#include -#include "Utils.h" -#include "SystemConf.h" - -void Utils::bindProcess(const int core_id) { +void Utils::bindProcess(int core_id) { + if (core_id >= (int) std::thread::hardware_concurrency()) { + std::cout << "warning: the core id exceeds the number of cores" << std::endl; + core_id = core_id % (int) std::thread::hardware_concurrency(); + } + //core_id = core_id + 1; pthread_t pid = pthread_self(); cpu_set_t cpuset; CPU_ZERO(&cpuset); @@ -24,7 +29,11 @@ void Utils::bindProcess(const int core_id) { fprintf(stderr, "Failed to set thread %lu to affinity to CPU %d\n", pid, core_id); } -void Utils::bindProcess(std::thread &thread, const int id) { +void Utils::bindProcess(std::thread &thread, int id) { + if (id >= (int) std::thread::hardware_concurrency()) { + std::cout << "warning: the core id exceeds the number of cores" << std::endl; + id = id % (int) std::thread::hardware_concurrency(); + } auto pid = thread.native_handle(); /* Pin worker to thread */ /*int min = 1; // +1 dispatcher @@ -32,7 +41,7 @@ void Utils::bindProcess(std::thread &thread, const int id) { int total = max - min + 1; int 
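TupleSchema::getPadLength() above expands a tuple, when needed, so that its size becomes a power of two. A worked standalone version of the same rounding, for illustration:

```cpp
#include <cstdio>

// Mirror of TupleSchema::getPadLength above: pad a tuple up to the next
// power of two. A 24-byte tuple is padded by 8 bytes to reach 32; a
// 64-byte tuple needs no padding.
static int padLength(int tupleSize) {
  if ((tupleSize & (tupleSize - 1)) == 0) return 0;  // already a power of two
  int pow2 = 1;
  while (tupleSize > pow2) pow2 *= 2;
  return pow2 - tupleSize;
}

int main() {
  std::printf("%d\n", padLength(24));  // prints 8
  std::printf("%d\n", padLength(64));  // prints 0
  return 0;
}
```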
core_id = ((id - (min - 1)) % total) + min;*/ - int core_id = id; + int core_id = id; // + 1; std::cout << "[DBG] bind worker " + std::to_string(id) + " to core " + std::to_string(core_id) << std::endl; cpu_set_t cpuset; @@ -60,7 +69,7 @@ int Utils::getTupleTimestamp(long value) { } int Utils::getPowerOfTwo(int value) { - bool powerOfTwo = !(value == 0) && !(value & (value - 1)); + bool powerOfTwo = value != 0 && !(value & (value - 1)); if (!powerOfTwo) { int temp = 0; auto num = (double) value; @@ -73,22 +82,22 @@ int Utils::getPowerOfTwo(int value) { return value; } -bool Utils::__is_pointer_aligned(const void *p, int alignment) { +bool Utils::_is_pointer_aligned(const void *p, int alignment) { return ((((uintptr_t) p) & (alignment - 1)) == 0); } -bool Utils::__is_length_aligned(int length, int alignment) { +bool Utils::_is_length_aligned(int length, int alignment) { return ((length & (alignment - 1)) == 0); } -std::string Utils::GetCurrentWorkingDir() { +std::string Utils::getCurrentWorkingDir() { char buff[FILENAME_MAX]; GetCurrentDir(buff, FILENAME_MAX); std::string current_working_dir(buff); return current_working_dir; } -std::string Utils::GetHomeDir() { +std::string Utils::getHomeDir() { struct passwd *pw = getpwuid(getuid()); const char *homedir = pw->pw_dir; return std::string(homedir); @@ -127,7 +136,7 @@ void Utils::process_mem_usage(double &vm_usage, double &resident_set) { resident_set = rss * page_size_kb; } -std::string Utils::GetStdoutFromCommand(std::string cmd) { +std::string Utils::getStdoutFromCommand(std::string cmd) { std::string data; FILE *stream; const int max_buffer = 1024; @@ -137,7 +146,7 @@ std::string Utils::GetStdoutFromCommand(std::string cmd) { stream = popen(cmd.c_str(), "r"); if (stream) { while (!feof(stream)) - if (fgets(buffer, max_buffer, stream) != NULL) data.append(buffer); + if (fgets(buffer, max_buffer, stream) != nullptr) data.append(buffer); pclose(stream); } return data; @@ -164,6 +173,72 @@ void Utils::getOrderedCores (std::vector &orderedCores) { } } +int Utils::getFirstCoreFromSocket(size_t socket) { + auto topo = boost::fibers::numa::topology(); + if (socket > topo.size()) { + throw std::runtime_error("error: wrong socket number"); + } + auto &soc = topo[socket]; + for (auto &cpu_id: soc.logical_cpus) { + return (int) cpu_id; + } + return -1; +} + +int Utils::fileExists(char const *file) { + return access(file, F_OK); +} + +void Utils::readDirectory(const std::string &name, std::vector &v) { + DIR* dirp = opendir(name.c_str()); + struct dirent * dp; + while ((dp = readdir(dirp)) != nullptr) { + v.emplace_back(dp->d_name); + } + closedir(dirp); +} + +void Utils::tryCreateDirectory(std::string dir) { + std::experimental::filesystem::path path{dir}; + if (!std::experimental::filesystem::exists( + std::experimental::filesystem::status(path))) { + std::experimental::filesystem::create_directories(path); + } +} + +template +void wrapArrayInVector( + T *sourceArray, size_t arraySize, + std::vector > &targetVector) { + typename std::_Vector_base< + T, boost::alignment::aligned_allocator >::_Vector_impl *vectorPtr = + (typename std::_Vector_base< + T, boost::alignment::aligned_allocator >::_Vector_impl + *)((void *)&targetVector); + vectorPtr->_M_start = sourceArray; + vectorPtr->_M_finish = vectorPtr->_M_end_of_storage = + vectorPtr->_M_start + arraySize; +} +template void wrapArrayInVector( + char *sourceArray, size_t arraySize, + std::vector > + &targetVector); + +template +void releaseVectorWrapper( + std::vector > &targetVector) { + typename 
std::_Vector_base< + T, boost::alignment::aligned_allocator >::_Vector_impl *vectorPtr = + (typename std::_Vector_base< + T, boost::alignment::aligned_allocator >::_Vector_impl + *)((void *)&targetVector); + vectorPtr->_M_start = vectorPtr->_M_finish = vectorPtr->_M_end_of_storage = + nullptr; +} +template void releaseVectorWrapper( + std::vector > + &targetVector); + template std::vector computePercentiles(std::vector &input, std::vector &percentiles) { std::sort(input.begin(), input.end()); diff --git a/src/utils/Utils.h b/src/utils/Utils.h index a6a0ee4..c7c81d7 100644 --- a/src/utils/Utils.h +++ b/src/utils/Utils.h @@ -1,18 +1,25 @@ #pragma once -#include +#include +#include #include -#include -#include +#include + #include // std::sort -#include +#include +#include #include -#include -#include /* defines FILENAME_MAX */ +#include +#include /* defines FILENAME_MAX */ +#include +#include #include +#include #include -#include +#include #include +#include +#include /* * \brief Utility functions for the system. @@ -33,20 +40,32 @@ #undef dbg #ifdef DEBUG -# define dbg(fmt, args...) do { fprintf(stdout, "DEBUG %35s (l. %4d) > " fmt, __FILE__, __LINE__, ## args); fflush(stdout); } while (0) +#define dbg(fmt, args...) \ + do { \ + fprintf(stdout, "DEBUG %35s (l. %4d) > " fmt, __FILE__, __LINE__, ##args); \ + fflush(stdout); \ + } while (0) #else -# define dbg(fmt, args...) +#define dbg(fmt, args...) #endif -#define info(fmt, args...) do { fprintf(stdout, "INFO %35s (l. %4d) > " fmt, __FILE__, __LINE__, ## args); fflush(stdout); } while (0) +#define _info(fmt, args...) \ + do { \ + fprintf(stdout, "INFO %35s (l. %4d) > " fmt, __FILE__, __LINE__, ##args); \ + fflush(stdout); \ + } while (0) #define print_error_then_terminate(en, msg) \ - do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0) + do { \ + errno = en; \ + perror(msg); \ + exit(EXIT_FAILURE); \ + } while (0) namespace Utils { -void bindProcess(const int core_id); +void bindProcess(int core_id); -void bindProcess(std::thread &thread, const int id); +void bindProcess(std::thread &thread, int id); long pack(long systemTimestamp, long tupleTimestamp); @@ -56,13 +75,13 @@ int getTupleTimestamp(long value); int getPowerOfTwo(int value); -bool __is_pointer_aligned(const void *p, int alignment); +bool _is_pointer_aligned(const void *p, int alignment); -bool __is_length_aligned(int length, int alignment); +bool _is_length_aligned(int length, int alignment); -std::string GetCurrentWorkingDir(); +std::string getCurrentWorkingDir(); -std::string GetHomeDir (); +std::string getHomeDir(); // process_mem_usage(double &, double &) - takes two doubles by reference, // attempts to read the system-dependent data for a process' virtual memory @@ -70,15 +89,107 @@ std::string GetHomeDir (); // On failure, returns 0.0, 0.0 void process_mem_usage(double &vm_usage, double &resident_set); -std::string GetStdoutFromCommand(std::string cmd); - -int getNumberOfSockets (); - -int getNumberOfCoresPerSocket (); - -void getOrderedCores (std::vector &orderedCores); - -} - -template -std::vector computePercentiles(std::vector &input, std::vector &percentiles); \ No newline at end of file +std::string getStdoutFromCommand(std::string cmd); + +int getNumberOfSockets(); + +int getNumberOfCoresPerSocket(); + +void getOrderedCores(std::vector &orderedCores); + +int getFirstCoreFromSocket(size_t socket); + +int fileExists(char const *file); + +void readDirectory(const std::string &name, std::vector &v); + +void tryCreateDirectory(std::string dir); + +class Timer { 
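wrapArrayInVector/releaseVectorWrapper above alias a raw array as a std::vector by rewriting libstdc++'s private _Vector_base fields. That avoids a copy, but it is tied to one standard-library implementation and is easy to misuse: the wrapped vector must never reallocate and must be released before destruction. A portable, if copying, alternative for callers that can afford it (a sketch, not the repo's API):

```cpp
#include <boost/align/aligned_allocator.hpp>
#include <cstddef>
#include <vector>

using AlignedBuffer =
    std::vector<char, boost::alignment::aligned_allocator<char>>;

// Copy the array into an owning vector instead of aliasing it; safe with
// any standard library, at the cost of one buffer-sized copy.
static AlignedBuffer copyIntoVector(const char *src, std::size_t n) {
  return AlignedBuffer(src, src + n);
}
```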
+ public: + Timer() : m_beg(clock_::now()) {} + void reset() { m_beg = clock_::now(); } + + [[nodiscard]] double elapsed_nsec() const { + return std::chrono::duration_cast( + clock_::now() - m_beg).count(); + } + + [[nodiscard]] double elapsed_msec() const { + return std::chrono::duration_cast( + clock_::now() - m_beg).count(); + } + + [[nodiscard]] double elapsed_sec() const { + return std::chrono::duration_cast( + clock_::now() - m_beg).count() / 1000.0; + } + + void printElapsed(std::string name = "") const { + std::cout << name + "Timer: " << elapsed_sec() << " sec" << std::endl; + } + + private: + typedef std::chrono::high_resolution_clock clock_; + typedef std::chrono::duration > second_; + std::chrono::time_point m_beg; +}; + +class DynamicLoader { + public: + explicit DynamicLoader() = default; + + explicit DynamicLoader(std::string const &filename) { + m_libHandles[filename] = HandlePtr(dlopen(filename.c_str(), RTLD_LAZY)); + if (!m_libHandles[filename]) { + throw std::logic_error("error: can't load library named \"" + filename + "\""); + } + } + + void addLibrary(std::string const &filename) { + if (m_libHandles.find(filename) != m_libHandles.end()) + std::cout << "warning: library already exists" << std::endl; + m_libHandles[filename] = HandlePtr(dlopen(filename.c_str(), RTLD_LAZY)); + if (!m_libHandles[filename]) { + throw std::logic_error("error: can't load library named \"" + filename + "\""); + } + } + + template + std::function load(std::string const &filename, + std::string const &functionName) { + auto handle = m_libHandles[filename].get(); + if (!handle) { + throw std::logic_error("error: can't load library named \"" + filename + "\""); + } + dlerror(); + void *const result = dlsym(handle, functionName.c_str()); + if (!result) { + char *const error = dlerror(); + if (error) { + throw std::logic_error("error: can't find symbol named \"" + + functionName + "\": " + error); + } + } + return reinterpret_cast(result); + } + + private: + struct dl_closer{ void operator()(void* dl) const { dlclose(dl); }}; + using HandlePtr = std::unique_ptr; + std::unordered_map m_libHandles; +}; +} // namespace Utils + +template +void wrapArrayInVector( + T *sourceArray, size_t arraySize, + std::vector > &targetVector); + +template +void releaseVectorWrapper( + std::vector > &targetVector); + +template +std::vector computePercentiles(std::vector &input, + std::vector &percentiles); \ No newline at end of file diff --git a/src/utils/WindowDefinition.h b/src/utils/WindowDefinition.h index b280881..1773ea2 100644 --- a/src/utils/WindowDefinition.h +++ b/src/utils/WindowDefinition.h @@ -41,7 +41,7 @@ class WindowDefinition { } public: - WindowDefinition(WindowMeasure measure, long size, long slide) + WindowDefinition(WindowMeasure measure = WindowMeasure::ROW_BASED, long size = 1, long slide = 1) : m_size(size), m_slide(slide), m_gap(0), m_windowMeasure(measure) { m_paneSize = gcd(m_size, m_slide); if (slide < size) diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt index d2bf715..e7cd36b 100644 --- a/test/benchmarks/CMakeLists.txt +++ b/test/benchmarks/CMakeLists.txt @@ -1,6 +1,6 @@ include_directories(applications) -include_directories(queries) -include_directories(queues) add_subdirectory(applications) -add_subdirectory(microbenchmarks) \ No newline at end of file +add_subdirectory(microbenchmarks) +add_subdirectory(applicationsWithCheckpoints) +add_subdirectory(kafka-flink) \ No newline at end of file diff --git a/test/benchmarks/applications/BenchmarkQuery.h 
b/test/benchmarks/applications/BenchmarkQuery.h
index b09ed80..1570426 100644
--- a/test/benchmarks/applications/BenchmarkQuery.h
+++ b/test/benchmarks/applications/BenchmarkQuery.h
@@ -1,33 +1,61 @@
 #pragma once
+#include
+#include
+#include
+#include
+
+#if defined(RDMA_INPUT)
+#include "RDMA/infinity/infinity.h"
+#include "buffers/RDMABufferPool.h"
+infinity::core::Context *m_context;
+infinity::queues::QueuePairFactory *m_qpFactory;
+infinity::queues::QueuePair *m_qp;
+infinity::memory::Buffer **m_receiveBuffers;
+#endif
+
 #include
+//#include
 #include "utils/TupleSchema.h"
 #include "utils/QueryApplication.h"
 #include "utils/SystemConf.h"
-// ./cluster_monitoring --circular-size 8388608 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 --query 1 --threads 1
-// ./cluster_monitoring --circular-size 8388608 --unbounded-size 1048576 --batch-size 524288 --bundle-size 524288 --query 2 --threads 1
+// ./cluster_monitoring --circular-size 8388608 --unbounded-size 1048576 (524288) --batch-size 524288 --bundle-size 524288 --query 1 --threads 1
+// ./cluster_monitoring --circular-size 8388608 --unbounded-size 1048576 (524288) --batch-size 524288 --bundle-size 524288 --query 2 --threads 1
 // ./smartgrid --query 1 --unbounded-size 262144 --batch-size 524288 --circular-size 16777216 --bundle-size 524288 --slots 128 --threads 1
 // ./smartgrid --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 --threads 1
 // ./linear_road_benchmark --unbounded-size 4194304 --circular-size 16777216 --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 --threads 1
 // ./linear_road_benchmark --unbounded-size 16777216 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --query 2 --threads 1
 // ./yahoo_benchmark --circular-size 8388608 --slots 128 --batch-size 524288 --bundle-size 524288 --threads 1
 // ./manufacturing_equipment --query 1 --unbounded-size 4096 --batch-size 262144 --circular-size 16777216 --bundle-size 262144 --slots 128 --threads 1
+// ./nexmark --query 1 --circular-size 33554432 --batch-size 1048576 --bundle-size 1048576 --unbounded-size 262144 --latency true --parallel-merge true --threads 15
 class BenchmarkQuery {
  protected:
  std::string m_name;
  long m_timestampReference = 0;
+ long m_lastTimestamp = 0;
+ long m_startTimestamp = 0;
+ long m_endTimestamp = 0;
+ int m_sock = 0;
+ int m_server_fd;
  private:
- const long m_duration = 60 * 10;
+ const long m_duration = 60 * 1; // in seconds, i.e.
60 = 60 secs + const long m_changeDuration = 10; + long m_prevThrTime = 0, m_thrTime = 0; + double m_Bytes; public: std::string getApplicationName() { return m_name; } virtual QueryApplication *getApplication() = 0; + long getTimestampReference() { return m_timestampReference; } + long getStartTimestamp() { return m_startTimestamp; } + long getEndTimestamp() { return m_endTimestamp; } virtual TupleSchema *getSchema() = 0; virtual std::vector *getInMemoryData() = 0; + virtual std::vector *getSecondInMemoryData() { throw std::runtime_error("error: the function is not implemented"); }; virtual std::vector *getStaticData() = 0; static void parseCommandLineArguments(int argc, const char **argv) { int i, j; @@ -46,9 +74,11 @@ class BenchmarkQuery { } else if (strcmp(argv[i], "--query") == 0) { SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); } else if (strcmp(argv[i], "--circular-size") == 0) { - SystemConf::getInstance().CIRCULAR_BUFFER_SIZE = std::stoul(argv[j]); + SystemConf::getInstance().CIRCULAR_BUFFER_SIZE = std::stoi(argv[j]); } else if (strcmp(argv[i], "--unbounded-size") == 0) { - SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = std::stoul(argv[j]); + SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--output-size") == 0) { + SystemConf::getInstance().OUTPUT_BUFFER_SIZE = std::stoi(argv[j]); } else if (strcmp(argv[i], "--hashtable-size") == 0) { SystemConf::getInstance().HASH_TABLE_SIZE = std::stoi(argv[j]); } else if (strcmp(argv[i], "--slots") == 0) { @@ -56,13 +86,53 @@ class BenchmarkQuery { } else if (strcmp(argv[i], "--partial-windows") == 0) { SystemConf::getInstance().PARTIAL_WINDOWS = std::stoi(argv[j]); } else if (strcmp(argv[i], "--parallel-merge") == 0) { - SystemConf::getInstance().PARALLEL_MERGE_ON = (strcasecmp(argv[j], "true") == 0 || - std::atoi(argv[j]) != 0); + SystemConf::getInstance().PARALLEL_MERGE_ON = + (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) != 0); } else if (strcmp(argv[i], "--performance-monitor-interval") == 0) { - SystemConf::getInstance().PERFORMANCE_MONITOR_INTERVAL = std::stoul(argv[j]); + SystemConf::getInstance().PERFORMANCE_MONITOR_INTERVAL = + std::stoul(argv[j]); } else if (strcmp(argv[i], "--latency") == 0) { - SystemConf::getInstance().LATENCY_ON = (strcasecmp(argv[j], "true") == 0 || + SystemConf::getInstance().LATENCY_ON = + (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--compression-monitor-interval") == 0) { + SystemConf::getInstance().COMPRESSION_MONITOR_INTERVAL = + std::stoul(argv[j]); + } else if (strcmp(argv[i], "--checkpoint-duration") == 0) { + SystemConf::getInstance().CHECKPOINT_INTERVAL = std::stoi(argv[j]); + SystemConf::getInstance().CHECKPOINT_ON = + SystemConf::getInstance().CHECKPOINT_INTERVAL > 0; + } else if (strcmp(argv[i], "--failure") == 0) { + SystemConf::getInstance().FAILURE_TIME = std::stoi(argv[j]); + SystemConf::getInstance().FAILURE_ON = + SystemConf::getInstance().FAILURE_TIME > 0; + } else if (strcmp(argv[i], "--disk-block-size") == 0) { + SystemConf::getInstance().BLOCK_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--create-merge") == 0) { + SystemConf::getInstance().CREATE_MERGE_WITH_CHECKPOINTS = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--checkpoint-compression") == 0) { + SystemConf::getInstance().CHECKPOINT_COMPRESSION = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], 
"--persist-input") == 0) { + SystemConf::getInstance().PERSIST_INPUT = (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--lineage") == 0) { + SystemConf::getInstance().LINEAGE_ON = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--recover") == 0) { + SystemConf::getInstance().RECOVER = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--adaptive-compression") == 0) { + SystemConf::getInstance().ADAPTIVE_COMPRESSION_ON = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--adaptive-interval") == 0) { + SystemConf::getInstance().ADAPTIVE_COMPRESSION_INTERVAL = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--adaptive-data") == 0) { + SystemConf::getInstance().ADAPTIVE_CHANGE_DATA = true; + } else if (strcmp(argv[i], "--campaign-num") == 0) { + SystemConf::getInstance().CAMPAIGNS_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--ingestion") == 0) { + SystemConf::getInstance().MBs_INGESTED_PER_SEC = std::stoi(argv[j]); } else { std::string argument(argv[i]); throw std::runtime_error("error: unknown argument " + argument); @@ -74,12 +144,148 @@ class BenchmarkQuery { int runBenchmark(bool terminate = true) { auto t1 = std::chrono::high_resolution_clock::now(); auto inputBuffer = getInMemoryData(); + std::vector * alterInputBuffer = nullptr; + auto application = getApplication(); + if (SystemConf::getInstance().ADAPTIVE_CHANGE_DATA) { + alterInputBuffer = getSecondInMemoryData(); + auto tempBuffer = inputBuffer; + inputBuffer = alterInputBuffer; + alterInputBuffer = tempBuffer; + } + if (SystemConf::getInstance().LATENCY_ON) { + SystemConf::getInstance().DURATION = m_duration - 5; + if (SystemConf::getInstance().RECOVER) { + m_timestampReference = application->getTimestampReference(); + m_lastTimestamp = application->getLastTimestamp(); + m_lastTimestamp = m_lastTimestamp + (m_timestampReference/1000L); // in usec + std::cout << "Last timestamp was " << m_lastTimestamp << " usec" << std::endl; + SystemConf::getInstance().BUFFERED_LATENCY = true; + } + } + long systemTimestamp = -1; + long restartReference = 0; + double remainingTime = 0.; + bool stopRecovery = false; + if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = std::chrono::duration_cast(t2 - t1); + m_prevThrTime = time_span.count(); + m_thrTime = time_span.count(); + } + std::cout << "Start running " + getApplicationName() + " ..." << std::endl; + try { + while (true) { + if (terminate || SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) { + auto t2 = std::chrono::high_resolution_clock::now(); + if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0 && !SystemConf::getInstance().BUFFERED_LATENCY) { + //std::cout << "Start limiting the throughput..." 
<< std::endl; + auto time_span = std::chrono::duration_cast(t2 - t1); + m_thrTime = time_span.count(); + m_Bytes += (double) inputBuffer->size(); + //std::this_thread::sleep_for(std::chrono::microseconds (1600)); + if ((m_thrTime - m_prevThrTime < 1000) && + m_Bytes >= SystemConf::getInstance().MBs_INGESTED_PER_SEC * 1024 * 1024) { + auto sleepTime = ((m_prevThrTime+1000) - m_thrTime) + 350; + std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime)); + // std::cout << "[dat] " << " " << inputBuffer->size() << " " + // << m_Bytes << " " << SystemConf::getInstance().MBs_INGESTED_PER_SEC + // << " " << sleepTime << std::endl; + m_prevThrTime = m_thrTime; + m_Bytes = 0; + } else if (m_thrTime - m_prevThrTime >= 1000) { + m_prevThrTime = m_thrTime; + m_Bytes = 0; + } + } + auto time_span = std::chrono::duration_cast>(t2 - t1); + if (terminate && time_span.count() >= (double) m_duration) { + std::cout << "Stop running " + getApplicationName() + " ..." << std::endl; + return 0; + } + if (SystemConf::getInstance().FAILURE_ON && + time_span.count() >= (double) SystemConf::getInstance().FAILURE_TIME) { + std::cout << "Killing " + getApplicationName() + " ..." << std::endl; + //application->~QueryApplication(); + return 0; + } + } + if (SystemConf::getInstance().LATENCY_ON) { + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = + std::chrono::duration_cast(currentTime.time_since_epoch()).count(); + systemTimestamp = (long)((currentTimeNano - m_timestampReference) / 1000L); + if (SystemConf::getInstance().BUFFERED_LATENCY) { + if (restartReference == 0) { + restartReference = currentTimeNano; + auto dt = (restartReference / 1000L) - m_lastTimestamp; // in usec + remainingTime = dt / 1000.; // in ms + std::cout << "Ingesting buffered data for " + std::to_string(remainingTime) + " ms" << std::endl; + } + double diff = ((double)(currentTimeNano - restartReference) / 1000L) / 1000.; // in ms + if (diff > remainingTime) { + SystemConf::getInstance().BUFFERED_LATENCY = false; + std::cout << "Finished ingesting buffered data" << std::endl; + } else { + auto currentTimeUsec = currentTimeNano/1000L + (long) (diff - remainingTime) * 1000L; + systemTimestamp = (long)(currentTimeUsec - (m_timestampReference / 1000L)); + } + } + } + if (SystemConf::getInstance().RECOVER && !stopRecovery) { + application->recoverData(); + stopRecovery = true; + } + if (SystemConf::getInstance().ADAPTIVE_CHANGE_DATA) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = std::chrono::duration_cast>(t2 - t1); + if (time_span.count() >= (double) m_changeDuration) { + std::cout << "Changing data..." 
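runBenchmark() above rate-limits ingestion when --ingestion sets MBs_INGESTED_PER_SEC: it counts the bytes handed to processData() and, once the per-second byte budget is exhausted early, sleeps out the rest of the one-second window. A minimal sketch of that byte-budget throttle with illustrative names (the extra 350 ms slack in the original is omitted):

```cpp
#include <chrono>
#include <thread>

// Byte-budget throttle: allow at most targetBytes per one-second window;
// when the budget is exhausted early, sleep until the window ends.
class IngestThrottle {
  long m_windowStartMs = nowMs();
  double m_bytes = 0;
  const double m_targetBytes;

  static long nowMs() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(
               steady_clock::now().time_since_epoch()).count();
  }

 public:
  explicit IngestThrottle(double mbsPerSec)
      : m_targetBytes(mbsPerSec * 1024 * 1024) {}

  // Call once per bundle, just before dispatching it.
  void onBundle(double bundleBytes) {
    m_bytes += bundleBytes;
    long now = nowMs();
    if (now - m_windowStartMs < 1000 && m_bytes >= m_targetBytes) {
      std::this_thread::sleep_for(
          std::chrono::milliseconds(m_windowStartMs + 1000 - now));
      m_windowStartMs = nowMs();
      m_bytes = 0;
    } else if (now - m_windowStartMs >= 1000) {  // window rolled over
      m_windowStartMs = now;
      m_bytes = 0;
    }
  }
};
```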
<< std::endl; + auto tempBuffer = inputBuffer; + inputBuffer = alterInputBuffer; + alterInputBuffer = tempBuffer; + SystemConf::getInstance().ADAPTIVE_CHANGE_DATA = false; + } + } + +#if defined(TCP_INPUT) + // read data from a remote source + auto buffer = getInputData(); + application->processData(buffer, systemTimestamp); +#elif defined(RDMA_INPUT) + // read data from a remote source + auto buffer = getRDMABuffer(); + application->processData((void *)buffer, inputBuffer->size(), systemTimestamp); +#else + application->processData(*inputBuffer, systemTimestamp); +#endif + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + } + + int runTwoStreamsBenchmark(bool terminate = true) { + auto t1 = std::chrono::high_resolution_clock::now(); + auto inputBuffer1 = getInMemoryData(); + auto inputBuffer2 = getSecondInMemoryData(); auto application = getApplication(); if (SystemConf::getInstance().LATENCY_ON) { - SystemConf::getInstance().DURATION = m_duration - 3; + SystemConf::getInstance().DURATION = m_duration - 5; } long systemTimestamp = -1; std::cout << "Start running " + getApplicationName() + " ..." << std::endl; + + /*struct iSchema { + long timestamp; + int attr1; + int attr2; + }; + auto p1 = (iSchema*) inputBuffer1->data(); + auto p2 = (iSchema*) inputBuffer2->data(); + size_t len = inputBuffer1->size() / sizeof(iSchema); + long lastTimestamp = p1[len - 1].timestamp;*/ + try { while (true) { if (terminate) { @@ -96,11 +302,132 @@ class BenchmarkQuery { std::chrono::duration_cast(currentTime.time_since_epoch()).count(); systemTimestamp = (long) ((currentTimeNano - m_timestampReference) / 1000L); } - application->processData(*inputBuffer, systemTimestamp); + application->processFirstStream(*inputBuffer1, systemTimestamp); + application->processSecondStream(*inputBuffer2, systemTimestamp); + + // update timestamps + /*for (size_t i = 0; i < len/8; i+=4) { + p1[i].attr2 += 1; + p2[i].attr2 += 1; + }*/ } } catch (std::exception &e) { std::cout << e.what() << std::endl; exit(1); } } + + private: + inline std::shared_ptr getInputData() { +#if defined(TCP_INPUT) + //if (!buffer) { + // throw std::runtime_error("error: buffer in a nullptr"); + //} + auto buffer = UnboundedQueryBufferFactory::getInstance().newNBInstance(0); + if (m_sock == 0) { + std::cout << "[DBG] setting up the tcp socket" << std::endl; + setupSocket(); + } + readBytes(m_sock, buffer->getBuffer().size(), buffer->getBuffer().data()); + + /*struct _InputSchema_128 { + long timestamp; + long padding_0; + __uint128_t user_id; + __uint128_t page_id; + __uint128_t ad_id; + long ad_type; + long event_type; + __uint128_t ip_address; + __uint128_t padding_1; + __uint128_t padding_2; + }; + auto d = (_InputSchema_128 *) buffer->getBuffer().data(); + for (auto ii = 0; ii < buffer->getBuffer().size()/128; ii++) { + //std::cout << d[ii].timestamp << " " + // << (int) d[ii].ad_id << " " + // << (int) d[ii].ad_type << " " + // << (int) d[ii].event_type << " " << std::endl; + if (ii == 0) + break; + }*/ + //std::cout << "end" << std::endl; + return buffer; +#else + // do nothing + return nullptr; +#endif + } + +#if defined(RDMA_INPUT) + inline infinity::core::receive_element_t *getRDMABuffer() { + infinity::core::receive_element_t *receiveElement = new infinity::core::receive_element_t; + if (!m_context) { + auto device = (SystemConf::getInstance().REMOTE_WORKER == SystemConf::WALLABY_ib1) ? 
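The TCP ingest path reads one full buffer per bundle with a short-read loop (readBytes, defined further below). One detail worth noting: read() returning 0 means the peer closed the connection, and the assert-based loop would spin on that case. A defensive variant of the same loop, as a sketch:

```cpp
#include <stdexcept>
#include <unistd.h>

// Read exactly `length` bytes from `fd`: read(2) may return short counts,
// so keep reading until the buffer is full; 0 means the peer closed.
static void readBytesExactly(int fd, size_t length, void *buffer) {
  size_t done = 0;
  while (done < length) {
    ssize_t n = read(fd, static_cast<char *>(buffer) + done, length - done);
    if (n < 0) throw std::runtime_error("error: read failed");
    if (n == 0) throw std::runtime_error("error: peer closed connection");
    done += static_cast<size_t>(n);
  }
}
```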
1 : 0; + m_context = new infinity::core::Context(device); + m_qpFactory = new infinity::queues::QueuePairFactory(m_context); + + std::cout <<"Creating buffers to receive a messages" << std::endl; + m_receiveBuffers = new infinity::memory::Buffer *[BUFFER_COUNT]; + for (uint32_t i = 0; i < BUFFER_COUNT; ++i) { + m_receiveBuffers[i] = new infinity::memory::Buffer(m_context, SystemConf::getInstance().BUNDLE_SIZE * sizeof(char)); + m_context->postReceiveBuffer(m_receiveBuffers[i]); + } + + std::cout <<"Waiting for incoming connection" << std::endl; + m_qpFactory->bindToPort(PORT); + m_qp = m_qpFactory->acceptIncomingConnection(); + while (!m_context->receive(receiveElement)); + m_context->postReceiveBuffer(receiveElement->buffer); + // setup rdma context + RDMABufferPool::getInstance().setContext(m_context); + } + while (!m_context->receive(receiveElement)); + return receiveElement; + } +#endif + + inline void setupSocket() { + struct sockaddr_in address {}; + int opt = 1; + int addrlen = sizeof(address); + // Creating socket file descriptor + if ((m_server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) { + throw std::runtime_error("error: Socket file descriptor creation error"); + } + + // Forcefully attaching socket to the PORT + if (setsockopt(m_server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, &opt, + sizeof(opt))) { + throw std::runtime_error("error: setsockopt"); + } + address.sin_family = AF_INET; + address.sin_addr.s_addr = INADDR_ANY; + address.sin_port = htons(PORT); + + // Forcefully attaching socket to the PORT + if (bind(m_server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) { + throw std::runtime_error("error: bind failed"); + } + if (listen(m_server_fd, 3) < 0) { + throw std::runtime_error("error: listen"); + } + + std::cout << "[DBG] Waiting for a tcp connection" << std::endl; + // todo: accept multiple connections + if ((m_sock = accept(m_server_fd, (struct sockaddr *)&address, + (socklen_t *)&addrlen)) < 0) { + throw std::runtime_error("error: accept"); + } + } + + inline void readBytes(int socket, unsigned int length, void *buffer) { + unsigned int bytesRead = 0; + while (bytesRead < length) { + auto valread = + read(socket, (char *)buffer + bytesRead, length - bytesRead); + assert(valread >= 0); + bytesRead += valread; + } + } }; \ No newline at end of file diff --git a/test/benchmarks/applications/CMakeLists.txt b/test/benchmarks/applications/CMakeLists.txt index efa6dd4..8996529 100644 --- a/test/benchmarks/applications/CMakeLists.txt +++ b/test/benchmarks/applications/CMakeLists.txt @@ -3,6 +3,11 @@ include_directories(LinearRoadBenchmark) include_directories(SmartGrid) include_directories(YahooBenchmark) include_directories(ManufacturingEquipment) +include_directories(RemoteBenchmark) +include_directories(Nexmark) +include_directories(../../../src/RDMA) +include_directories(RemoteBenchmark/RDMA) +add_subdirectory(RemoteBenchmark/RDMA) find_package(GTest REQUIRED) @@ -20,7 +25,17 @@ if (CCACHE_PROGRAM) endif () SET(CPP_FILES + ../../../src/filesystem/File.cpp + ../../../src/checkpoint/FileBackedCheckpointCoordinator.cpp + ../../../src/checkpoint/BlockManager.cpp + ../../../src/checkpoint/LineageGraph.cpp + ../../../src/cql/expressions/Expression.cpp + ../../../src/dispatcher/ITaskDispatcher.cpp + ../../../src/dispatcher/JoinTaskDispatcher.cpp ../../../src/dispatcher/TaskDispatcher.cpp + ../../../src/compression/CompressionCodeGenUtils.cpp + ../../../src/compression/CompressionStatistics.cpp + ../../../src/monitors/CompressionMonitor.cpp 
../../../src/monitors/PerformanceMonitor.cpp ../../../src/monitors/Measurement.cpp ../../../src/monitors/LatencyMonitor.cpp @@ -35,9 +50,21 @@ SET(CPP_FILES ../../../src/utils/Utils.cpp ../../../src/utils/SystemConf.cpp ) +SET(RDMA_CPP_FILES + ../../../src/RDMA/infinity/core/Context.cpp + ../../../src/RDMA/infinity/memory/Atomic.cpp + ../../../src/RDMA/infinity/memory/Buffer.cpp + ../../../src/RDMA/infinity/memory/Region.cpp + ../../../src/RDMA/infinity/memory/RegionToken.cpp + ../../../src/RDMA/infinity/memory/RegisteredMemory.cpp + ../../../src/RDMA/infinity/queues/QueuePair.cpp + ../../../src/RDMA/infinity/queues/QueuePairFactory.cpp + ../../../src/RDMA/infinity/requests/RequestToken.cpp + ../../../src/RDMA/infinity/utils/Address.cpp + ) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma") -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUM -DPREFETCH") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUM -DHAVE_Oo") find_package(benchmark REQUIRED) include_directories(${benchmark_INCLUDE_DIRS}) @@ -47,7 +74,7 @@ FIND_LIBRARY(tbb NAMES libtbb.so) # ManufacturingEquipment add_executable(manufacturing_equipment ManufacturingEquipment/main.cpp - ${CPP_FILES} + ${CPP_FILES} ${RDMA_CPP_FILES} ) target_link_options(manufacturing_equipment PRIVATE -Wl,--unresolved-symbols=ignore-all) if (Boost_FOUND) @@ -59,15 +86,15 @@ target_link_libraries(manufacturing_equipment boost_iostreams boost_fiber operatorJITLib - tbb - pthread dl) + tbb ibverbs + pthread dl uuid stdc++fs) target_compile_options(manufacturing_equipment PRIVATE -Wall -Wextra -O3 -march=native) # Cluster Monitoring add_executable(cluster_monitoring ClusterMonitoring/main.cpp - ${CPP_FILES} + ${CPP_FILES} ${RDMA_CPP_FILES} ) target_link_options(cluster_monitoring PRIVATE -Wl,--unresolved-symbols=ignore-all) if (Boost_FOUND) @@ -79,15 +106,15 @@ target_link_libraries(cluster_monitoring boost_iostreams boost_fiber operatorJITLib - tbb - pthread dl) + tbb ibverbs + pthread dl uuid stdc++fs) target_compile_options(cluster_monitoring PRIVATE -Wall -Wextra -O3 -march=native) # Linear Road Benchmark add_executable(linear_road_benchmark LinearRoadBenchmark/main.cpp - ${CPP_FILES} + ${CPP_FILES} ${RDMA_CPP_FILES} ) target_link_options(linear_road_benchmark PRIVATE -Wl,--unresolved-symbols=ignore-all) if (Boost_FOUND) @@ -99,15 +126,15 @@ target_link_libraries(linear_road_benchmark boost_iostreams boost_fiber operatorJITLib - tbb - pthread dl) + tbb ibverbs + pthread dl uuid stdc++fs) target_compile_options(linear_road_benchmark PRIVATE -Wall -Wextra -O3 -march=native) # Yahoo Benchmark add_executable(yahoo_benchmark YahooBenchmark/main.cpp - ${CPP_FILES} + ${CPP_FILES} ${RDMA_CPP_FILES} ) target_link_options(yahoo_benchmark PRIVATE -Wl,--unresolved-symbols=ignore-all) if (Boost_FOUND) @@ -117,15 +144,15 @@ endif () target_link_libraries(yahoo_benchmark operatorJITLib boost_fiber - tbb - pthread dl) + tbb ibverbs + pthread dl uuid stdc++fs) target_compile_options(yahoo_benchmark PRIVATE -Wall -Wextra -O3 -march=native) # Smart Grid add_executable(smartgrid SmartGrid/main.cpp - ${CPP_FILES} + ${CPP_FILES} ${RDMA_CPP_FILES} ) if (Boost_FOUND) include_directories(${Boost_INCLUDE_DIRS}) @@ -136,7 +163,108 @@ target_link_libraries(smartgrid boost_iostreams boost_fiber operatorJITLib - tbb - pthread dl) + tbb ibverbs + pthread dl uuid stdc++fs) target_compile_options(smartgrid PRIVATE -Wall -Wextra -O3 -march=native) target_link_options(smartgrid PRIVATE 
-Wl,--unresolved-symbols=ignore-all)
+
+
+# Nexmark
+add_executable(nexmark
+ Nexmark/main.cpp
+ ${CPP_FILES} ${RDMA_CPP_FILES}
+ )
+if (Boost_FOUND)
+ include_directories(${Boost_INCLUDE_DIRS})
+ target_link_libraries(nexmark ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(nexmark
+ z
+ boost_iostreams
+ boost_fiber
+ operatorJITLib
+ tbb ibverbs
+ pthread dl uuid stdc++fs)
+target_compile_options(nexmark PRIVATE -Wall -Wextra -O3 -march=native -DHAVE_NUM)
+target_link_options(nexmark PRIVATE -Wl,--unresolved-symbols=ignore-all)
+
+
+# Remote source
+add_executable(remoteSource
+ RemoteBenchmark/remoteSource.cpp
+ ${CPP_FILES} ${RDMA_CPP_FILES}
+ )
+if (Boost_FOUND)
+ include_directories(${Boost_INCLUDE_DIRS})
+ target_link_libraries(remoteSource ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(remoteSource
+ z
+ boost_iostreams
+ boost_fiber
+ operatorJITLib
+ tbb ibverbs
+ pthread dl uuid stdc++fs)
+target_compile_options(remoteSource PRIVATE -Wall -Wextra -O3 -march=native)
+target_link_options(remoteSource PRIVATE -Wl,--unresolved-symbols=ignore-all)
+
+
+# Remote sink
+add_executable(remoteSink
+ RemoteBenchmark/remoteSink.cpp
+ ${CPP_FILES} ${RDMA_CPP_FILES}
+ )
+if (Boost_FOUND)
+ include_directories(${Boost_INCLUDE_DIRS})
+ target_link_libraries(remoteSink ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(remoteSink
+ z
+ boost_iostreams
+ boost_fiber
+ operatorJITLib
+ tbb ibverbs
+ pthread dl uuid stdc++fs)
+target_compile_options(remoteSink PRIVATE -Wall -Wextra -O3 -march=native)
+target_link_options(remoteSink PRIVATE -Wl,--unresolved-symbols=ignore-all)
+
+# Remote RDMA source
+add_executable(remoteRDMASource
+ RemoteBenchmark/remoteRDMASource.cpp
+ ${CPP_FILES}
+ ${RDMA_CPP_FILES}
+ )
+if (Boost_FOUND)
+ include_directories(${Boost_INCLUDE_DIRS})
+ target_link_libraries(remoteRDMASource ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(remoteRDMASource
+ z
+ boost_iostreams
+ boost_fiber
+ operatorJITLib
+ tbb ibverbs
+ pthread dl uuid stdc++fs)
+target_compile_options(remoteRDMASource PRIVATE -Wall -Wextra -O3 -march=native)
+target_link_options(remoteRDMASource PRIVATE -Wl,--unresolved-symbols=ignore-all)
+
+
+# Remote RDMA sink
+add_executable(remoteRDMASink
+ RemoteBenchmark/remoteRDMASink.cpp
+ ${CPP_FILES}
+ ${RDMA_CPP_FILES}
+ )
+if (Boost_FOUND)
+ include_directories(${Boost_INCLUDE_DIRS})
+ target_link_libraries(remoteRDMASink ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(remoteRDMASink
+ z
+ boost_iostreams
+ boost_fiber
+ operatorJITLib
+ tbb ibverbs
+ pthread dl uuid stdc++fs)
+target_compile_options(remoteRDMASink PRIVATE -Wall -Wextra -O3 -march=native)
+target_link_options(remoteRDMASink PRIVATE -Wl,--unresolved-symbols=ignore-all)
\ No newline at end of file
diff --git a/test/benchmarks/applications/ClusterMonitoring/CM1.cpp b/test/benchmarks/applications/ClusterMonitoring/CM1.cpp
index e180be6..0f22696 100644
--- a/test/benchmarks/applications/ClusterMonitoring/CM1.cpp
+++ b/test/benchmarks/applications/ClusterMonitoring/CM1.cpp
@@ -65,10 +65,11 @@ class CM1 : public ClusterMonitoring {
 }
 public:
- CM1(bool inMemory = true) {
+ CM1(bool inMemory = true, bool startApp = true) {
 m_name = "CM1";
 createSchema();
- createApplication();
+ if (startApp)
+ createApplication();
 if (inMemory)
 loadInMemoryData();
 }
diff --git a/test/benchmarks/applications/ClusterMonitoring/CM2.cpp b/test/benchmarks/applications/ClusterMonitoring/CM2.cpp
index 6466d41..08c8ed0 100644
--- a/test/benchmarks/applications/ClusterMonitoring/CM2.cpp
+++
b/test/benchmarks/applications/ClusterMonitoring/CM2.cpp @@ -74,10 +74,11 @@ class CM2 : public ClusterMonitoring { } public: - CM2(bool inMemory = true) { + CM2(bool inMemory = true, bool startApp = true) { m_name = "CM2"; createSchema(); - createApplication(); + if (startApp) + createApplication(); if (inMemory) loadInMemoryData(); } diff --git a/test/benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h b/test/benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h index 3ae22c0..78ddaef 100644 --- a/test/benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h +++ b/test/benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h @@ -67,15 +67,50 @@ class ClusterMonitoring : public BenchmarkQuery { m_data = new std::vector(len); auto buf = (InputSchema *) m_data->data(); - std::string filePath = Utils::GetHomeDir() + "/LightSaber/resources/datasets/google-cluster-data/"; + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/google-cluster-data/"; std::ifstream file(filePath + "google-cluster-data.txt"); + // std::cout << filePath << std::endl; + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); std::string line; unsigned long idx = 0; while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { InputSchema::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; idx++; } + if (idx < len / sizeof(InputSchema)) { + unsigned long iter = 0; + auto barrier = idx-1; + long lastTime = buf[idx-1].timestamp; + while (idx < len / sizeof(InputSchema)) { + std::memcpy(&buf[idx], &buf[iter], sizeof(InputSchema)); + buf[idx].timestamp += lastTime; + m_endTimestamp = buf[idx].timestamp; + idx++; + iter++; + if (iter == barrier) { + iter = 0; + lastTime = buf[idx-1].timestamp; + } + } + } + + /*for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + if (i%10==0) { + auto cpu = buf[i].cpu; + auto ii = 0; + for (;ii < 10 && i < m_data->size() / sizeof(InputSchema); ++i) { + buf[i].cpu = cpu; + ii++; + } + } + }*/ + if (m_debug) { std::cout << "timestamp jobId machineId eventType userId category priority cpu ram disk constraints" << std::endl; for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { diff --git a/test/benchmarks/applications/ClusterMonitoring/main.cpp b/test/benchmarks/applications/ClusterMonitoring/main.cpp index f56eb61..c2559a4 100644 --- a/test/benchmarks/applications/ClusterMonitoring/main.cpp +++ b/test/benchmarks/applications/ClusterMonitoring/main.cpp @@ -5,15 +5,15 @@ #include "CM2.cpp" int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; SystemConf::getInstance().QUERY_NUM = 1; BenchmarkQuery::parseCommandLineArguments(argc, argv); if (SystemConf::getInstance().QUERY_NUM == 1) { - benchmarkQuery = new CM1(); + benchmarkQuery = std::make_unique(); } else if (SystemConf::getInstance().QUERY_NUM == 2) { - benchmarkQuery = new CM2(); + benchmarkQuery = std::make_unique(); } else { throw std::runtime_error("error: invalid benchmark query id"); } diff --git a/test/benchmarks/applications/LinearRoadBenchmark/LRB1.cpp b/test/benchmarks/applications/LinearRoadBenchmark/LRB1.cpp index 23fe6e9..8a1d21d 100644 --- a/test/benchmarks/applications/LinearRoadBenchmark/LRB1.cpp +++ b/test/benchmarks/applications/LinearRoadBenchmark/LRB1.cpp @@ -79,10 +79,11 @@ class LRB1 : public LinearRoadBenchmark { } 
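// The loadInMemoryData() replay logic added above (and repeated for the other
// benchmarks) tiles the parsed prefix over the rest of the buffer, shifting
// timestamps so the synthetic stream keeps moving forward. A minimal sketch,
// with `capacity` standing in for len / sizeof(InputSchema):
//
//   unsigned long iter = 0;                // read cursor over the parsed prefix
//   auto barrier = idx - 1;                // replay all but the last parsed tuple
//   long lastTime = buf[idx - 1].timestamp;
//   while (idx < capacity) {
//     buf[idx] = buf[iter];                // re-emit an earlier tuple ...
//     buf[idx].timestamp += lastTime;      // ... shifted so time stays monotonic
//     idx++; iter++;
//     if (iter == barrier) {               // wrapped around the prefix:
//       iter = 0;                          //   restart it and rebase the shift
//       lastTime = buf[idx - 1].timestamp;
//     }
//   }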
public: - LRB1(bool inMemory = true) { + LRB1(bool inMemory = true, bool startApp = true) { m_name = "LRB1"; createSchema(); - createApplication(); + if (startApp) + createApplication(); m_fileName = "lrb-data-small-ht.txt"; if (inMemory) loadInMemoryData(); diff --git a/test/benchmarks/applications/LinearRoadBenchmark/LRB2.cpp b/test/benchmarks/applications/LinearRoadBenchmark/LRB2.cpp index de6137b..811d512 100644 --- a/test/benchmarks/applications/LinearRoadBenchmark/LRB2.cpp +++ b/test/benchmarks/applications/LinearRoadBenchmark/LRB2.cpp @@ -458,10 +458,11 @@ class LRB2 : public LinearRoadBenchmark { } public: - LRB2(bool inMemory = true) { + LRB2(bool inMemory = true, bool startApp = true) { m_name = "LRB2"; createSchema(); - createApplication(); + if (startApp) + createApplication(); m_fileName = "lrb-data-small-ht.txt"; if (inMemory) loadInMemoryData(); diff --git a/test/benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h b/test/benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h index d63fd52..9306489 100644 --- a/test/benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h +++ b/test/benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h @@ -58,15 +58,38 @@ class LinearRoadBenchmark : public BenchmarkQuery { m_data = new std::vector(len); auto buf = (InputSchema *) m_data->data(); - std::string filePath = Utils::GetHomeDir() + "/LightSaber/resources/datasets/lrb/"; + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/lrb/"; std::ifstream file(filePath + m_fileName); + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); std::string line; unsigned long idx = 0; while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { InputSchema::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; idx++; } + if (idx < len / sizeof(InputSchema)) { + unsigned long iter = 0; + auto barrier = idx-1; + long lastTime = buf[idx-1].timestamp; + while (idx < len / sizeof(InputSchema)) { + std::memcpy(&buf[idx], &buf[iter], sizeof(InputSchema)); + buf[idx].timestamp += lastTime; + m_endTimestamp = buf[idx].timestamp; + idx++; + iter++; + if (iter == barrier) { + iter = 0; + lastTime = buf[idx-1].timestamp; + } + } + } + if (m_debug) { std::cout << "timestamp vehicle speed highway lane direction position" << std::endl; for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { diff --git a/test/benchmarks/applications/LinearRoadBenchmark/main.cpp b/test/benchmarks/applications/LinearRoadBenchmark/main.cpp index 7e45a9a..c8b7e24 100644 --- a/test/benchmarks/applications/LinearRoadBenchmark/main.cpp +++ b/test/benchmarks/applications/LinearRoadBenchmark/main.cpp @@ -5,15 +5,15 @@ #include "LRB2.cpp" int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; SystemConf::getInstance().QUERY_NUM = 2; BenchmarkQuery::parseCommandLineArguments(argc, argv); if (SystemConf::getInstance().QUERY_NUM == 1) { - benchmarkQuery = new LRB1(); + benchmarkQuery = std::make_unique(); } else if (SystemConf::getInstance().QUERY_NUM == 2) { - benchmarkQuery = new LRB2(); + benchmarkQuery = std::make_unique(); } else { throw std::runtime_error("error: invalid benchmark query id"); } diff --git a/test/benchmarks/applications/ManufacturingEquipment/ME1.cpp b/test/benchmarks/applications/ManufacturingEquipment/ME1.cpp index 6b25a1e..836d8df 
100644 --- a/test/benchmarks/applications/ManufacturingEquipment/ME1.cpp +++ b/test/benchmarks/applications/ManufacturingEquipment/ME1.cpp @@ -63,10 +63,11 @@ class ME1 : public ManufacturingEquipment { } public: - ME1(bool inMemory = true) { + ME1(bool inMemory = true, bool startApp = true) { m_name = "ME1"; createSchema(); - createApplication(); + if (startApp) + createApplication(); if (inMemory) loadInMemoryData(); } diff --git a/test/benchmarks/applications/ManufacturingEquipment/ManufacturingEquipment.h b/test/benchmarks/applications/ManufacturingEquipment/ManufacturingEquipment.h index 75c3153..c974994 100644 --- a/test/benchmarks/applications/ManufacturingEquipment/ManufacturingEquipment.h +++ b/test/benchmarks/applications/ManufacturingEquipment/ManufacturingEquipment.h @@ -71,6 +71,7 @@ class ManufacturingEquipment : public BenchmarkQuery { TupleSchema *m_schema = nullptr; QueryApplication *m_application = nullptr; std::vector *m_data = nullptr; + std::vector *m_secData = nullptr; bool m_debug = false; QueryApplication *getApplication() override { @@ -82,7 +83,9 @@ class ManufacturingEquipment : public BenchmarkQuery { void loadInMemoryData() { size_t len = SystemConf::getInstance().BUNDLE_SIZE; m_data = new std::vector(len); + m_secData = new std::vector(len); auto buf = (InputSchema *) m_data->data(); + auto secBuf = (InputSchema *) m_secData->data(); const std::string cell = "2012-02-22T16:46:28.9670320+00:00"; const std::locale @@ -93,15 +96,59 @@ class ManufacturingEquipment : public BenchmarkQuery { is >> myEpoch; //std::cout << myEpoch << std::endl; - std::string filePath = Utils::GetHomeDir() + "/LightSaber/resources/datasets/manufacturing_equipment/"; + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/manufacturing_equipment/"; std::ifstream file(filePath + "DEBS2012-small.txt"); + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); std::string line; unsigned long idx = 0; while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { InputSchema::parse(buf[idx], line, myEpoch); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; idx++; } + if (SystemConf::getInstance().ADAPTIVE_CHANGE_DATA) { + for (unsigned long i = 0; i < idx; ++i) { + // if (i%10==0) { + auto mf01 = buf[i].mf01 % 16; + auto mf02 = buf[i].mf02 % 16; + auto mf03 = buf[i].mf03 % 16; + auto ii = 0; + for (; ii < 14 && i < idx; ++i) { + secBuf[i].timestamp = buf[i].timestamp; + secBuf[i].mf01 = mf01; + secBuf[i].mf02 = mf02; + secBuf[i].mf03 = mf03; + ii++; + } + i = i - 1; + //} + } + + for (unsigned long i = 0; i < idx; ++i) { + // if (i%14==0) { + // auto mf01 = buf[i].mf01 %16; + // auto mf02 = buf[i].mf02 %16; + // auto mf03 = buf[i].mf03 %16; + auto mf01 = buf[i].mf01 % 4095; + auto mf02 = buf[i].mf02 % 4095; + auto mf03 = buf[i].mf03 % 4095; + auto ii = 0; + for (; ii < 14 && i < m_data->size() / sizeof(InputSchema); ++i) { + buf[i].mf01 = mf01; + buf[i].mf02 = mf02; + buf[i].mf03 = mf03; + ii++; + } + i = i - 1; + //} + } + } + if (m_debug) { std::cout << "timestamp messageIndex mf01 mf02 mf03 pc13 pc14 pc15 pc25 pc26 pc27 res bm05 bm06 bm07 bm08 bm09 bm10" @@ -122,6 +169,10 @@ class ManufacturingEquipment : public BenchmarkQuery { return m_data; } + std::vector *getSecondInMemoryData() override { + return m_secData; + } + std::vector *getStaticData() override { throw std::runtime_error("error: this benchmark does not have static data"); } diff --git 
a/test/benchmarks/applications/ManufacturingEquipment/main.cpp b/test/benchmarks/applications/ManufacturingEquipment/main.cpp index de5e75f..f1c76fd 100644 --- a/test/benchmarks/applications/ManufacturingEquipment/main.cpp +++ b/test/benchmarks/applications/ManufacturingEquipment/main.cpp @@ -4,13 +4,13 @@ #include "ME1.cpp" int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; SystemConf::getInstance().QUERY_NUM = 1; BenchmarkQuery::parseCommandLineArguments(argc, argv); if (SystemConf::getInstance().QUERY_NUM == 1) { - benchmarkQuery = new ME1(); + benchmarkQuery = std::make_unique(); } else { throw std::runtime_error("error: invalid benchmark query id"); } diff --git a/test/benchmarks/applications/Nexmark/NBQ5.cpp b/test/benchmarks/applications/Nexmark/NBQ5.cpp new file mode 100644 index 0000000..3c6f737 --- /dev/null +++ b/test/benchmarks/applications/Nexmark/NBQ5.cpp @@ -0,0 +1,430 @@ +#include "benchmarks/applications/Nexmark/Nexmark.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +class NBQ5 : public Nexmark { + private: + void createApplication() override { + SystemConf::getInstance().SLOTS = 128; + SystemConf::getInstance().PARTIAL_WINDOWS = 64; + SystemConf::getInstance().HASH_TABLE_SIZE = 512; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + + // auto window = new WindowDefinition(RANGE_BASED, 3600, 60); + auto window = new WindowDefinition(RANGE_BASED, 60, 1); + + // Configure aggregation + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("cnt"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(1, BasicType::Long); + + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + + bool replayTimestamps = window->isRangeBased(); + + OperatorCode *cpuCode; + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + //genCode->setCustomHashTable(customHashtable); + genCode->setPostWindowOperation(postOperation, postCondition, (useParallelMerge) ? 
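// (The three code-generation strings passed here are defined just below:
// `postOperation` folds a window's hash table into a running max,
// `mergeOperation` combines an opening window's partial results with the
// matching closing partials, and `parallelMergeOperation` is the variant
// used when parallel window merging is enabled.)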
parallelMergeOperation : mergeOperation); + genCode->setQueryId(0); + genCode->setup(); + cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + auto queryOperator = new QueryOperator(*cpuCode); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge); + + m_application = new QueryApplication(queries); + m_application->setup(); + } + + std::string parallelMergeOperation = + " int _max = INT_MIN;\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (tempCompleteWindowsRes[idx].state == 1 && _max < tempCompleteWindowsRes[idx].value._1) /* Skip empty slot */\n" + " _max = tempCompleteWindowsRes[idx].value._1; \n" + " }\n"; + + std::string mergeOperation = + " int _max = INT_MIN;\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (openingWindowsRes[wid][idx].state != 1) /* Skip empty slot */\n" + " continue;\n" + " isFound = map2.get_index(openingWindowsRes[wid][idx].key, posInB2);\n" + " if (posInB2 < 0) {\n" + " printf(\"error: open-adress hash table is full \\n\");\n" + " exit(1);\n" + " }\n" + " if (!isFound) { \n" + " _max = (_max > openingWindowsRes[wid][idx].value._1) ? _max : openingWindowsRes[wid][idx].value._1;\n" + " } else { // merge values based on the number of aggregated values and their types! \n" + " int temp = openingWindowsRes[wid][idx].value._1+partialRes[wid2][posInB2].value._1;\n" + " _max = (_max > temp) ? _max : temp;\n" + " }\n" + " }\n" + "\n" + " /* Iterate over the remaining tuples in the second table. 
*/\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (partialRes[wid2][idx].state == 1 && _max < partialRes[wid2][idx].value._1) /* Skip empty slot */\n" + " _max = partialRes[wid2][idx].value._1;\n" + " }\n"; + + std::string postOperation = "\tint _max = INT_MIN;\n" + "\tfor (int i = 0; i < mapSize; i++) {\n" + "\t\tif (aggrStructures[pid].getBuckets()[i].state == 1 && _max < aggrStructures[pid].getBuckets()[i].value._1)\n" + "\t\t\t_max = aggrStructures[pid].getBuckets()[i].value._1;\n" + "\t}\n"; + + std::string postCondition = "completeWindowsResults[completeWindowsPointer]._2 == _max"; + + std::string customHashtable = "using KeyT = long;\n" + "using ValueT = Value;\n" + "\n" + "struct MyHash{\n" + " std::size_t operator()(KeyT m) const {\n" + " std::hash hashVal;\n" + " return hashVal(m%1000);\n" + " }\n" + "};\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "\n" + "struct alignas(16) Bucket {\n" + " char state;\n" + " char dirty;\n" + " long timestamp;\n" + " KeyT key;\n" + " ValueT value;\n" + " int counter;\n" + "};\n" + "\n" + "using BucketT = Bucket;\n" + "\n" + "class alignas(64) HashTable {\n" + "private:\n" + " using HashT = MyHash; //std::hash;\n" + " using EqT = HashMapEqualTo;\n" + " using AggrT = Aggregator;\n" + "\n" + " HashT _hasher;\n" + " EqT _eq;\n" + " BucketT* _buckets = nullptr;\n" + " AggrT* _aggrs = nullptr;\n" + " size_t _num_buckets = MAP_SIZE;\n" + " size_t _num_filled = 0;\n" + " size_t _mask = MAP_SIZE-1;\n" + "public:\n" + " HashTable ();\n" + " HashTable (Bucket*nodes);\n" + " void init ();\n" + " void reset ();\n" + " void clear ();\n" + " void insert (KeyT &key, ValueT &value, long timestamp);\n" + " void insert_or_modify (KeyT &key, ValueT &value, long timestamp);\n" + " bool evict (KeyT &key);\n" + " void insertSlices ();\n" + " void evictSlices ();\n" + " void setValues ();\n" + " void setIntermValues (int pos, long timestamp);\n" + " bool get_value (const KeyT &key, ValueT &result);\n" + " bool get_result (const KeyT &key, ValueT &result);\n" + " bool get_index (const KeyT &key, int &index);\n" + " void deleteHashTable();\n" + " BucketT* getBuckets ();\n" + " size_t getSize() const;\n" + " bool isEmpty() const;\n" + " size_t getNumberOfBuckets() const;\n" + " float load_factor() const;\n" + "};\n" + "\n" + "HashTable::HashTable () {}\n" + "\n" + "HashTable::HashTable (Bucket *nodes) : _buckets(nodes) {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "}\n" + "\n" + "void HashTable::init () {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "\n" + " _buckets = (BucketT*)_mm_malloc(_num_buckets * sizeof(BucketT), 64);\n" + " _aggrs = (AggrT*)_mm_malloc(_num_buckets * sizeof(AggrT), 64);\n" + " if (!_buckets /*|| !_aggrs*/) {\n" + " free(_buckets);\n" + " /*free(_aggrs);*/\n" + " throw std::bad_alloc();\n" + " }\n" + "\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " _aggrs[i] = AggrT (); // maybe initiliaze this on insert\n" + " _aggrs[i].initialise();\n" + " }\n" + "}\n" + "\n" + "void HashTable::reset () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " 
//_aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::clear () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " //_buckets[i].counter = 0;\n" + " _aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, key, KEY_SIZE);\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " char tempState;\n" + " for (; i < _num_buckets; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "bool HashTable::evict (KeyT &key) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " printf (\"error: entry not found \\n\");\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::insertSlices () {\n" + " int maxNumOfSlices = INT_MIN;\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " int temp = _aggrs[i].addedElements - _aggrs[i].removedElements;\n" + " if (_buckets[i].state) {\n" + " node n;\n" + "\t\t\t\tn._1 = _buckets[i].value._1;\n" + " _aggrs[i].insert(n);\n" + " _buckets[i].state = 0;\n" + " //_buckets[i].value = ValueT();\n" + " } else if (temp > 0) {\n" + " ValueT val;\n" + 
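// (Context for the generated insertSlices/evictSlices/setValues code around
// this point: every bucket owns an `Aggregator` holding per-slice partial
// aggregates, so a sliding window is maintained incrementally -- insert the
// newest slice, evict expired ones, query the combined value -- rather than
// rescanning the full window on every pane.)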
" node n;\n" + "\t\t\tn._1 = val._1;\n" + " _aggrs[i].insert(n);\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::evictSlices () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " _aggrs[i].evict();\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setValues () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].query();\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " _buckets[i].counter = 1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setIntermValues (int pos, long timestamp) {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].queryIntermediate (pos);\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "bool HashTable::get_value (const KeyT &key, ValueT &result) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "bool HashTable::get_index (const KeyT &key, int &index) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " index = -1;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::deleteHashTable() {\n" + " for (size_t bucket=0; bucket<_num_buckets; ++bucket) {\n" + " _buckets[bucket].~BucketT();\n" + " _aggrs->~AggrT();\n" + " }\n" + " free(_buckets);\n" + " free(_aggrs);\n" + "}\n" + "\n" + "BucketT* HashTable::getBuckets () {\n" + " return _buckets;\n" + "}\n" + "\n" + "size_t HashTable::getSize() const {\n" + " return _num_filled;\n" + "}\n" + "\n" + "bool HashTable::isEmpty() const {\n" + " return _num_filled==0;\n" + "}\n" + "\n" + "size_t HashTable::getNumberOfBuckets() const {\n" + " return _num_buckets;\n" + "}\n" + "\n" + "float HashTable::load_factor() const {\n" + " return static_cast(_num_filled) / static_cast(_num_buckets);\n" + "}\n"; + + public: + explicit NBQ5(bool inMemory = true, bool startApp = true) { + m_name = "NBQ5"; + createSchema(); + if (inMemory) + loadInMemoryData(); + if (startApp) + createApplication(); + } +}; \ No newline at end of file diff --git a/test/benchmarks/applications/Nexmark/Nexmark.h b/test/benchmarks/applications/Nexmark/Nexmark.h new file mode 100644 index 0000000..da992d7 --- /dev/null +++ b/test/benchmarks/applications/Nexmark/Nexmark.h @@ -0,0 +1,317 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../BenchmarkQuery.h" +#include "utils/QueryApplication.h" 
+#include "utils/TupleSchema.h" +#include "utils/Utils.h" + +class Nexmark : public BenchmarkQuery { + private: + struct InputSchema { + long timestamp; + long id; + long itemName; + long description; + long initialBid; + long reserve; + long expires; + long seller; + long category; + long padding_0; + long padding_1; + long padding_2; + long padding_3; + long padding_4; + long padding_5; + long padding_6; + + static void parse(InputSchema &tuple, std::string &line) { + std::istringstream iss(line); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + tuple.timestamp = std::stol(words[0]); + tuple.id = std::stol(words[1]); + tuple.itemName = std::stol(words[2]); + tuple.description = std::stol(words[3]); + tuple.initialBid = std::stol(words[4]); + tuple.reserve = std::stol(words[5]); + tuple.expires = std::stol(words[6]); + tuple.seller = std::stol(words[7]); + tuple.category = std::stol(words[8]); + } + }; + + /** + * We start the ids at specific values to help ensure the queries find a match + * even on small synthesized dataset sizes. + */ + const long FIRST_AUCTION_ID = 1000L; + const long FIRST_PERSON_ID = 1000L; + const long FIRST_CATEGORY_ID = 10L; + /** Proportions of people/auctions/bids to synthesize. */ + const int PERSON_PROPORTION = 1; + + const int AUCTION_PROPORTION = 3; + const int BID_PROPORTION = 46; + const int PROPORTION_DENOMINATOR = + PERSON_PROPORTION + AUCTION_PROPORTION + BID_PROPORTION; + + /** + * Keep the number of categories small so the example queries will find + * results even with a small batch of events. + */ + const int NUM_CATEGORIES = 5; + /** Number of yet-to-be-created people and auction ids allowed. */ + const int AUCTION_ID_LEAD = 10; + /** + * Fraction of people/auctions which may be 'hot' sellers/bidders/auctions are + * 1 over these values. + */ + const int HOT_SELLER_RATIO = 100; + + /* + * Extra parameters + * */ + long outOfOrderGroupSize = 1; + long firstEventNumber = 0; + long firstEventId = 0; + /** Number of yet-to-be-created people and auction ids allowed. */ + const int PERSON_ID_LEAD = 10; + /** Average idealized size of a 'new person' event, in bytes. */ + int avgPersonByteSize = 200; + /** Average idealized size of a 'new auction' event, in bytes. */ + int avgAuctionByteSize = 500; + /** Average idealized size of a 'bid' event, in bytes. */ + int avgBidByteSize = 100; + /** Ratio of bids to 'hot' auctions compared to all other auctions. */ + int hotAuctionRatio = 2; + /** Ratio of auctions for 'hot' sellers compared to all other people. */ + int hotSellersRatio = 4; + /** Ratio of bids for 'hot' bidders compared to all other people. */ + int hotBiddersRatio = 4; + /** Window size, in seconds, for queries 3, 5, 7 and 8. */ + long windowSizeSec = 10; + /** Sliding window period, in seconds, for query 5. */ + long windowPeriodSec = 5; + /** Number of seconds to hold back events according to their reported + * timestamp. */ + long watermarkHoldbackSec = 0; + /** Average number of auction which should be inflight at any time, per + * generator. */ + int numInFlightAuctions = 100; + /** Maximum number of people to consider as active for placing auctions or + * bids. */ + int numActivePeople = 1000; + /** Initial overall event rate. */ + int firstEventRate = 10000; + /** Next overall event rate. 
*/ + int nextEventRate = 10000; + /** Events per second **/ + const int eventsPerSec = 1000; + + long lastBase0AuctionId(long eventId) { + long epoch = eventId / PROPORTION_DENOMINATOR; + long offset = eventId % PROPORTION_DENOMINATOR; + if (offset < PERSON_PROPORTION) { + // About to generate a person. + // Go back to the last auction in the last epoch. + epoch--; + offset = AUCTION_PROPORTION - 1; + } else if (offset >= PERSON_PROPORTION + AUCTION_PROPORTION) { + // About to generate a bid. + // Go back to the last auction generated in this epoch. + offset = AUCTION_PROPORTION - 1; + } else { + // About to generate an auction. + offset -= PERSON_PROPORTION; + } + return epoch * AUCTION_PROPORTION + offset; + } + + long lastBase0PersonId(long eventId) { + long epoch = eventId / PROPORTION_DENOMINATOR; + long offset = eventId % PROPORTION_DENOMINATOR; + if (offset >= PERSON_PROPORTION) { + // About to generate an auction or bid. + // Go back to the last person generated in this epoch. + offset = PERSON_PROPORTION - 1; + } + // About to generate a person. + return epoch * PERSON_PROPORTION + offset; + } + + long nextBase0PersonId(long eventId) { + // Choose a random person from any of the 'active' people, plus a few + // 'leads'. By limiting to 'active' we ensure the density of bids or + // auctions per person does not decrease over time for long running jobs. By + // choosing a person id ahead of the last valid person id we will make + // newPerson and newAuction events appear to have been swapped in time. + // todo: fix this + std::random_device rd; + std::mt19937_64 eng(rd()); + + long numPeople = lastBase0PersonId(eventId) + 1; + long activePeople = std::min(numPeople, (long)numActivePeople); + + std::uniform_int_distribution distr(0, activePeople + PERSON_ID_LEAD); + long n = distr(eng); + return numPeople - activePeople + n; + } + + long nextEventNumber(long numEvents) { return firstEventNumber + numEvents; } + + long nextAdjustedEventNumber(long numEvents) { + long n = outOfOrderGroupSize; + long eventNumber = nextEventNumber(numEvents); + long base = (eventNumber / n) * n; + long offset = (eventNumber * 953) % n; + return base + offset; + } + + long getNextEventId(long eventsCountSoFar) { + return firstEventId + nextAdjustedEventNumber(eventsCountSoFar); + } + + long getNextAuctionId(long eventsCountSoFar) { + return FIRST_AUCTION_ID + nextAdjustedEventNumber(eventsCountSoFar); + } + + public: + TupleSchema *m_schema = nullptr; + QueryApplication *m_application = nullptr; + std::vector *m_data = nullptr; + bool m_debug = false; + + QueryApplication *getApplication() override { return m_application; } + + virtual void createApplication() = 0; + + void loadInMemoryData() { + std::random_device rd; + std::mt19937_64 eng(rd()); + std::uniform_int_distribution distr(0, 1000000); + + std::random_device rd_; + std::mt19937_64 eng_(rd_()); + std::uniform_real_distribution<> dbl(0.0, 1.0); + + std::random_device _rd; + std::mt19937_64 _eng(_rd()); + std::uniform_int_distribution _distr(HOT_SELLER_RATIO); + + std::random_device _rd_; + std::mt19937_64 _eng_(_rd_()); + std::uniform_int_distribution _distr_(0, NUM_CATEGORIES); + + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + m_data = new std::vector(len); + auto buf = (InputSchema *)m_data->data(); + + std::unordered_set set; + + std::string line; + unsigned long idx = 0; + long timestamp = 0; + while (idx < len / sizeof(InputSchema)) { + auto eventsCountSoFar = idx; + auto newEventId = getNextEventId(eventsCountSoFar); + + if 
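// Worked example of the id mapping performed by the helpers above: with
// PERSON:AUCTION:BID = 1:3:46 the denominator is 50, so event numbers cycle
// in epochs of 50 (slot 0 is a person, slots 1-3 auctions, slots 4-49 bids).
// For eventId = 103: epoch = 103 / 50 = 2, offset = 3, which lands in the
// auction band, so lastBase0AuctionId(103) = 2 * 3 + (3 - 1) = 8 and the
// generated auction id becomes 8 + FIRST_AUCTION_ID = 1008.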
((int)idx % eventsPerSec == 0) { + timestamp++; + } + auto id = lastBase0AuctionId(newEventId) + FIRST_AUCTION_ID; + set.insert(id); + auto initialBid = std::round(std::pow(10.0, dbl(eng_) * 6.0) * 100.0); + auto itemName = distr(eng); + auto description = distr(eng); + auto reserve = + initialBid + std::round(std::pow(10.0, dbl(eng_) * 6.0) * 100.0); + long seller; + // Here P(auction will be for a hot seller) = 1 - 1/hotSellersRatio. + if (_distr(_eng) > 0) { + // Choose the first person in the batch of last HOT_SELLER_RATIO people. + seller = (lastBase0PersonId(newEventId) / HOT_SELLER_RATIO) * + HOT_SELLER_RATIO; + } else { + seller = nextBase0PersonId(newEventId); + } + seller += FIRST_PERSON_ID; + auto category = FIRST_CATEGORY_ID + _distr_(_eng_); + auto expires = timestamp + distr(eng); + + line = std::to_string(timestamp) + " " + std::to_string(id) + " " + + std::to_string(itemName) + " " + std::to_string(description) + + " " + std::to_string(initialBid) + " " + std::to_string(reserve) + + " " + std::to_string(expires) + " " + std::to_string(seller) + + " " + std::to_string(category); + InputSchema::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; + idx++; + } + + // std::cout << "Distinct keys " << set.size() << std::endl; + + if (m_debug) { + std::cout << "timestamp id itemName description initialBid reserve " + "expires seller category" + << std::endl; + for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + printf("[DBG] %09d: %7ld %13ld %8ld %13ld %3ld %6ld %2ld %6ld %6ld \n", + i, buf[i].timestamp, (long)buf[i].id, (long)buf[i].itemName, + (long)buf[i].description, buf[i].initialBid, buf[i].reserve, + (long)buf[i].expires, (long)buf[i].seller, + (long)buf[i].category); + } + } + }; + + std::vector *getInMemoryData() override { return m_data; } + + TupleSchema *getSchema() override { + if (m_schema == nullptr) createSchema(); + return m_schema; + } + + std::vector *getStaticData() override { + throw std::runtime_error("error: this benchmark does not have static data"); + } + + void createSchema() { + m_schema = new TupleSchema(16, "Nexmark"); // 9, "Nexmark"); + auto longAttr = AttributeType(BasicType::Long); + + m_schema->setAttributeType(0, longAttr); /* timestamp: long */ + m_schema->setAttributeType(1, longAttr); /* id: long */ + m_schema->setAttributeType(2, longAttr); /* itemName: long */ + m_schema->setAttributeType(3, longAttr); /* description: long */ + m_schema->setAttributeType(4, longAttr); /* initialBid: long */ + m_schema->setAttributeType(5, longAttr); /* reserve: long */ + m_schema->setAttributeType(6, longAttr); /* expires: long */ + m_schema->setAttributeType(7, longAttr); /* seller: long */ + m_schema->setAttributeType(8, longAttr); /* category: long */ + m_schema->setAttributeType(9, longAttr); /* padding: long */ + m_schema->setAttributeType(10, longAttr); /* padding: long */ + m_schema->setAttributeType(11, longAttr); /* padding: long */ + m_schema->setAttributeType(12, longAttr); /* padding: long */ + m_schema->setAttributeType(13, longAttr); /* padding: long */ + m_schema->setAttributeType(14, longAttr); /* padding: long */ + m_schema->setAttributeType(15, longAttr); /* padding: long */ + } +}; diff --git a/test/benchmarks/applications/Nexmark/main.cpp b/test/benchmarks/applications/Nexmark/main.cpp new file mode 100644 index 0000000..079aa38 --- /dev/null +++ b/test/benchmarks/applications/Nexmark/main.cpp @@ -0,0 +1,18 @@ +#include + +#include 
"NBQ5.cpp" + +int main(int argc, const char **argv) { + std::unique_ptr benchmarkQuery {}; + + SystemConf::getInstance().QUERY_NUM = 1; + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + if (SystemConf::getInstance().QUERY_NUM == 1) { + benchmarkQuery = std::make_unique(); + } else { + throw std::runtime_error("error: invalid benchmark query id"); + } + + return benchmarkQuery->runBenchmark(); +} \ No newline at end of file diff --git a/test/benchmarks/applications/RemoteBenchmark/RDMA/CMakeLists.txt b/test/benchmarks/applications/RemoteBenchmark/RDMA/CMakeLists.txt new file mode 100644 index 0000000..56a8675 --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/RDMA/CMakeLists.txt @@ -0,0 +1,76 @@ +find_package(GTest REQUIRED) +include_directories(${GTEST_INCLUDE_DIRS}) + +# Configure CCache if available +find_program(CCACHE_PROGRAM ccache) +if (CCACHE_PROGRAM) + message("Using CCache...") + #set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + #set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER "/usr/lib/ccache/clang++") +endif () + + +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUM -DHAVE_Oo") + +find_package(benchmark REQUIRED) +include_directories(${benchmark_INCLUDE_DIRS}) + +FIND_LIBRARY(tbb NAMES libtbb.so) + +SET(RDMA_CPP_FILES + ../../../../../src/RDMA/infinity/core/Context.cpp + ../../../../../src/RDMA/infinity/memory/Atomic.cpp + ../../../../../src/RDMA/infinity/memory/Buffer.cpp + ../../../../../src/RDMA/infinity/memory/Region.cpp + ../../../../../src/RDMA/infinity/memory/RegionToken.cpp + ../../../../../src/RDMA/infinity/memory/RegisteredMemory.cpp + ../../../../../src/RDMA/infinity/queues/QueuePair.cpp + ../../../../../src/RDMA/infinity/queues/QueuePairFactory.cpp + ../../../../../src/RDMA/infinity/requests/RequestToken.cpp + ../../../../../src/RDMA/infinity/utils/Address.cpp + ) + +# read-write-send +add_executable(read-write-send + examples/read-write-send.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(read-write-send ${Boost_LIBRARIES}) +endif () +target_link_libraries(read-write-send + z + boost_iostreams + boost_fiber + operatorJITLib + tbb ibverbs + pthread dl uuid stdc++fs) +target_compile_options(read-write-send PRIVATE -Wall -Wextra -O3 -march=native) +target_link_options(read-write-send PRIVATE -Wl,--unresolved-symbols=ignore-all) + + +# send-performance +add_executable(send-performance + examples/send-performance.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(send-performance ${Boost_LIBRARIES}) +endif () +target_link_libraries(send-performance + z + boost_iostreams + boost_fiber + operatorJITLib + tbb ibverbs + pthread dl uuid stdc++fs) +target_compile_options(send-performance PRIVATE -Wall -Wextra -O3 -march=native) +target_link_options(send-performance PRIVATE -Wl,--unresolved-symbols=ignore-all) \ No newline at end of file diff --git a/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/read-write-send.cpp b/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/read-write-send.cpp new file mode 100644 index 0000000..796bed8 --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/read-write-send.cpp @@ 
-0,0 +1,105 @@ +/** + * Examples - Read/Write/Send Operations + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define PORT_NUMBER 8011 +#define SERVER_IP "192.0.0.1" + +// Usage: ./progam -s for server and ./program for client component +int main(int argc, char **argv) { + + bool isServer = false; + + while (argc > 1) { + if (argv[1][0] == '-') { + switch (argv[1][1]) { + + case 's': { + isServer = true; + break; + } + + } + } + ++argv; + --argc; + } + + infinity::core::Context *context = new infinity::core::Context(); + infinity::queues::QueuePairFactory *qpFactory = new infinity::queues::QueuePairFactory(context); + infinity::queues::QueuePair *qp; + + if(isServer) { + + printf("Creating buffers to read from and write to\n"); + infinity::memory::Buffer *bufferToReadWrite = new infinity::memory::Buffer(context, 128 * sizeof(char)); + infinity::memory::RegionToken *bufferToken = bufferToReadWrite->createRegionToken(); + + printf("Creating buffers to receive a message\n"); + infinity::memory::Buffer *bufferToReceive = new infinity::memory::Buffer(context, 128 * sizeof(char)); + context->postReceiveBuffer(bufferToReceive); + + printf("Setting up connection (blocking)\n"); + qpFactory->bindToPort(PORT_NUMBER); + qp = qpFactory->acceptIncomingConnection(bufferToken, sizeof(infinity::memory::RegionToken)); + + printf("Waiting for message (blocking)\n"); + infinity::core::receive_element_t receiveElement; + while(!context->receive(&receiveElement)); + + printf("Message received\n"); + delete bufferToReadWrite; + delete bufferToReceive; + + } else { + + printf("Connecting to remote node\n"); + qp = qpFactory->connectToRemoteHost(SERVER_IP, PORT_NUMBER); + infinity::memory::RegionToken *remoteBufferToken = (infinity::memory::RegionToken *) qp->getUserData(); + + + printf("Creating buffers\n"); + infinity::memory::Buffer *buffer1Sided = new infinity::memory::Buffer(context, 128 * sizeof(char)); + infinity::memory::Buffer *buffer2Sided = new infinity::memory::Buffer(context, 128 * sizeof(char)); + + printf("Reading content from remote buffer\n"); + infinity::requests::RequestToken requestToken(context); + qp->read(buffer1Sided, remoteBufferToken, &requestToken); + requestToken.waitUntilCompleted(); + + printf("Writing content to remote buffer\n"); + qp->write(buffer1Sided, remoteBufferToken, &requestToken); + requestToken.waitUntilCompleted(); + + printf("Sending message to remote host\n"); + qp->send(buffer2Sided, &requestToken); + requestToken.waitUntilCompleted(); + + delete buffer1Sided; + delete buffer2Sided; + + } + + delete qp; + delete qpFactory; + delete context; + + return 0; + +} diff --git a/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/send-performance.cpp b/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/send-performance.cpp new file mode 100644 index 0000000..71b021b --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/RDMA/examples/send-performance.cpp @@ -0,0 +1,178 @@ +/** + * Examples - Send Performance + * + * (c) 2018 Claude Barthels, ETH Zurich + * Contact: claudeb@inf.ethz.ch + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define PORT_NUMBER 8011 +#define SERVER_IP "10.0.0.40" +#define BUFFER_COUNT 128 +#define MAX_BUFFER_SIZE 1048576 //4096 +#define OPERATIONS_COUNT 1024 + +uint64_t timeDiff(struct timeval 
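// How the measurement below turns raw counts into the printed figures:
// timeDiff() returns elapsed microseconds, so the message rate is
// OPERATIONS_COUNT * 1e6 / time and the bandwidth divides the bytes moved by
// 2^20 and by the elapsed seconds. Sanity check: 1024 sends of 1 MiB
// finishing in exactly one second print 1024.000 msg/sec and 1024.000 MB/sec.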
stop, struct timeval start); + +// Usage: ./progam -s for server and ./program for client component +int main(int argc, char **argv) { + + bool isServer = false; + + while (argc > 1) { + if (argv[1][0] == '-') { + switch (argv[1][1]) { + + case 's': { + isServer = true; + break; + } + + } + } + ++argv; + --argc; + } + + infinity::core::Context *context = new infinity::core::Context(); + infinity::queues::QueuePairFactory *qpFactory = new infinity::queues::QueuePairFactory(context); + infinity::queues::QueuePair *qp; + + if (isServer) { + + printf("Creating buffers to receive a messages\n"); + infinity::memory::Buffer **receiveBuffers = new infinity::memory::Buffer *[BUFFER_COUNT]; + for (uint32_t i = 0; i < BUFFER_COUNT; ++i) { + receiveBuffers[i] = new infinity::memory::Buffer(context, MAX_BUFFER_SIZE * sizeof(char)); + context->postReceiveBuffer(receiveBuffers[i]); + } + + printf("Waiting for incoming connection\n"); + qpFactory->bindToPort(PORT_NUMBER); + qp = qpFactory->acceptIncomingConnection(); + + printf("Waiting for first message (first message has additional setup costs)\n"); + infinity::core::receive_element_t receiveElement; + while (!context->receive(&receiveElement)); + context->postReceiveBuffer(receiveElement.buffer); + + printf("Performing measurement\n"); + + uint32_t messageSize = 1; + uint32_t rounds = (uint32_t) log2(MAX_BUFFER_SIZE); + + for(uint32_t sizeIndex = 0; sizeIndex <= rounds; ++sizeIndex) { + + printf("Receiving messages of size %d bytes\n", messageSize); + fflush(stdout); + + uint32_t numberOfReceivedMessages = 0; + while (numberOfReceivedMessages < OPERATIONS_COUNT) { + while (!context->receive(&receiveElement)); + ++numberOfReceivedMessages; + context->postReceiveBuffer(receiveElement.buffer); + } + + messageSize *= 2; + } + + printf("All messages received\n"); + + printf("Sending notification to client\n"); + infinity::memory::Buffer *sendBuffer = new infinity::memory::Buffer(context, sizeof(char)); + qp->send(sendBuffer, context->defaultRequestToken); + context->defaultRequestToken->waitUntilCompleted(); + + printf("Clean up\n"); + for (uint32_t i = 0; i < BUFFER_COUNT; ++i) { + delete receiveBuffers[i]; + } + delete receiveBuffers; + delete sendBuffer; + + } else { + + printf("Connecting to remote node\n"); + qp = qpFactory->connectToRemoteHost(SERVER_IP, PORT_NUMBER); + + printf("Creating buffers\n"); + infinity::memory::Buffer *sendBuffer = new infinity::memory::Buffer(context, MAX_BUFFER_SIZE * sizeof(char)); + infinity::memory::Buffer *receiveBuffer = new infinity::memory::Buffer(context, sizeof(char)); + context->postReceiveBuffer(receiveBuffer); + + printf("Sending first message\n"); + qp->send(sendBuffer, sizeof(char), context->defaultRequestToken); + context->defaultRequestToken->waitUntilCompleted(); + + printf("Performing measurement\n"); + uint32_t rounds = (uint32_t) log2(MAX_BUFFER_SIZE); + uint32_t messageSize = 1; + + for(uint32_t sizeIndex = 0; sizeIndex <= rounds; ++sizeIndex) { + + printf("Sending messages of size %d bytes\t", messageSize); + fflush(stdout); + + struct timeval start; + gettimeofday(&start, NULL); + + for(uint32_t i=0; isend(sendBuffer, messageSize, &requestToken); + requestToken.waitUntilCompleted(); + + } else { + + qp->send(sendBuffer, messageSize, NULL); + + } + } + + struct timeval stop; + gettimeofday(&stop, NULL); + + uint64_t time = timeDiff(stop, start); + double msgRate = ((double)(OPERATIONS_COUNT * 1000000L)) / time; + double bandwidth = ((double) (OPERATIONS_COUNT * messageSize)) / (1024*1024) / (((double) 
time) / 1000000L); + printf("%.3f msg/sec\t%.3f MB/sec\n", msgRate, bandwidth); + fflush(stdout); + + messageSize *= 2; + + } + + printf("Waiting for notification from server\n"); + infinity::core::receive_element_t receiveElement; + while (!context->receive(&receiveElement)); + + delete receiveBuffer; + delete sendBuffer; + } + + delete qp; + delete qpFactory; + delete context; + + return 0; + +} + +uint64_t timeDiff(struct timeval stop, struct timeval start) { + return (stop.tv_sec * 1000000L + stop.tv_usec) - (start.tv_sec * 1000000L + start.tv_usec); +} diff --git a/test/benchmarks/applications/RemoteBenchmark/remoteRDMASink.cpp b/test/benchmarks/applications/RemoteBenchmark/remoteRDMASink.cpp new file mode 100644 index 0000000..09efba2 --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/remoteRDMASink.cpp @@ -0,0 +1,160 @@ +#pragma once + +#include + +#include +#include + +#include "RDMA/infinity/infinity.h" +#include "utils/SystemConf.h" +#include "utils/TupleSchema.h" +#include "utils/Utils.h" + +static size_t sum = 0; +static std::unique_ptr> copyBuffer; + +// --batch-size 1048576 --bundle-size 1048576 +class RemoteRDMASink { + protected: + infinity::core::Context *m_context; + infinity::queues::QueuePairFactory *m_qpFactory; + infinity::queues::QueuePair *m_qp; + + infinity::memory::Buffer **m_receiveBuffers; + + private: + const long m_duration = 60 * 1; + const long m_changeDuration = 10; + long m_prevThrTime = 0, m_thrTime = 0; + double m_Bytes{}; + + public: + static void parseCommandLineArguments(int argc, const char **argv) { + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + throw std::runtime_error("error: wrong number of arguments"); + } + if (strcmp(argv[i], "--threads") == 0) { + SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--batch-size") == 0) { + SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--bundle-size") == 0) { + SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--input-size") == 0) { + SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--query") == 0) { + SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--two-sources") == 0) { + SystemConf::getInstance().HAS_TWO_SOURCES = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else { + std::string argument(argv[i]); + throw std::runtime_error("error: unknown argument " + argument); + } + i = j + 1; + } + } + + int run(int argc, const char **argv, bool terminate = true) { + SystemConf::getInstance().QUERY_NUM = 0; + parseCommandLineArguments(argc, argv); + + //auto core = (SystemConf::getInstance().REMOTE_CLIENT == SystemConf::KEA03_ib1 || + // SystemConf::getInstance().REMOTE_CLIENT == SystemConf::KEA04_ib1) ? Utils::getFirstCoreFromSocket(1) : 2; + auto core = 0; + Utils::bindProcess(core); + + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + auto inputBuffer = new std::vector(len); + + // setup socket + setupSocket(); + + copyBuffer = std::make_unique>(len); + + size_t idx = 0; + auto t1 = std::chrono::high_resolution_clock::now(); + auto _t1 = std::chrono::high_resolution_clock::now(); + std::cout << "Start running the remote sink..." 
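// (Reporting caveat for the receive loop below: the sink never reads the
// actual payload length of a completed receive -- it assumes each message is
// exactly BUNDLE_SIZE bytes when accumulating m_Bytes, so the printed MB/s
// figure is only accurate while the source sends full-size bundles.)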
<< std::endl; + + std::cout << "Waiting for first message (first message has additional setup costs)" << std::endl; + infinity::core::receive_element_t receiveElement; + while (!m_context->receive(&receiveElement)); + m_context->postReceiveBuffer(receiveElement.buffer); + + try { + while (true) { + if (terminate) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast>(t2 - + _t1); + if (time_span.count() >= 1) { + auto thr = (m_Bytes / (1024 * 1024)) / time_span.count(); + m_Bytes = 0; + _t1 = t2; + std::cout << "[DBG] " + std::to_string(thr) + " MB/s" << std::endl; + } + time_span = std::chrono::duration_cast>( + t2 - t1); + if (terminate && time_span.count() >= (double)m_duration) { + std::cout << "Stop running the remote sink..." << " (" << sum << ")" << std::endl; + return 0; + } + } + + long valread = 0; + // receive data + while (!m_context->receive(&receiveElement)); + // do something with data here + /* std::cout << ((long*)receiveElement.buffer->getData())[0] << " " << + ((long*)receiveElement.buffer->getData())[131064]<< " timestamp" << std::endl; + if(idx == 256){ + return 0; + } + idx++;*/ + /*auto dat = (long *)receiveElement.buffer->getData(); + idx = 0; size_t idx2 = 0; + for (size_t ii = 0; ii < receiveElement.buffer->getSizeInBytes()/128; ii++) { + if (dat[idx+9] == 0) { + sum += dat[idx]; + std::memcpy(copyBuffer->data() + idx2, &dat[idx], 32); + idx2 += 16; + } + idx += 16; + }*/ + // return buffer + m_context->postReceiveBuffer(receiveElement.buffer); + valread = SystemConf::getInstance().BUNDLE_SIZE; + + m_Bytes += valread; + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + } + + private: + void setupSocket() { + m_context = new infinity::core::Context(1); + m_qpFactory = new infinity::queues::QueuePairFactory(m_context); + + std::cout <<"Creating buffers to receive a messages" << std::endl; + m_receiveBuffers = new infinity::memory::Buffer *[BUFFER_COUNT]; + for (uint32_t i = 0; i < BUFFER_COUNT; ++i) { + m_receiveBuffers[i] = new infinity::memory::Buffer(m_context, SystemConf::getInstance().BUNDLE_SIZE * sizeof(char)); + m_context->postReceiveBuffer(m_receiveBuffers[i]); + } + + std::cout <<"Waiting for incoming connection" << std::endl; + m_qpFactory->bindToPort(PORT); + m_qp = m_qpFactory->acceptIncomingConnection(); + } +}; + +int main(int argc, const char **argv) { + std::unique_ptr remoteSink = std::make_unique(); + remoteSink->run(argc, argv); +} \ No newline at end of file diff --git a/test/benchmarks/applications/RemoteBenchmark/remoteRDMASource.cpp b/test/benchmarks/applications/RemoteBenchmark/remoteRDMASource.cpp new file mode 100644 index 0000000..ceb25f5 --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/remoteRDMASource.cpp @@ -0,0 +1,382 @@ +#pragma once + +#include + +#include "BenchmarkQuery.h" +#include "RDMA/infinity/infinity.h" +#include "benchmarks/applications/ClusterMonitoring/CM1.cpp" +#include "benchmarks/applications/ClusterMonitoring/CM2.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB1.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB2.cpp" +#include "benchmarks/applications/ManufacturingEquipment/ME1.cpp" +#include "benchmarks/applications/Nexmark/NBQ5.cpp" +#include "benchmarks/applications/SmartGrid/SG1.cpp" +#include "benchmarks/applications/SmartGrid/SG2.cpp" +#include "benchmarks/applications/YahooBenchmark/YSB.cpp" +#include "utils/SystemConf.h" +#include "utils/TupleSchema.h" + +/* + * This is a 
remote source that loads and replays the data from the + * test/benchmarks/applications folder. + * + * */ +class RemoteRDMASource { + protected: + std::string m_name; + long m_timestampReference = 0; + long m_lastTimestamp = 0; + infinity::core::Context *m_context; + infinity::queues::QueuePairFactory *m_qpFactory; + infinity::queues::QueuePair *m_qp; + + private: + std::unique_ptr m_benchmarkQuery = nullptr; + const long m_duration = 60 * 1; + const long m_changeDuration = 10; + long m_prevThrTime = 0, m_thrTime = 0; + double m_Bytes{}; + + struct DataSlot; + std::vector> m_initialSlots; + std::vector> m_slots; + std::vector m_workers; + std::atomic m_stop = false; + int m_nextSlot = 0; + bool m_first = true; + + const bool m_debug = false; + + public: + long getTimestampReference() { return m_timestampReference; } + static void parseCommandLineArguments(int argc, const char **argv) { + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + throw std::runtime_error("error: wrong number of arguments"); + } + if (strcmp(argv[i], "--threads") == 0) { + SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--batch-size") == 0) { + SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--bundle-size") == 0) { + SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--input-size") == 0) { + SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--query") == 0) { + SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--ingestion") == 0) { + SystemConf::getInstance().MBs_INGESTED_PER_SEC = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--two-sources") == 0) { + SystemConf::getInstance().HAS_TWO_SOURCES = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--send-second") == 0) { + SystemConf::getInstance().SEND_TO_SECOND_WORKER = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else { + std::string argument(argv[i]); + throw std::runtime_error("error: unknown argument " + argument); + } + i = j + 1; + } + } + + std::unique_ptr getBenchmark() { + switch (SystemConf::getInstance().QUERY_NUM) { + case 0: + return std::make_unique(true, false); + case 1: + return std::make_unique(true, false); + case 2: + return std::make_unique(true, false); + case 3: + case 4: + return std::make_unique(true, false); + case 5: + return std::make_unique(true, false); + case 6: + case 7: + return std::make_unique(true, false); + case 8: + return std::make_unique(true, false); + case 9: + return std::make_unique(true, false); + case 10: + return std::make_unique(true, false); + default: + throw std::runtime_error("error: wrong query number"); + } + } + + int run(int argc, const char **argv, bool terminate = true) { + SystemConf::getInstance().QUERY_NUM = 0; + parseCommandLineArguments(argc, argv); + m_benchmarkQuery = getBenchmark(); + auto inputBuffer = m_benchmarkQuery->getInMemoryData(); + + // setup RDMA + setupRDMA(); + + // prepare workers + setupWorkers(inputBuffer); + + infinity::memory::Buffer *sendBuffer = new infinity::memory::Buffer(m_context, sizeof(char)); + infinity::memory::Buffer *receiveBuffer = new infinity::memory::Buffer(m_context, sizeof(char)); + m_context->postReceiveBuffer(receiveBuffer); + + std::cout <<"Sending first message" << std::endl; + m_qp->send(sendBuffer, sizeof(char), m_context->defaultRequestToken); + 
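// As the RDMA examples note, the first send carries one-off setup costs, so
// source and sink exchange a one-byte message up front and wait for it to
// complete; that keeps the setup overhead out of the paced replay loop below.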
+    m_context->defaultRequestToken->waitUntilCompleted();
+
+    size_t ii = 0;
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+    if (SystemConf::getInstance().LATENCY_ON) {
+      SystemConf::getInstance().DURATION = m_duration - 5;
+    }
+    long systemTimestamp = -1;
+    long restartReference = 0;
+    double remainingTime = 0.;
+    bool stopRecovery = false;
+    if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) {
+      auto t2 = std::chrono::high_resolution_clock::now();
+      auto time_span =
+          std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
+      m_prevThrTime = time_span.count();
+      m_thrTime = time_span.count();
+    }
+
+    std::cout << "Start running " + m_benchmarkQuery->getApplicationName() +
+                     " ..."
+              << std::endl;
+    try {
+      while (true) {
+        if (terminate || SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) {
+          auto t2 = std::chrono::high_resolution_clock::now();
+          if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0 &&
+              !SystemConf::getInstance().BUFFERED_LATENCY) {
+            // std::cout << "Start limiting the throughput..." << std::endl;
+            auto time_span =
+                std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
+            m_thrTime = time_span.count();
+            m_Bytes += (double)inputBuffer->size();
+            // std::this_thread::sleep_for(std::chrono::microseconds(1600));
+            if ((m_thrTime - m_prevThrTime < 1000) &&
+                m_Bytes >= SystemConf::getInstance().MBs_INGESTED_PER_SEC *
+                               1024 * 1024) {
+              auto sleepTime = ((m_prevThrTime + 1000) - m_thrTime) + 350;
+              std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
+              // std::cout << "[dat] " << " " << inputBuffer->size() << " "
+              //           << m_Bytes << " " << SystemConf::getInstance().MBs_INGESTED_PER_SEC
+              //           << " " << sleepTime << std::endl;
+              m_prevThrTime = m_thrTime;
+              m_Bytes = 0;
+            } else if (m_thrTime - m_prevThrTime >= 1000) {
+              m_prevThrTime = m_thrTime;
+              m_Bytes = 0;
+            }
+          }
+          auto time_span =
+              std::chrono::duration_cast<std::chrono::duration<double>>(t2 -
+                                                                        t1);
+          if (terminate && time_span.count() >= (double)m_duration) {
+            std::cout << "Stop running " +
+                             m_benchmarkQuery->getApplicationName() + " ..."
+                      << std::endl;
+            return 0;
+          }
+        }
+        if (SystemConf::getInstance().LATENCY_ON) {
+          auto currentTime = std::chrono::high_resolution_clock::now();
+          auto currentTimeNano =
+              std::chrono::duration_cast<std::chrono::nanoseconds>(
+                  currentTime.time_since_epoch())
+                  .count();
+          systemTimestamp =
+              (long)((currentTimeNano - m_timestampReference) / 1000L);
+        }
+
+        // get next buffer
+        sendBuffer = getNextSlot();
+
+        //if (ii % BUFFER_COUNT == 0) {
+        infinity::requests::RequestToken requestToken(m_context);
+        m_qp->send(sendBuffer, sendBuffer->getSizeInBytes(), &requestToken);
+        requestToken.waitUntilCompleted();
+        //} else {
+        //  m_qp->send(sendBuffer, sendBuffer->getSizeInBytes(), nullptr);
+        //}
+        ii++;
+
+        // return the buffer to the pool
+        advanceSlot();
+      }
+    } catch (std::exception &e) {
+      std::cout << e.what() << std::endl;
+      exit(1);
+    }
+  }
+
+ private:
+  void setupRDMA() {
+    // Create new context
+    m_context = new infinity::core::Context();
+    // Create a queue pair
+    m_qpFactory = new infinity::queues::QueuePairFactory(m_context);
+    std::cout << "Connecting to remote node " << SystemConf::getInstance().REMOTE_WORKER << "..."
<< std::endl; + m_qp = m_qpFactory->connectToRemoteHost(SystemConf::getInstance().REMOTE_WORKER.c_str(), PORT); + std::cout << "Connected to remote node " << SystemConf::getInstance().REMOTE_WORKER << std::endl; + } + + void setupWorkers(std::vector *buffer) { + auto size = buffer->size(); + auto offset = m_benchmarkQuery->getEndTimestamp() - + m_benchmarkQuery->getStartTimestamp() + 1; + if (SystemConf::getInstance().QUERY_NUM == 0 || + SystemConf::getInstance().QUERY_NUM == 1) { + offset -= 1; + } + if (SystemConf::getInstance().QUERY_NUM == 5 || + SystemConf::getInstance().QUERY_NUM == 6 || + SystemConf::getInstance().QUERY_NUM == 7) { + offset += 1; + } + if (offset <= 0) { + std::cout << "warning: the starting offset is " << offset << std::endl; + } + auto curOffset = 0; + int idx = 0; + m_initialSlots.resize(SystemConf::getInstance().WORKER_THREADS * 2); + m_slots.resize(SystemConf::getInstance().WORKER_THREADS * 2); + for (size_t ii = 0; ii < SystemConf::getInstance().WORKER_THREADS * 2; ++ii) { + m_initialSlots[ii] = std::make_shared(m_context); + m_slots[ii] = std::make_shared(m_context); + } + auto finalOffset = offset * m_slots.size(); + for (auto &slot : m_slots) { + std::memcpy(slot->m_buffer->getData(), buffer->data(), size); + auto tupleSize = m_benchmarkQuery->getSchema()->getTupleSize(); + auto startPos = 0; + auto endPos = SystemConf::getInstance().BUNDLE_SIZE / sizeof(long); + auto step = tupleSize / sizeof(long); + auto buf = (long *)slot->m_buffer->getData(); + for (unsigned long i = startPos; i < endPos; i += step) { + buf[i] += curOffset; + } + // the first data sent + std::memcpy(m_initialSlots[idx]->m_buffer->getData(), slot->m_buffer->getData(), size); + + curOffset += offset; + slot->m_offset = finalOffset; + slot->m_state = new std::atomic(0); + idx++; + } + + m_workers.resize(SystemConf::getInstance().WORKER_THREADS); + for (int t = 0; t < m_workers.size(); t++) { + m_workers[t] = new std::thread([&, t] { + auto thread = t; + auto idx = t; + + while (!m_slots[idx]->m_state) + ; + + while (!m_stop) { + auto oldVal = 0; + while (!m_slots[idx]->m_state->compare_exchange_weak(oldVal, 1)) { + if (m_debug) { + // std::cout << "Worker " + std::to_string(thread) + " waiting for + // " + std::to_string(idx) + " slot." << std::endl; + } + oldVal = 0; + _mm_pause(); + } + + if (m_debug) { + std::cout << "Worker " + std::to_string(thread) + " updating " + + std::to_string(idx) + " slot." + << std::endl; + } + + auto tupleSize = m_benchmarkQuery->getSchema()->getTupleSize(); + auto startPos = 0; + auto endPos = SystemConf::getInstance().BUNDLE_SIZE / sizeof(long); + auto step = tupleSize / sizeof(long); + auto buf = (long *)m_slots[idx]->m_buffer->getData(); + for (unsigned long i = startPos; i < endPos; i += step) { + buf[i] += m_slots[idx]->m_offset; + } + + m_slots[idx]->m_state->store(2); + idx += SystemConf::getInstance().WORKER_THREADS; + if (idx >= m_slots.size()) { + idx = t; + } + } + }); + auto core = (SystemConf::getInstance().SEND_TO_SECOND_WORKER) ? 
t+2 : t;
+      Utils::bindProcess(*m_workers[t], core);
+    }
+  }
+
+  infinity::memory::Buffer *getNextSlot() {
+    if (m_first) {
+      return m_initialSlots[m_nextSlot]->m_buffer;
+    }
+    auto oldVal = 2;
+    while (!m_slots[m_nextSlot]->m_state->compare_exchange_weak(oldVal, 3)) {
+      if (m_debug) {
+        // std::cout << "Waiting to get " + std::to_string(m_nextSlot) + " slot" << std::endl;
+      }
+      oldVal = 2;
+      _mm_pause();
+    }
+    if (m_debug) {
+      auto buf = (long *)m_slots[m_nextSlot]->m_buffer->getData();
+      std::cout << "Sending " + std::to_string(m_nextSlot) + " slot with " +
+                       std::to_string(buf[0]) + " starting timestamp"
+                << std::endl;
+    }
+    return m_slots[m_nextSlot]->m_buffer;
+  }
+
+  void advanceSlot() {
+    if (m_first) {
+      m_nextSlot++;
+      if (m_nextSlot >= m_slots.size()) {
+        m_first = false;
+        m_nextSlot = 0;
+      }
+      return;
+    }
+    if (m_debug) {
+      std::cout << "Resetting " + std::to_string(m_nextSlot) + " slot"
+                << std::endl;
+    }
+    m_slots[m_nextSlot]->m_state->store(0);
+    m_nextSlot++;
+    if (m_nextSlot >= m_slots.size()) {
+      m_nextSlot = 0;
+    }
+  }
+
+  struct DataSlot {
+    infinity::memory::Buffer *m_buffer;
+    long m_offset = 0;
+    std::atomic<int> *m_state = nullptr;
+    DataSlot(infinity::core::Context *context) {
+      if (!context) {
+        throw std::runtime_error("error: invalid context");
+      }
+      m_buffer = new infinity::memory::Buffer(context, SystemConf::getInstance().BUNDLE_SIZE * sizeof(char));
+    }
+  };
+};
+
+int main(int argc, const char **argv) {
+  std::unique_ptr<RemoteRDMASource> remoteSource = std::make_unique<RemoteRDMASource>();
+  remoteSource->run(argc, argv);
+}
\ No newline at end of file
diff --git a/test/benchmarks/applications/RemoteBenchmark/remoteSink.cpp b/test/benchmarks/applications/RemoteBenchmark/remoteSink.cpp
new file mode 100644
index 0000000..7591692
--- /dev/null
+++ b/test/benchmarks/applications/RemoteBenchmark/remoteSink.cpp
@@ -0,0 +1,174 @@
+#pragma once
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "utils/SystemConf.h"
+#include "utils/TupleSchema.h"
+#include "utils/Utils.h"
+
+// --batch-size 1048576 --bundle-size 1048576 --two-sources true
+
+class RemoteSink {
+ protected:
+  int m_sock = 0;
+  int m_sock2 = 0;
+  int m_server_fd = 0;
+  int m_server_fd2 = 0;
+
+ private:
+  const long m_duration = 60 * 1;
+  const long m_changeDuration = 10;
+  long m_prevThrTime = 0, m_thrTime = 0;
+  double m_Bytes{};
+
+ public:
+  static void parseCommandLineArguments(int argc, const char **argv) {
+    int i, j;
+    for (i = 1; i < argc;) {
+      if ((j = i + 1) == argc) {
+        throw std::runtime_error("error: wrong number of arguments");
+      }
+      if (strcmp(argv[i], "--threads") == 0) {
+        SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]);
+      } else if (strcmp(argv[i], "--batch-size") == 0) {
+        SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]);
+      } else if (strcmp(argv[i], "--bundle-size") == 0) {
+        SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]);
+      } else if (strcmp(argv[i], "--input-size") == 0) {
+        SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]);
+      } else if (strcmp(argv[i], "--query") == 0) {
+        SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]);
+      } else if (strcmp(argv[i], "--two-sources") == 0) {
+        SystemConf::getInstance().HAS_TWO_SOURCES = (strcasecmp(argv[j], "true") == 0 ||
+                                                     std::atoi(argv[j]) != 0);
+      } else {
+        std::string argument(argv[i]);
+        throw std::runtime_error("error: unknown argument " + argument);
+      }
+      i = j + 1;
+    }
+  }
+
+  int run(int argc, const char **argv, bool terminate = true) {
+    SystemConf::getInstance().QUERY_NUM = 0;
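+    // Note: the sink never instantiates a query pipeline (QUERY_NUM keeps
+    // its default); the flags parsed below matter mainly for BUNDLE_SIZE,
+    // which sizes the receive buffer, and HAS_TWO_SOURCES, e.g.:
+    //   remoteSink --bundle-size 1048576 --two-sources true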
+    parseCommandLineArguments(argc, argv);
+
+    auto core = (SystemConf::getInstance().REMOTE_CLIENT == SystemConf::KEA03_ib1 ||
+                 SystemConf::getInstance().REMOTE_CLIENT == SystemConf::KEA04_ib1) ? Utils::getFirstCoreFromSocket(1) : 2;
+    Utils::bindProcess(core);
+
+    size_t len = SystemConf::getInstance().BUNDLE_SIZE;
+    auto inputBuffer = new std::vector<char>(len);
+
+    // setup socket
+    setupSocket();
+
+    size_t idx = 0;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    auto _t1 = std::chrono::high_resolution_clock::now();
+    std::cout << "Start running the remote sink..." << std::endl;
+    try {
+      while (true) {
+        if (terminate) {
+          auto t2 = std::chrono::high_resolution_clock::now();
+          auto time_span =
+              std::chrono::duration_cast<std::chrono::duration<double>>(t2 -
+                                                                        _t1);
+          if (time_span.count() >= 1) {
+            auto thr = (m_Bytes / (1024 * 1024)) / time_span.count();
+            m_Bytes = 0;
+            _t1 = t2;
+            std::cout << "[DBG] " + std::to_string(thr) + " MB/s" << std::endl;
+          }
+          time_span = std::chrono::duration_cast<std::chrono::duration<double>>(
+              t2 - t1);
+          if (terminate && time_span.count() >= (double)m_duration) {
+            std::cout << "Stop running the remote sink..." << std::endl;
+            return 0;
+          }
+        }
+
+        long valread = 0;
+        // receive data
+        if (SystemConf::getInstance().HAS_TWO_SOURCES) {
+          if (idx % 2 == 0) {
+            valread = readBytes(m_sock, inputBuffer->size(), inputBuffer->data());
+          } else {
+            valread = readBytes(m_sock2, inputBuffer->size(), inputBuffer->data());
+          }
+          idx++;
+        } else {
+          valread = readBytes(m_sock, inputBuffer->size(), inputBuffer->data());
+        }
+        m_Bytes += valread;
+      }
+    } catch (std::exception &e) {
+      std::cout << e.what() << std::endl;
+      exit(1);
+    }
+  }
+
+ private:
+  void setupSocket() {
+    struct sockaddr_in address {};
+    int opt = 1;
+    int addrlen = sizeof(address);
+    // Creating socket file descriptor (socket() returns -1 on failure)
+    if ((m_server_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+      throw std::runtime_error("error: Socket file descriptor creation error");
+    }
+
+    // Forcefully attaching socket to the PORT
+    if (setsockopt(m_server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, &opt,
+                   sizeof(opt))) {
+      throw std::runtime_error("error: setsockopt");
+    }
+    address.sin_family = AF_INET;
+    address.sin_addr.s_addr = INADDR_ANY;
+    address.sin_port = htons(PORT);
+
+    // Forcefully attaching socket to the PORT
+    if (bind(m_server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) {
+      throw std::runtime_error("error: bind failed");
+    }
+    if (listen(m_server_fd, 3) < 0) {
+      throw std::runtime_error("error: listen");
+    }
+
+    // todo: accept multiple connections
+    if ((m_sock = accept(m_server_fd, (struct sockaddr *)&address,
+                         (socklen_t *)&addrlen)) < 0) {
+      throw std::runtime_error("error: accept");
+    }
+    std::cout << "The remote sink established the 1st connection" << std::endl;
+    if (SystemConf::getInstance().HAS_TWO_SOURCES) {
+      if ((m_sock2 = accept(m_server_fd, (struct sockaddr *)&address,
+                            (socklen_t *)&addrlen)) < 0) {
+        throw std::runtime_error("error: accept");
+      }
+      std::cout << "The remote sink established the 2nd connection" << std::endl;
+    }
+  }
+
+  static inline size_t readBytes(int socket, unsigned int length, void *buffer) {
+    size_t bytesRead = 0;
+    while (bytesRead < length) {
+      auto valread =
+          read(socket, (char *)buffer + bytesRead, length - bytesRead);
+      assert(valread >= 0);
+      bytesRead += valread;
+    }
+    return bytesRead;
+  }
+};
+
+int main(int argc, const char **argv) {
+  std::unique_ptr<RemoteSink> remoteSink = std::make_unique<RemoteSink>();
+  remoteSink->run(argc, argv);
+}
\ No newline at end of file
diff --git 
a/test/benchmarks/applications/RemoteBenchmark/remoteSource.cpp b/test/benchmarks/applications/RemoteBenchmark/remoteSource.cpp new file mode 100644 index 0000000..f19840c --- /dev/null +++ b/test/benchmarks/applications/RemoteBenchmark/remoteSource.cpp @@ -0,0 +1,379 @@ +#pragma once + +#include +#include + +#include + +#include "BenchmarkQuery.h" +#include "benchmarks/applications/ClusterMonitoring/CM1.cpp" +#include "benchmarks/applications/ClusterMonitoring/CM2.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB1.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB2.cpp" +#include "benchmarks/applications/ManufacturingEquipment/ME1.cpp" +#include "benchmarks/applications/Nexmark/NBQ5.cpp" +#include "benchmarks/applications/SmartGrid/SG1.cpp" +#include "benchmarks/applications/SmartGrid/SG2.cpp" +#include "benchmarks/applications/YahooBenchmark/YSB.cpp" +#include "utils/SystemConf.h" +#include "utils/TupleSchema.h" + + +/* + * This is a remote source that loads and replays the data from the + * test/benchmarks/applications folder. + * + * */ +class RemoteSource { + protected: + std::string m_name; + long m_timestampReference = 0; + long m_lastTimestamp = 0; + int m_sock = 0; + int m_sock2 = 0; + + private: + std::unique_ptr m_benchmarkQuery = nullptr; + const long m_duration = 60 * 1; + const long m_changeDuration = 10; + long m_prevThrTime = 0, m_thrTime = 0; + double m_Bytes{}; + + struct DataSlot; + std::vector m_initialSlots; + std::vector m_slots; + std::vector m_workers; + std::atomic m_stop = false; + int m_nextSlot = 0; + bool m_first = true; + + const bool m_debug = false; + + public: + long getTimestampReference() { return m_timestampReference; } + static void parseCommandLineArguments(int argc, const char **argv) { + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + throw std::runtime_error("error: wrong number of arguments"); + } + if (strcmp(argv[i], "--threads") == 0) { + SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--batch-size") == 0) { + SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--bundle-size") == 0) { + SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--input-size") == 0) { + SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--query") == 0) { + SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--ingestion") == 0) { + SystemConf::getInstance().MBs_INGESTED_PER_SEC = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--two-sources") == 0) { + SystemConf::getInstance().HAS_TWO_SOURCES = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--send-second") == 0) { + SystemConf::getInstance().SEND_TO_SECOND_WORKER = (strcasecmp(argv[j], "true") == 0 || + std::atoi(argv[j]) != 0); + } else { + std::string argument(argv[i]); + throw std::runtime_error("error: unknown argument " + argument); + } + i = j + 1; + } + } + + std::unique_ptr getBenchmark() { + switch (SystemConf::getInstance().QUERY_NUM) { + case 0: + return std::make_unique(true, false); + case 1: + return std::make_unique(true, false); + case 2: + return std::make_unique(true, false); + case 3: + case 4: + return std::make_unique(true, false); + case 5: + return std::make_unique(true, false); + case 6: + case 7: + return std::make_unique(true, false); + case 8: + return std::make_unique(true, false); + case 9: + return 
std::make_unique(true, false); + case 10: + return std::make_unique(true, false); + default: + throw std::runtime_error("error: wrong query number"); + } + } + + int run(int argc, const char **argv, bool terminate = true) { + SystemConf::getInstance().QUERY_NUM = 0; + parseCommandLineArguments(argc, argv); + m_benchmarkQuery = getBenchmark(); + auto inputBuffer = m_benchmarkQuery->getInMemoryData(); + + // prepare workers + setupWorkers(inputBuffer); + + // setup socket + if (!SystemConf::getInstance().SEND_TO_SECOND_WORKER) { + std::cout << "Setting up 1st socket" << std::endl; + setupSocket(m_sock, SystemConf::getInstance().REMOTE_WORKER); + } + if (SystemConf::getInstance().HAS_TWO_SOURCES || SystemConf::getInstance().SEND_TO_SECOND_WORKER) { + std::cout << "Setting up 2nd socket" << std::endl; + setupSocket(m_sock2, SystemConf::getInstance().REMOTE_WORKER_2); + } + + auto t1 = std::chrono::high_resolution_clock::now(); + if (SystemConf::getInstance().LATENCY_ON) { + SystemConf::getInstance().DURATION = m_duration - 5; + } + long systemTimestamp = -1; + long restartReference = 0; + double remainingTime = 0.; + bool stopRecovery = false; + if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = + std::chrono::duration_cast(t2 - t1); + m_prevThrTime = time_span.count(); + m_thrTime = time_span.count(); + } + + std::cout << "Start running " + m_benchmarkQuery->getApplicationName() + + " ..." + << std::endl; + try { + while (true) { + if (terminate || SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0) { + auto t2 = std::chrono::high_resolution_clock::now(); + if (SystemConf::getInstance().MBs_INGESTED_PER_SEC > 0 && + !SystemConf::getInstance().BUFFERED_LATENCY) { + // std::cout << "Start limiting the throughput..." << std::endl; + auto time_span = + std::chrono::duration_cast(t2 - t1); + m_thrTime = time_span.count(); + m_Bytes += (double)inputBuffer->size(); + // std::this_thread::sleep_for(std::chrono::microseconds (1600)); + if ((m_thrTime - m_prevThrTime < 1000) && + m_Bytes >= SystemConf::getInstance().MBs_INGESTED_PER_SEC * + 1024 * 1024) { + auto sleepTime = ((m_prevThrTime + 1000) - m_thrTime) + 350; + std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime)); + // std::cout << "[dat] " << " " << inputBuffer->size() << " " + // << m_Bytes << " " << + // SystemConf::getInstance().MBs_INGESTED_PER_SEC + // << " " << sleepTime << std::endl; + m_prevThrTime = m_thrTime; + m_Bytes = 0; + } else if (m_thrTime - m_prevThrTime >= 1000) { + m_prevThrTime = m_thrTime; + m_Bytes = 0; + } + } + auto time_span = + std::chrono::duration_cast>(t2 - + t1); + if (terminate && time_span.count() >= (double)m_duration) { + std::cout << "Stop running " + + m_benchmarkQuery->getApplicationName() + " ..." 
+ << std::endl; + return 0; + } + } + if (SystemConf::getInstance().LATENCY_ON) { + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = + std::chrono::duration_cast( + currentTime.time_since_epoch()) + .count(); + systemTimestamp = + (long)((currentTimeNano - m_timestampReference) / 1000L); + } + + // get next buffer + inputBuffer = getNextSlot(); + + // send data + if (SystemConf::getInstance().HAS_TWO_SOURCES) { + if (m_nextSlot % 2 == 0) { + send(m_sock, inputBuffer->data(), inputBuffer->size(), 0); + } else { + send(m_sock2, inputBuffer->data(), inputBuffer->size(), 0); + } + } else { + if (!SystemConf::getInstance().SEND_TO_SECOND_WORKER) { + send(m_sock, inputBuffer->data(), inputBuffer->size(), 0); + } else { + send(m_sock2, inputBuffer->data(), inputBuffer->size(), 0); + } + } + + // return the buffer to the pool + advanceSlot(); + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + } + + private: + static void setupSocket(int &sock, const std::string &remoteIp) { + struct sockaddr_in serv_addr {}; + if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + throw std::runtime_error("error: Socket creation error"); + } + + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(PORT); + + // Convert IPv4 and IPv6 addresses from text to binary form + if (inet_pton(AF_INET, remoteIp.c_str(), &serv_addr.sin_addr) <= 0) { + throw std::runtime_error("error: Invalid address/ Address not supported"); + } + + if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) { + throw std::runtime_error("error: Connection Failed"); + } + } + + void setupWorkers(std::vector *buffer) { + auto size = buffer->size(); + auto offset = m_benchmarkQuery->getEndTimestamp() - + m_benchmarkQuery->getStartTimestamp() + 1; + if (offset <= 0) { + std::cout << "warning: the starting offset is " << offset << std::endl; + } + auto curOffset = 0; + int idx = 0; + m_initialSlots.resize(SystemConf::getInstance().WORKER_THREADS * 2); + m_slots.resize(SystemConf::getInstance().WORKER_THREADS * 2); + auto finalOffset = offset * m_slots.size(); + for (auto &slot : m_slots) { + slot.m_buffer.resize(size); + std::memcpy(slot.m_buffer.data(), buffer->data(), size); + auto tupleSize = m_benchmarkQuery->getSchema()->getTupleSize(); + auto startPos = 0; + auto endPos = slot.m_buffer.size() / sizeof(long); + auto step = tupleSize / sizeof(long); + auto buf = (long *)slot.m_buffer.data(); + for (unsigned long i = startPos; i < endPos; i += step) { + buf[i] += curOffset; + } + // the first data sent + m_initialSlots[idx].m_buffer.resize(size); + std::memcpy(m_initialSlots[idx].m_buffer.data(), slot.m_buffer.data(), + size); + + curOffset += offset; + slot.m_offset = finalOffset; + slot.m_state = new std::atomic(0); + idx++; + } + + m_workers.resize(SystemConf::getInstance().WORKER_THREADS); + for (int t = 0; t < m_workers.size(); t++) { + m_workers[t] = new std::thread([&, t] { + auto thread = t; + auto idx = t; + + while (!m_slots[idx].m_state) + ; + + while (!m_stop) { + auto oldVal = 0; + while (!m_slots[idx].m_state->compare_exchange_weak(oldVal, 1)) { + if (m_debug) { + // std::cout << "Worker " + std::to_string(thread) + " waiting for + // " + std::to_string(idx) + " slot." << std::endl; + } + oldVal = 0; + _mm_pause(); + } + + if (m_debug) { + std::cout << "Worker " + std::to_string(thread) + " updating " + + std::to_string(idx) + " slot." 
<< std::endl;
+          }
+
+          auto tupleSize = m_benchmarkQuery->getSchema()->getTupleSize();
+          auto startPos = 0;
+          auto endPos = m_slots[idx].m_buffer.size() / sizeof(long);
+          auto step = tupleSize / sizeof(long);
+          auto buf = (long *)m_slots[idx].m_buffer.data();
+          for (unsigned long i = startPos; i < endPos; i += step) {
+            buf[i] += m_slots[idx].m_offset;
+          }
+
+          m_slots[idx].m_state->store(2);
+          idx += SystemConf::getInstance().WORKER_THREADS;
+          if (idx >= m_slots.size()) {
+            idx = t;
+          }
+        }
+      });
+      auto core = (SystemConf::getInstance().SEND_TO_SECOND_WORKER) ? t+2 : t;
+      Utils::bindProcess(*m_workers[t], core);
+    }
+  }
+
+  std::vector<char> *getNextSlot() {
+    if (m_first) {
+      return &m_initialSlots[m_nextSlot].m_buffer;
+    }
+    auto oldVal = 2;
+    while (!m_slots[m_nextSlot].m_state->compare_exchange_weak(oldVal, 3)) {
+      if (m_debug) {
+        // std::cout << "Waiting to get " + std::to_string(m_nextSlot) + " slot" << std::endl;
+      }
+      oldVal = 2;
+      _mm_pause();
+    }
+    if (m_debug) {
+      auto buf = (long *)m_slots[m_nextSlot].m_buffer.data();
+      std::cout << "Sending " + std::to_string(m_nextSlot) + " slot with " +
+                       std::to_string(buf[0]) + " starting timestamp"
+                << std::endl;
+    }
+    return &m_slots[m_nextSlot].m_buffer;
+  }
+
+  void advanceSlot() {
+    if (m_first) {
+      m_nextSlot++;
+      if (m_nextSlot >= m_slots.size()) {
+        m_first = false;
+        m_nextSlot = 0;
+      }
+      return;
+    }
+    if (m_debug) {
+      std::cout << "Resetting " + std::to_string(m_nextSlot) + " slot"
+                << std::endl;
+    }
+    m_slots[m_nextSlot].m_state->store(0);
+    m_nextSlot++;
+    if (m_nextSlot >= m_slots.size()) {
+      m_nextSlot = 0;
+    }
+  }
+
+  struct DataSlot {
+    std::vector<char> m_buffer;
+    long m_offset = 0;
+    std::atomic<int> *m_state = nullptr;
+  };
+};
+
+int main(int argc, const char **argv) {
+  std::unique_ptr<RemoteSource> remoteSource = std::make_unique<RemoteSource>();
+  remoteSource->run(argc, argv);
+}
\ No newline at end of file
diff --git a/test/benchmarks/applications/SmartGrid/SG1.cpp b/test/benchmarks/applications/SmartGrid/SG1.cpp
index 73cd141..c1148b1 100644
--- a/test/benchmarks/applications/SmartGrid/SG1.cpp
+++ b/test/benchmarks/applications/SmartGrid/SG1.cpp
@@ -59,10 +59,11 @@ class SG1 : public SmartGrid {
   }
 
  public:
-  SG1(bool inMemory = true) {
+  SG1(bool inMemory = true, bool startApp = true) {
     m_name = "SG1";
     createSchema();
-    createApplication();
+    if (startApp)
+      createApplication();
     if (inMemory)
       loadInMemoryData();
   }
diff --git a/test/benchmarks/applications/SmartGrid/SG2.cpp b/test/benchmarks/applications/SmartGrid/SG2.cpp
index b0c3b30..6096444 100644
--- a/test/benchmarks/applications/SmartGrid/SG2.cpp
+++ b/test/benchmarks/applications/SmartGrid/SG2.cpp
@@ -68,10 +68,11 @@ class SG2 : public SmartGrid {
   }
 
  public:
-  SG2(bool inMemory = true) {
+  SG2(bool inMemory = true, bool startApp = true) {
     m_name = "SG2";
     createSchema();
-    createApplication();
+    if (startApp)
+      createApplication();
     if (inMemory)
       loadInMemoryData();
   }
diff --git a/test/benchmarks/applications/SmartGrid/SG3.cpp b/test/benchmarks/applications/SmartGrid/SG3.cpp
index abd142d..966e9c9 100644
--- a/test/benchmarks/applications/SmartGrid/SG3.cpp
+++ b/test/benchmarks/applications/SmartGrid/SG3.cpp
@@ -60,10 +60,11 @@ class SG3 : public SmartGrid {
   }
 
  public:
-  SG3(bool inMemory = true) {
+  SG3(bool inMemory = true, bool startApp = true) {
     m_name = "SG3";
     createSchema();
-    createApplication();
+    if (startApp)
+      createApplication();
     if (inMemory)
       loadInMemoryData();
   }
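The startApp flag threaded through the SG1/SG2/SG3 constructors above lets the remote sources reuse a benchmark's schema and in-memory dataset without paying for query compilation. A minimal sketch of the intended call pattern (hypothetical driver code; it assumes only the constructor signatures and the getInMemoryData()/getSchema() accessors that appear in this patch):

    // Replay node: load schema and data only, skip createApplication().
    auto replayOnly = std::make_unique<SG1>(/*inMemory=*/true, /*startApp=*/false);
    auto *bytes = replayOnly->getInMemoryData();   // buffer to stream over TCP/RDMA
    auto tupleSize = replayOnly->getSchema()->getTupleSize();

    // Worker node: unchanged behaviour, both defaults true.
    auto full = std::make_unique<SG1>();           // builds the full query pipeline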
diff --git a/test/benchmarks/applications/SmartGrid/SmartGrid.h b/test/benchmarks/applications/SmartGrid/SmartGrid.h
index b608efd..a66b193 100644
--- a/test/benchmarks/applications/SmartGrid/SmartGrid.h
+++ b/test/benchmarks/applications/SmartGrid/SmartGrid.h
@@ -59,15 +59,49 @@ class SmartGrid : public BenchmarkQuery {
     m_data = new std::vector<char>(len);
     auto buf = (InputSchema *) m_data->data();
 
-    std::string filePath = Utils::GetHomeDir() + "/LightSaber/resources/datasets/smartgrid/";
+    std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/smartgrid/";
     std::ifstream file(filePath + "smartgrid-data.txt");
+    if (!file.good())
+      throw std::runtime_error("error: input file does not exist, check the path.");
     std::string line;
     unsigned long idx = 0;
     while (std::getline(file, line) && idx < len / sizeof(InputSchema)) {
       InputSchema::parse(buf[idx], line, normalisedTimestamp);
+      if (m_startTimestamp == 0) {
+        m_startTimestamp = buf[0].timestamp;
+      }
+      m_endTimestamp = buf[idx].timestamp;
       idx++;
     }
 
+    if (idx < len / sizeof(InputSchema)) {
+      unsigned long iter = 0;
+      auto barrier = idx-1;
+      long lastTime = buf[idx-1].timestamp;
+      while (idx < len / sizeof(InputSchema)) {
+        std::memcpy(&buf[idx], &buf[iter], sizeof(InputSchema));
+        buf[idx].timestamp += lastTime;
+        m_endTimestamp = buf[idx].timestamp;
+        idx++;
+        iter++;
+        if (iter == barrier) {
+          iter = 0;
+          lastTime = buf[idx-1].timestamp;
+        }
+      }
+    }
+
+    /*for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) {
+      if (i%10==0) {
+        auto value = (int) std::round(buf[i].value * 1000);
+        auto ii = 0;
+        for (;ii < 10 && i < m_data->size() / sizeof(InputSchema); ++i) {
+          buf[i].value = value;
+          ii++;
+        }
+      }
+    }*/
+
     if (m_debug) {
       std::cout << "timestamp value property plug household house" << std::endl;
       for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) {
diff --git a/test/benchmarks/applications/SmartGrid/main.cpp b/test/benchmarks/applications/SmartGrid/main.cpp
index 0081b48..7164cd5 100644
--- a/test/benchmarks/applications/SmartGrid/main.cpp
+++ b/test/benchmarks/applications/SmartGrid/main.cpp
@@ -6,17 +6,17 @@
 #include "SG3.cpp"
 
 int main(int argc, const char **argv) {
-  BenchmarkQuery *benchmarkQuery = nullptr;
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
   SystemConf::getInstance().QUERY_NUM = 2;
   BenchmarkQuery::parseCommandLineArguments(argc, argv);
 
   if (SystemConf::getInstance().QUERY_NUM == 1) {
-    benchmarkQuery = new SG1();
+    benchmarkQuery = std::make_unique<SG1>();
   } else if (SystemConf::getInstance().QUERY_NUM == 2) {
-    benchmarkQuery = new SG2();
+    benchmarkQuery = std::make_unique<SG2>();
   } else if (SystemConf::getInstance().QUERY_NUM == 3) {
-    benchmarkQuery = new SG3();
+    benchmarkQuery = std::make_unique<SG3>();
   } else {
     throw std::runtime_error("error: invalid benchmark query id");
   }
diff --git a/test/benchmarks/applications/YahooBenchmark/YSB.cpp b/test/benchmarks/applications/YahooBenchmark/YSB.cpp
index 3ae455e..8dba9fb 100644
--- a/test/benchmarks/applications/YahooBenchmark/YSB.cpp
+++ b/test/benchmarks/applications/YahooBenchmark/YSB.cpp
@@ -1,13 +1,13 @@
-#include "cql/operators/AggregationType.h"
+#include "benchmarks/applications/YahooBenchmark/YahooBenchmark.h"
 #include "cql/expressions/ColumnReference.h"
-#include "utils/WindowDefinition.h"
+#include "cql/expressions/IntConstant.h"
 #include "cql/operators/Aggregation.h"
+#include "cql/operators/AggregationType.h"
 #include "cql/operators/codeGeneration/OperatorKernel.h"
-#include "utils/QueryOperator.h"
-#include "utils/Query.h"
 #include "cql/predicates/ComparisonPredicate.h"
-#include "cql/expressions/IntConstant.h"
-#include 
"benchmarks/applications/YahooBenchmark/YahooBenchmark.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" class YSB : public YahooBenchmark { private: @@ -193,6 +193,7 @@ class YSB : public YahooBenchmark { bool replayTimestamps = window->isRangeBased(); + OperatorCode *cpuCode; // Set up code-generated operator OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge, true); genCode->setInputSchema(getSchema()); @@ -202,7 +203,7 @@ class YSB : public YahooBenchmark { genCode->setAggregation(aggregation); genCode->setQueryId(0); genCode->setup(); - OperatorCode *cpuCode = genCode; + cpuCode = genCode; // Print operator std::cout << cpuCode->toSExpr() << std::endl; @@ -230,11 +231,12 @@ class YSB : public YahooBenchmark { } public: - YSB(bool inMemory = true) { + YSB(bool inMemory = true, bool startApp = true) { m_name = "YSB"; createSchema(); if (inMemory) loadInMemoryData(); - createApplication(); + if (startApp) + createApplication(); } }; \ No newline at end of file diff --git a/test/benchmarks/applications/YahooBenchmark/YahooBenchmark.h b/test/benchmarks/applications/YahooBenchmark/YahooBenchmark.h index 6a9720f..0b9f791 100644 --- a/test/benchmarks/applications/YahooBenchmark/YahooBenchmark.h +++ b/test/benchmarks/applications/YahooBenchmark/YahooBenchmark.h @@ -86,11 +86,11 @@ class YahooBenchmark : public BenchmarkQuery { virtual void createApplication() = 0; - void loadInMemoryData() { + void loadInMemoryData(uint32_t campaignNum = 100) { if (m_is64) loadInMemoryData_64(); else - loadInMemoryData_128(); + loadInMemoryData_128(campaignNum); }; void loadInMemoryData_64() { @@ -133,6 +133,10 @@ class YahooBenchmark : public BenchmarkQuery { std::to_string(ad_id) + " " + std::to_string(ad_type) + " " + std::to_string(event_type) + " " + std::to_string(-1); InputSchema_64::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; idx++; } @@ -146,22 +150,25 @@ class YahooBenchmark : public BenchmarkQuery { } }; - void loadInMemoryData_128() { + void loadInMemoryData_128(uint32_t campaignNum) { std::random_device rd; std::mt19937_64 eng(rd()); std::uniform_int_distribution distr(0, 1000000); std::unordered_set set; + auto adsNum = campaignNum * 10; + assert(adsNum <= 100000); + size_t totalSize = Utils::getPowerOfTwo(adsNum); size_t len = SystemConf::getInstance().BUNDLE_SIZE; m_data = new std::vector(len); - m_staticData = new std::vector(2 * sizeof(__uint128_t) * 1024); + m_staticData = new std::vector(2 * sizeof(__uint128_t) * totalSize); auto buf = (InputSchema_128 *) m_data->data(); auto staticBuf = (__uint128_t *) m_staticData->data(); long campaign_id = distr(eng); //0; set.insert(campaign_id); - for (unsigned long i = 0; i < 1000; ++i) { + for (unsigned long i = 0; i < adsNum; ++i) { if (i > 0 && i % 10 == 0) { campaign_id = distr(eng); //++; bool is_in = set.find(campaign_id) != set.end(); @@ -179,13 +186,17 @@ class YahooBenchmark : public BenchmarkQuery { auto page_id = distr(eng); unsigned long idx = 0; while (idx < len / sizeof(InputSchema_128)) { - auto ad_id = staticBuf[((idx % 100000) % 1000) * 2]; + auto ad_id = staticBuf[((idx % 100000) % adsNum) * 2]; auto ad_type = (idx % 100000) % 5; auto event_type = (idx % 100000) % 3; line = std::to_string(idx / 1000) + " " + std::to_string(user_id) + " " + std::to_string(page_id) + " " + std::to_string((long) ad_id) + " " + std::to_string(ad_type) + " " + std::to_string(event_type) + " " 
+ std::to_string(-1); InputSchema_128::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; idx++; } diff --git a/test/benchmarks/applications/YahooBenchmark/main.cpp b/test/benchmarks/applications/YahooBenchmark/main.cpp index 43c1712..530cbd0 100644 --- a/test/benchmarks/applications/YahooBenchmark/main.cpp +++ b/test/benchmarks/applications/YahooBenchmark/main.cpp @@ -4,13 +4,13 @@ #include "YSB.cpp" int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; SystemConf::getInstance().QUERY_NUM = 1; BenchmarkQuery::parseCommandLineArguments(argc, argv); if (SystemConf::getInstance().QUERY_NUM == 1) { - benchmarkQuery = new YSB(); + benchmarkQuery = std::make_unique(); } else { throw std::runtime_error("error: invalid benchmark query id"); } diff --git a/test/benchmarks/applicationsWithCheckpoints/CMakeLists.txt b/test/benchmarks/applicationsWithCheckpoints/CMakeLists.txt new file mode 100644 index 0000000..d2e3066 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/CMakeLists.txt @@ -0,0 +1,221 @@ +include_directories(ClusterMonitoring) +include_directories(LinearRoadBenchmark) +include_directories(SmartGrid) +include_directories(YahooBenchmark) +include_directories(ManufacturingEquipment) +include_directories(Nexmark) +include_directories(../applications/RemoteBenchmark) +include_directories(../applications/RemoteBenchmark/RDMA) +include_directories(../../../src/RDMA) + +find_package(GTest REQUIRED) +include_directories(${GTEST_INCLUDE_DIRS}) + +# Configure CCache if available +find_program(CCACHE_PROGRAM ccache) +if (CCACHE_PROGRAM) + message("Using CCache...") + #set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + #set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER "/usr/lib/ccache/clang++") +endif () + +SET(CPP_FILES + ../../../src/checkpoint/FileBackedCheckpointCoordinator.cpp + ../../../src/checkpoint/BlockManager.cpp + ../../../src/checkpoint/LineageGraph.cpp + ../../../src/cql/expressions/Expression.cpp + ../../../src/dispatcher/ITaskDispatcher.cpp + ../../../src/dispatcher/JoinTaskDispatcher.cpp + ../../../src/dispatcher/TaskDispatcher.cpp + ../../../src/compression/CompressionCodeGenUtils.cpp + ../../../src/compression/CompressionStatistics.cpp + ../../../src/monitors/CompressionMonitor.cpp + ../../../src/monitors/PerformanceMonitor.cpp + ../../../src/monitors/ThroughputMonitor.cpp + ../../../src/monitors/Measurement.cpp + ../../../src/monitors/LatencyMonitor.cpp + ../../../src/processor/TaskProcessor.cpp + ../../../src/result/ResultHandler.cpp + ../../../src/tasks/NumaTaskQueueWrapper.cpp + ../../../src/tasks/WindowBatch.cpp + ../../../src/tasks/Task.cpp + ../../../src/utils/AttributeType.cpp + ../../../src/utils/Query.cpp + ../../../src/utils/QueryApplication.cpp + ../../../src/utils/Utils.cpp + ../../../src/utils/SystemConf.cpp + ../../../src/filesystem/File.cpp + ../../../src/checkpoint/FileBackedCheckpointCoordinator.cpp + ../../../src/checkpoint/BlockManager.cpp + ../../../src/checkpoint/LineageGraph.cpp + ) +SET(RDMA_CPP_FILES + ../../../src/RDMA/infinity/core/Context.cpp + ../../../src/RDMA/infinity/memory/Atomic.cpp + ../../../src/RDMA/infinity/memory/Buffer.cpp + ../../../src/RDMA/infinity/memory/Region.cpp + ../../../src/RDMA/infinity/memory/RegionToken.cpp + 
../../../src/RDMA/infinity/memory/RegisteredMemory.cpp + ../../../src/RDMA/infinity/queues/QueuePair.cpp + ../../../src/RDMA/infinity/queues/QueuePairFactory.cpp + ../../../src/RDMA/infinity/requests/RequestToken.cpp + ../../../src/RDMA/infinity/utils/Address.cpp + ) + +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma -lrt") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DNO_DIS -DRDMA_INPU -DRDMA_OUTPU -DTCP_INPU -DTCP_OUTPU") # -DTCP_INPUT -DTCP_OUTPUT -DHAVE_NUMA -DHAVE_SHARE") --gcc-toolchain=/usr/local/gcc/7.5.0 + +find_package(benchmark REQUIRED) +include_directories(${benchmark_INCLUDE_DIRS}) + +FIND_LIBRARY(tbb NAMES libtbb.so) + +if(NOT WIN32) + find_package(PkgConfig QUIET) +endif() + +SET(ENV{PKG_CONFIG_PATH} "/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") +message(STATUS "PKG_CONFIG_PATH: $ENV{PKG_CONFIG_PATH}") + +if(PKG_CONFIG_FOUND) + pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) +else() + find_package(LIBPMEMOBJ++ REQUIRED) +endif() + +link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) + +# Yahoo Benchmark +add_executable(yahoo_benchmark_checkpoints + YahooBenchmark/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +target_link_options(yahoo_benchmark_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(yahoo_benchmark_checkpoints ${Boost_LIBRARIES}) +endif () +target_include_directories(yahoo_benchmark_checkpoints PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(yahoo_benchmark_checkpoints + ${LIBPMEMOBJ++_LIBRARIES} + operatorJITLib + boost_fiber + boost_system + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(yahoo_benchmark_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(yahoo_benchmark_checkpoints PROPERTIES COMPILE_FLAGS "-DHAVE_NUM -DPREFETCH") + + +# ManufacturingEquipment +add_executable(manufacturing_equipment_checkpoints + ManufacturingEquipment/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +target_link_options(manufacturing_equipment_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(manufacturing_equipment_checkpoints ${Boost_LIBRARIES} ${LIBPMEMOBJ++_INCLUDE_DIRS}) +endif () +target_link_libraries(manufacturing_equipment_checkpoints + z ${LIBPMEMOBJ++_LIBRARIES} + boost_iostreams + boost_fiber + operatorJITLib + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(manufacturing_equipment_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) + + +# Cluster Monitoring +add_executable(cluster_monitoring_checkpoints + ClusterMonitoring/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +target_link_options(cluster_monitoring_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(cluster_monitoring_checkpoints ${Boost_LIBRARIES} ${LIBPMEMOBJ++_INCLUDE_DIRS}) +endif () +target_link_libraries(cluster_monitoring_checkpoints + z ${LIBPMEMOBJ++_LIBRARIES} + boost_iostreams + boost_fiber + operatorJITLib + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(cluster_monitoring_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(cluster_monitoring_checkpoints PROPERTIES COMPILE_FLAGS "-DHAVE_NUM -DPREFETCH") + + +# Linear Road Benchmark 
+add_executable(linear_road_benchmark_checkpoints + LinearRoadBenchmark/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +target_link_options(linear_road_benchmark_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(linear_road_benchmark_checkpoints ${Boost_LIBRARIES} ${LIBPMEMOBJ++_INCLUDE_DIRS}) +endif () +target_link_libraries(linear_road_benchmark_checkpoints + z ${LIBPMEMOBJ++_LIBRARIES} + boost_iostreams + boost_fiber + operatorJITLib + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(linear_road_benchmark_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(linear_road_benchmark_checkpoints PROPERTIES COMPILE_FLAGS "-DHAVE_NUM -DPREFETCH") + + +# Smart Grid +add_executable(smartgrid_checkpoints + SmartGrid/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(smartgrid_checkpoints ${Boost_LIBRARIES} ${LIBPMEMOBJ++_INCLUDE_DIRS}) +endif () +target_link_libraries(smartgrid_checkpoints + z ${LIBPMEMOBJ++_LIBRARIES} + boost_iostreams + boost_fiber + operatorJITLib + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(smartgrid_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +target_link_options(smartgrid_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +set_target_properties(smartgrid_checkpoints PROPERTIES COMPILE_FLAGS "-DHAVE_NUM -DPREFETCH") + + +# Nexmark +add_executable(nexmark_checkpoints + Nexmark/main.cpp + ${CPP_FILES} + ${RDMA_CPP_FILES} + ) +target_link_options(nexmark_checkpoints PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(nexmark_checkpoints ${Boost_LIBRARIES} ${LIBPMEMOBJ++_INCLUDE_DIRS}) +endif () +target_link_libraries(nexmark_checkpoints + z ${LIBPMEMOBJ++_LIBRARIES} + operatorJITLib + boost_fiber + boost_system + boost_iostreams + tbb snappy ibverbs + pthread dl aio uuid stdc++fs) +target_compile_options(nexmark_checkpoints PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(nexmark_checkpoints PROPERTIES COMPILE_FLAGS "-DHAVE_NUM -DPREFETCH") \ No newline at end of file diff --git a/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM1.cpp b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM1.cpp new file mode 100644 index 0000000..715986a --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM1.cpp @@ -0,0 +1,689 @@ +#include "benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "snappy.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace CM1Compress { +struct alignas(16) input_tuple_t { + long timestamp; + long jobId; + long taskId; + long machineId; + int eventType; + int userId; + int category; + int priority; + float cpu; + float ram; + float disk; + int constraints; +}; + +std::vector> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data 
= (input_tuple_t *)input; + std::vector idxs (3); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000); + struct t_1 { + uint16_t timestamp : 9; + uint8_t counter : 7; + }; + struct t_2 { + uint8_t category : 3; + uint8_t counter : 5; + }; + struct t_3 { + uint16_t cpu : 10; + uint16_t counter : 6; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.66)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint8_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && count_1 < 127 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = (uint8_t)data[idx].category; + auto sVal_2 = fVal_2; + if (idx < n - 1 && count_2 < 31 && + fVal_2 == (sVal_2 = (uint8_t)data[idx + 1].category)) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + auto fVal_3 = fcomp.compress(data[idx].cpu); + auto sVal_3 = fVal_3; + if (idx < n - 1 && count_3 < 63 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1].cpu))) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + } + } + + writePos += idxs[0] * sizeof(t_1); + (*metadata)[pid] = "c0 RLE BD " + std::to_string(data[0].timestamp) + " {uint16_t:9,uint8_t:7} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[1] * sizeof(t_2); + (*metadata)[pid] += " c7 RLE {uint8_t:3,uint8_t:5} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(t_3)); + writePos += idxs[2] * sizeof(t_3); + (*metadata)[pid] += " c9 RLE FM " + std::to_string(1000) + " {uint16_t:10,uint8_t:6} " + std::to_string(writePos); + + if (SystemConf::getInstance().LATENCY_ON) { + auto value = data[0].timestamp; + latency = (int) (value >> 32); + (*metadata)[pid] += " " + std::to_string(latency) + " "; + } + //(*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); +} + +struct tempV { + int _1; +}; +std::vector tempVec[20]; +bool isFirst[20] = {false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false}; +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (3); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp; + Simple8 simpleComp2; + GorillaValuesCompressor xorComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + 
first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.33)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp.compress( + inOffset, outOffset, n, &data, &input_tuple_t::category, 1, buf2); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + // gorilla float + // store first float in 64 bits + auto buf3 = (uint64_t *)(output + (int) (length*0.66)); + buf3[idxs[2]++] = data[0].cpu; + // calculate trailing and leading zeros for first float + uint64_t *firstV = (uint64_t *)&data[0].cpu; + int prevLeadingZeros = __builtin_clzll(*firstV); + int prevTrailingZeros = __builtin_ctzll(*firstV); + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].cpu; + auto current = (float)data[idx].cpu; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp.compress(xorValue, prevLeadingZeros, prevTrailingZeros); + prevLeadingZeros = leadingZeros; + prevTrailingZeros = trailingZeros; + if (count_3 + appendedValueLength > 64) { + uint8_t split = (64 - count_3); + if (appendedValueLength > 1) { + buf3[idxs[2]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_3 = appendedValueLength - split; + } else { + count_3 += appendedValueLength; + } + buf3[idxs[2]] |= appendedValue << (64 - count_3); + } + //tempVec[pid][idx]._1 = (int) std::round(data[idx].cpu * 1000); + } + + /*// simple 8 + auto tempData = tempVec[pid].data(); + auto buf3 = (uint64_t *)(output + (int) (length*0.66)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &tempData, &tempV::_1, 1, buf3);*/ + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t) + + idxs[1] * sizeof(uint64_t)), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t) + + idxs[2] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int category; + float cpu; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx].category, data[idx].cpu}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + 
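// The first 128 bytes of every compressed batch hold a fixed-size textual
+      // header written by compressInput above (per-column codec, write
+      // offsets, the base-delta base value, and the float multiplier); it is
+      // recovered character by character below and parsed into
+      // whitespace-separated tokens to drive decompression.
+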
for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + + auto base = std::stoi(words[3]); + auto mul = std::stoi(words[13]); + std::vector idxs (3); + idxs[0] = std::stoi(words[5]); + idxs[1] = std::stoi(words[9]); + idxs[2] = std::stoi(words[15]); + if (SystemConf::getInstance().LATENCY_ON) { + latency = std::stoi(words[16]); + } + + + //BaseDeltaCompressor bcomp(base); + struct t_1 { + uint16_t timestamp : 9; + uint8_t counter : 7; + }; + struct t_2 { + uint8_t category : 3; + uint8_t counter : 5; + }; + struct t_3 { + uint16_t cpu : 10; + uint16_t counter : 6; + }; + + auto res = (input_tuple_t*) input; + t_1 *col0 = (t_1 *)(output + 128); + auto *col7 = (t_2 *)(output + 128 + idxs[0]); + auto *col9 = (t_3 *)(output + 128 + idxs[1]); + auto wPos = 0; + auto dataSize = end / sizeof(input_tuple_t); + auto col1Size = idxs[0] / sizeof(t_1); + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col0[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].timestamp = temp.timestamp + base; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + // c7 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col7[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].category = temp.category; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + // c9 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col9[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].cpu = temp.cpu * mul; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + writePos = wPos * sizeof(input_tuple_t); + + /*std::cout << "===========decompress===========" << std::endl; + auto n = dataSize; + for (int i = 0; i idxs (12); + GorillaTimestampCompressor gorillaComp_0; + Simple8 simpleComp_1; + Simple8 simpleComp_2; + Simple8 simpleComp_3; + Simple8 simpleComp_4; + Simple8 simpleComp_5; + Simple8 simpleComp_6; + Simple8 simpleComp_7; + GorillaValuesCompressor xorComp_8; + GorillaValuesCompressor xorComp_9; + GorillaValuesCompressor xorComp_10; + Simple8 simpleComp_11; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + + size_t output_length; + auto buf1 = (uint64_t *)input; + snappy::RawCompress((const char *)(buf1), end, (char*)(output), &output_length); + writePos += output_length; + + /*size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // _1 + auto buf2 = (uint64_t *)(output + (int) (length*0.08)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp_1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::jobId, 1, buf2); + // 
_2 + auto buf3 = (uint64_t *)(output + (int) (length*0.08*2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp_2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::taskId, 1, buf3); + // _3 + auto buf4 = (uint64_t *)(output + (int) (length*0.08*3)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp_3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::machineId, 1, buf4); + // _4 + auto buf5 = (uint64_t *)(output + (int) (length*0.08*4)); + inOffset = 0; + outOffset = 0; + idxs[4] = simpleComp_4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::eventType, 1, buf5); + // _5 + auto buf6 = (uint64_t *)(output + (int) (length*0.08*5)); + inOffset = 0; + outOffset = 0; + idxs[5] = simpleComp_5.compress( + inOffset, outOffset, n, &data, &input_tuple_t::userId, 1, buf6); + // _6 + auto buf7 = (uint64_t *)(output + (int) (length*0.08*6)); + inOffset = 0; + outOffset = 0; + idxs[6] = simpleComp_6.compress( + inOffset, outOffset, n, &data, &input_tuple_t::category, 1, buf7); + // _7 + auto buf8 = (uint64_t *)(output + (int) (length*0.08*7)); + inOffset = 0; + outOffset = 0; + idxs[7] = simpleComp_7.compress( + inOffset, outOffset, n, &data, &input_tuple_t::priority, 1, buf8); + // _11 + auto buf12 = (uint64_t *)(output + (int) (length*0.08*11)); + inOffset = 0; + outOffset = 0; + idxs[11] = simpleComp_11.compress( + inOffset, outOffset, n, &data, &input_tuple_t::constraints, 1, buf12); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + // gorilla float + // _8 + auto buf9 = (uint64_t *)(output + (int) (length*0.08*8)); + buf9[idxs[8]++] = data[0].cpu; + uint64_t *firstV_8 = (uint64_t *)&data[0].cpu; + int prevLeadingZeros_8 = __builtin_clzll(*firstV_8); + int prevTrailingZeros_8 = __builtin_ctzll(*firstV_8); + uint8_t count_8 = 1; + // _9 + auto buf10 = (uint64_t *)(output + (int) (length*0.08*9)); + buf10[idxs[9]++] = data[0].ram; + uint64_t *firstV_9 = (uint64_t *)&data[0].ram; + int prevLeadingZeros_9 = __builtin_clzll(*firstV_9); + int prevTrailingZeros_9 = __builtin_ctzll(*firstV_9); + uint8_t count_9 = 1; + // _10 + auto buf11 = (uint64_t *)(output + (int) (length*0.08*10)); + buf11[idxs[10]++] = data[0].disk; + uint64_t *firstV_10 = (uint64_t *)&data[0].disk; + int prevLeadingZeros_10 = __builtin_clzll(*firstV_10); + int prevTrailingZeros_10 = __builtin_ctzll(*firstV_10); + uint8_t count_10 = 1; + + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp_0.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].cpu; + auto current = (float)data[idx].cpu; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_8.compress(xorValue, prevLeadingZeros_8, prevTrailingZeros_8); + prevLeadingZeros_8 = leadingZeros; + prevTrailingZeros_8 = trailingZeros; + if (count_8 + appendedValueLength > 64) { + uint8_t split = (64 - count_8); + if (appendedValueLength > 1) { + buf9[idxs[8]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[8]; + count_8 = appendedValueLength - 
split; + } else { + count_8 += appendedValueLength; + } + buf9[idxs[8]] |= appendedValue << (64 - count_8); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].ram; + auto current = (float)data[idx].ram; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_9.compress(xorValue, prevLeadingZeros_9, prevTrailingZeros_9); + prevLeadingZeros_9 = leadingZeros; + prevTrailingZeros_9 = trailingZeros; + if (count_9 + appendedValueLength > 64) { + uint8_t split = (64 - count_9); + if (appendedValueLength > 1) { + buf10[idxs[9]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_9 = appendedValueLength - split; + } else { + count_9 += appendedValueLength; + } + buf10[idxs[9]] |= appendedValue << (64 - count_9); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].disk; + auto current = (float)data[idx].disk; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_10.compress(xorValue, prevLeadingZeros_10, prevTrailingZeros_10); + prevLeadingZeros_10 = leadingZeros; + prevTrailingZeros_10 = trailingZeros; + if (count_10 + appendedValueLength > 64) { + uint8_t split = (64 - count_10); + if (appendedValueLength > 1) { + buf11[idxs[10]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_10 = appendedValueLength - split; + } else { + count_10 += appendedValueLength; + } + buf11[idxs[10]] |= appendedValue << (64 - count_10); + } + } + + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf3, + idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf4, + idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf5, + idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf6, + idxs[5] * sizeof(uint64_t)); + writePos += idxs[5] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf7, + idxs[6] * sizeof(uint64_t)); + writePos += idxs[6] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf8, + idxs[7] * sizeof(uint64_t)); + writePos += idxs[7] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf9, + idxs[8] * sizeof(uint64_t)); + writePos += idxs[8] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf10, + idxs[9] * sizeof(uint64_t)); + writePos += idxs[9] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf11, + idxs[10] * sizeof(uint64_t)); + writePos += idxs[10] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf12, + idxs[11] * sizeof(uint64_t)); + writePos += idxs[11] * sizeof(uint64_t);*/ +} +}; + +class CM1 : public ClusterMonitoring { + private: + void createApplication() override { + SystemConf::getInstance().SLOTS = 256; + // change this depending on the batch size + if (SystemConf::getInstance().BATCH_SIZE <= 524288) { + SystemConf::getInstance().PARTIAL_WINDOWS = 288; + } else if (SystemConf::getInstance().BATCH_SIZE <= 1048576) { + SystemConf::getInstance().PARTIAL_WINDOWS = 4 * 288; + } 
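+ // Aside: a minimal sketch of the RLE-over-base-delta scheme used by the compression
+ // callbacks above (names are illustrative, not from this patch). Each 16-bit word
+ // packs a small delta from the batch's first value plus a run length, mirroring the
+ // t_1 {timestamp:9, counter:7} layout:
+ //
+ //   struct RleWord {
+ //     uint16_t delta : 9;   // value - base; must fit in 9 bits
+ //     uint16_t count : 7;   // run length, capped at 127
+ //   };
+ //
+ //   std::vector<RleWord> rleBaseDeltaEncode(const std::vector<long> &v) {
+ //     std::vector<RleWord> out;
+ //     if (v.empty()) return out;
+ //     const long base = v[0];
+ //     uint16_t delta = 0, count = 1;  // v[0] - base is always 0
+ //     for (size_t i = 1; i < v.size(); ++i) {
+ //       auto next = static_cast<uint16_t>(v[i] - base);
+ //       if (next == delta && count < 127) { ++count; }
+ //       else { out.push_back({delta, count}); delta = next; count = 1; }
+ //     }
+ //     out.push_back({delta, count});
+ //     return out;
+ //   }
+ //
+ // Decompression reverses this by emitting `count` copies of `base + delta`, which is
+ // exactly what the nested decompress loops above do.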
else { + SystemConf::getInstance().PARTIAL_WINDOWS = 8 * 288; + } + SystemConf::getInstance().HASH_TABLE_SIZE = 8; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("sum"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(8, BasicType::Float); + + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(6, BasicType::Integer); + + auto window = new WindowDefinition(RANGE_BASED, 60, 1); // (RANGE_BASED, 60*25, 1*25) + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(CM1Compress::compressInput); + queries[0]->getBuffer()->setDecompressionFP(CM1Compress::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + CM1Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + //m_application->getCheckpointCoordinator()->setCompressionFP(0, CM1Compress::compress); + } + } + + public: + CM1(bool inMemory = true) { + m_name = "CM1"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM2.cpp b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM2.cpp new file mode 100644 index 0000000..df790e6 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/CM2.cpp @@ -0,0 +1,701 @@ +#include "benchmarks/applications/ClusterMonitoring/ClusterMonitoring.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/expressions/IntConstant.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/Selection.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "cql/predicates/ComparisonPredicate.h" +#include "snappy.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace 
CM2Compress { +struct alignas(16) input_tuple_t { + long timestamp; + long jobId; + long taskId; + long machineId; + int eventType; + int userId; + int category; + int priority; + float cpu; + float ram; + float disk; + int constraints; +}; + +std::vector> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (3); + std::vector first (3, true); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000); + struct t_1 { + uint16_t timestamp : 9; + //uint16_t counter : 8; + }; + struct t_2 { + uint16_t jobId : 16; + //uint16_t counter : 16; + }; + struct t_3 { + uint16_t cpu : 10; + //uint16_t counter : 6; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + for (auto &&b: first) { + b = false; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.66)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint8_t fVal_1 = (n > 0) ? bcomp.compress(data[0].timestamp) : 0; + auto fVal_2 = (n > 0) ? (uint8_t)data[0].jobId : 0; + auto fVal_3 = (n > 0) ? fcomp.compress(data[0].cpu) : 0; + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (data[idx].eventType == 3) { + buf1[idxs[0]++] = {bcomp.compress(data[idx].timestamp)}; + buf2[idxs[1]++] = {(uint16_t)data[idx+1].jobId}; + buf3[idxs[2]++] = {fcomp.compress(data[idx].cpu)}; + /*fVal_1 = (first[0]) ? bcomp.compress(data[idx].timestamp) : fVal_1; + first[0] = false; + auto sVal_1 = fVal_1; + if (idx < n - 1 && data[idx+1].eventType == 3 && + fVal_1 == (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + fVal_2 = (first[1]) ? (uint8_t)data[idx].jobId : fVal_2; + first[1] = false; + auto sVal_2 = fVal_2; + if (idx < n - 1 && data[idx+1].eventType == 3 && + fVal_2 == (sVal_2 = (uint8_t)data[idx+1].jobId)) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint8_t) fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + fVal_3 = (first[2]) ? 
fcomp.compress(data[idx].cpu) : fVal_3; + first[2] = false; + auto sVal_3 = fVal_3; + if (idx < n - 1 && data[idx+1].eventType == 3 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx+1].cpu))) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + }*/ + } + } + + writePos += idxs[0] * sizeof(t_1); + (*metadata)[pid] = "c0 BS " + std::to_string(data[0].timestamp) + " {uint16_t:9} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[1] * sizeof(t_2); + (*metadata)[pid] += "c1 NS {uint16_t:16} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(t_3)); + writePos += idxs[2] * sizeof(t_3); + (*metadata)[pid] += "c9 FM 1000 {uint16_t:10} " + std::to_string(writePos); + + (*metadata)[pid] += "FLT 3"; + + if (SystemConf::getInstance().LATENCY_ON) { + auto value = data[0].timestamp; + latency = (int) (value >> 32); + (*metadata)[pid] += " " + std::to_string(latency) + " "; + } + //(*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector<size_t> idxs (3); + std::vector<bool> first (3, true); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp; + GorillaValuesCompressor xorComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + for (auto &&b: first) { + b = false; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64 bits + first delta in 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + + struct res2 {uint64_t _1;}; + std::vector<res2> r2 (8192); + auto buf2 = (uint64_t *)(output + (int) (length*0.33)); + + // gorilla float + // store first float in 64 bits + auto buf3 = (uint64_t *)(output + (int) (length*0.66)); + buf3[idxs[2]++] = data[0].cpu; + // calculate trailing and leading zeros for first float + uint64_t *firstV = (uint64_t *)&data[0].cpu; + int prevLeadingZeros = __builtin_clzll(*firstV); + int prevTrailingZeros = __builtin_ctzll(*firstV); + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (data[idx].eventType == 3) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + r2[idxs[1]++] = {static_cast<uint64_t>(data[idx].jobId)}; + if (idx > 0) { + auto prev = (float)data[idx - 1].cpu; + auto current = (float)data[idx].cpu; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)&current; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp.compress(xorValue, prevLeadingZeros, prevTrailingZeros); + prevLeadingZeros = leadingZeros; + prevTrailingZeros = trailingZeros; + if (count_3 + appendedValueLength > 64) { + uint8_t split = (64 - count_3); + if (appendedValueLength > 1) { + buf3[idxs[2]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_3 = appendedValueLength - split; + } else { + count_3 += appendedValueLength; + } + buf3[idxs[2]] |= appendedValue << (64 - count_3); + } + } + } + + // simple 8 + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp.compress( + inOffset, outOffset, idxs[1], &r2, &res2::_1, 1, buf2); + + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t) + + idxs[1] * sizeof(uint64_t)), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t) + + idxs[2] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + long jobId; + float cpu; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + size_t resIdx = 0; + for (size_t idx = 0; idx < n; idx++) { + if (data[idx].eventType == 3) { + out[resIdx++] = {data[idx].timestamp, data[idx].jobId, data[idx].cpu}; + } + } + writePos = resIdx * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector<std::string> words{std::istream_iterator<std::string>{iss}, + std::istream_iterator<std::string>{}}; + + auto base = std::stoi(words[2]); + auto mul = std::stoi(words[11]); + auto flt = std::stoi(words[15]); + std::vector<size_t> idxs (3); + idxs[0] = std::stoi(words[4]); + idxs[1] = std::stoi(words[8]); + idxs[2] = std::stoi(words[13]); + if (SystemConf::getInstance().LATENCY_ON) { + latency = std::stoi(words[16]); + } + + + //BaseDeltaCompressor bcomp(base); + struct t_1 { + uint16_t timestamp : 9; + }; + struct t_2 { + uint16_t jobId : 16; + }; + struct t_3 { + uint16_t cpu : 10; + }; + + auto res = (input_tuple_t*) input; + t_1 *col0 = (t_1 *)(output + 128); + auto *col1 = (t_2 *)(output + 128 + idxs[0]); + auto *col9 = (t_3 *)(output + 128 + idxs[1]); + auto wPos = 0; + auto dataSize = end / sizeof(input_tuple_t); + auto col1Size = idxs[0] / sizeof(t_1); + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col0[idx]; + res[wPos++].timestamp = temp.timestamp + base; + } + + if (SystemConf::getInstance().LATENCY_ON) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + // c1 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col1[idx]; + res[wPos++].jobId = temp.jobId; + } + + // c9 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col9[idx]; + res[wPos++].cpu = temp.cpu * mul; + } + + // fix filtered out values + for (; wPos < dataSize; wPos++) { +
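+ // Aside: the 128-byte header parsed above is a plain whitespace-delimited string
+ // written by compressInput, roughly of the form
+ //   "c0 BS <base> {uint16_t:9} <end0> c1 NS {uint16_t:16} <end1> c9 FM 1000 {uint16_t:10} <end2> FLT 3 [<latency>]"
+ // where BS = base delta + RLE, NS = no scheme (raw narrowing), FM = float multiplier,
+ // and FLT records the eventType filter applied during compression. The hard-coded
+ // words[i] indices above are therefore scheme-specific. A sketch of the tokenizing
+ // step (illustrative names only; std::stoi conveniently stops at the first
+ // non-digit character if two tokens were written without a separator):
+ //
+ //   std::istringstream iss(std::string(header, 128));
+ //   std::vector<std::string> words{std::istream_iterator<std::string>{iss},
+ //                                  std::istream_iterator<std::string>{}};
+ //   long base = std::stol(words[2]);   // the value following "c0 BS"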
res[wPos].eventType = 0; + } + + writePos = wPos * sizeof(input_tuple_t); + + /*std::cout << "===========decompress===========" << std::endl; + auto n = dataSize; + for (int i = 0; i idxs (12); + GorillaTimestampCompressor gorillaComp_0; + Simple8 simpleComp_1; + Simple8 simpleComp_2; + Simple8 simpleComp_3; + Simple8 simpleComp_4; + Simple8 simpleComp_5; + Simple8 simpleComp_6; + Simple8 simpleComp_7; + GorillaValuesCompressor xorComp_8; + GorillaValuesCompressor xorComp_9; + GorillaValuesCompressor xorComp_10; + Simple8 simpleComp_11; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + + size_t output_length; + auto buf1 = (uint64_t *)input; + snappy::RawCompress((const char *)(buf1), end, (char*)(output), &output_length); + writePos += output_length; + + + /*size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // _1 + auto buf2 = (uint64_t *)(output + (int) (length*0.08)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp_1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::jobId, 1, buf2); + // _2 + auto buf3 = (uint64_t *)(output + (int) (length*0.08*2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp_2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::taskId, 1, buf3); + // _3 + auto buf4 = (uint64_t *)(output + (int) (length*0.08*3)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp_3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::machineId, 1, buf4); + // _4 + auto buf5 = (uint64_t *)(output + (int) (length*0.08*4)); + inOffset = 0; + outOffset = 0; + idxs[4] = simpleComp_4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::eventType, 1, buf5); + // _5 + auto buf6 = (uint64_t *)(output + (int) (length*0.08*5)); + inOffset = 0; + outOffset = 0; + idxs[5] = simpleComp_5.compress( + inOffset, outOffset, n, &data, &input_tuple_t::userId, 1, buf6); + // _6 + auto buf7 = (uint64_t *)(output + (int) (length*0.08*6)); + inOffset = 0; + outOffset = 0; + idxs[6] = simpleComp_6.compress( + inOffset, outOffset, n, &data, &input_tuple_t::category, 1, buf7); + // _7 + auto buf8 = (uint64_t *)(output + (int) (length*0.08*7)); + inOffset = 0; + outOffset = 0; + idxs[7] = simpleComp_7.compress( + inOffset, outOffset, n, &data, &input_tuple_t::priority, 1, buf8); + // _11 + auto buf12 = (uint64_t *)(output + (int) (length*0.08*11)); + inOffset = 0; + outOffset = 0; + idxs[11] = simpleComp_11.compress( + inOffset, outOffset, n, &data, &input_tuple_t::constraints, 1, buf12); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + // gorilla float + // _8 + auto buf9 = (uint64_t *)(output + (int) (length*0.08*8)); + buf9[idxs[8]++] = data[0].cpu; + uint64_t *firstV_8 = (uint64_t *)&data[0].cpu; + int prevLeadingZeros_8 = __builtin_clzll(*firstV_8); + int prevTrailingZeros_8 = __builtin_ctzll(*firstV_8); + uint8_t count_8 = 1; + // _9 + auto buf10 = (uint64_t *)(output + (int) (length*0.08*9)); + buf10[idxs[9]++] = data[0].ram; + uint64_t *firstV_9 = (uint64_t *)&data[0].ram; + int prevLeadingZeros_9 = __builtin_clzll(*firstV_9); + int prevTrailingZeros_9 = __builtin_ctzll(*firstV_9); + uint8_t count_9 = 1; + // _10 + auto buf11 = (uint64_t *)(output + (int) 
(length*0.08*10)); + buf11[idxs[10]++] = data[0].disk; + uint64_t *firstV_10 = (uint64_t *)&data[0].disk; + int prevLeadingZeros_10 = __builtin_clzll(*firstV_10); + int prevTrailingZeros_10 = __builtin_ctzll(*firstV_10); + uint8_t count_10 = 1; + + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp_0.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].cpu; + auto current = (float)data[idx].cpu; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_8.compress(xorValue, prevLeadingZeros_8, prevTrailingZeros_8); + prevLeadingZeros_8 = leadingZeros; + prevTrailingZeros_8 = trailingZeros; + if (count_8 + appendedValueLength > 64) { + uint8_t split = (64 - count_8); + if (appendedValueLength > 1) { + buf9[idxs[8]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[8]; + count_8 = appendedValueLength - split; + } else { + count_8 += appendedValueLength; + } + buf9[idxs[8]] |= appendedValue << (64 - count_8); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].ram; + auto current = (float)data[idx].ram; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_9.compress(xorValue, prevLeadingZeros_9, prevTrailingZeros_9); + prevLeadingZeros_9 = leadingZeros; + prevTrailingZeros_9 = trailingZeros; + if (count_9 + appendedValueLength > 64) { + uint8_t split = (64 - count_9); + if (appendedValueLength > 1) { + buf10[idxs[9]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_9 = appendedValueLength - split; + } else { + count_9 += appendedValueLength; + } + buf10[idxs[9]] |= appendedValue << (64 - count_9); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].disk; + auto current = (float)data[idx].disk; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp_10.compress(xorValue, prevLeadingZeros_10, prevTrailingZeros_10); + prevLeadingZeros_10 = leadingZeros; + prevTrailingZeros_10 = trailingZeros; + if (count_10 + appendedValueLength > 64) { + uint8_t split = (64 - count_10); + if (appendedValueLength > 1) { + buf11[idxs[10]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[2]; + count_10 = appendedValueLength - split; + } else { + count_10 += appendedValueLength; + } + buf11[idxs[10]] |= appendedValue << (64 - count_10); + } + } + + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf3, + idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf4, + idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf5, + idxs[4] * 
sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf6, + idxs[5] * sizeof(uint64_t)); + writePos += idxs[5] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf7, + idxs[6] * sizeof(uint64_t)); + writePos += idxs[6] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf8, + idxs[7] * sizeof(uint64_t)); + writePos += idxs[7] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf9, + idxs[8] * sizeof(uint64_t)); + writePos += idxs[8] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf10, + idxs[9] * sizeof(uint64_t)); + writePos += idxs[9] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf11, + idxs[10] * sizeof(uint64_t)); + writePos += idxs[10] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf12, + idxs[11] * sizeof(uint64_t)); + writePos += idxs[11] * sizeof(uint64_t);*/ +} +}; + +class CM2 : public ClusterMonitoring { + private: + void createApplication() override { + + SystemConf::getInstance().SLOTS = 256; + // change this depending on the batch size + if (SystemConf::getInstance().BATCH_SIZE <= 524288) { + SystemConf::getInstance().PARTIAL_WINDOWS = 256; + } else if (SystemConf::getInstance().BATCH_SIZE <= 1048576) { + SystemConf::getInstance().PARTIAL_WINDOWS = 3 * 256; + } else { + SystemConf::getInstance().PARTIAL_WINDOWS = 6 * 256; + } + SystemConf::getInstance().HASH_TABLE_SIZE = 32; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + auto predicate = new ComparisonPredicate(EQUAL_OP, new ColumnReference(4), new IntConstant(3)); + Selection *selection = new Selection(predicate); + + // Configure second query + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("sum"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(8, BasicType::Float); + + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(1, BasicType::Long); + + auto window = new WindowDefinition(RANGE_BASED, 60, 1); //ROW_BASED, 60*25, 1*25); + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setSelection(selection); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + 
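+ // Aside: the buffer accepts plain function pointers for the (de)compression hooks,
+ // so checkpointing and input persistence can transparently encode batches. All the
+ // callbacks in these benchmarks share one shape; the sketch below is illustrative
+ // (the real Query/buffer classes are more involved):
+ //
+ //   using CompressionFn = void (*)(int pid, char *input, int start, int end,
+ //                                  char *output, int &writePos, int length,
+ //                                  bool &clear, long latency);
+ //   struct BufferHooks {
+ //     CompressionFn compressFP = nullptr, decompressFP = nullptr;
+ //     void setCompressionFP(CompressionFn fn) { compressFP = fn; }
+ //     void setDecompressionFP(CompressionFn fn) { decompressFP = fn; }
+ //   };
+ //
+ // Passing CM2Compress::compressInput below works because a default argument
+ // (latency = -1) does not change a function's type.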
queries[0]->getBuffer()->setCompressionFP(CM2Compress::compressInput); + queries[0]->getBuffer()->setDecompressionFP(CM2Compress::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + CM2Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + //m_application->getCheckpointCoordinator()->setCompressionFP(0, CM2Compress::compress); + } + } + + public: + CM2(bool inMemory = true) { + m_name = "CM2"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/ClusterMonitoring.h b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/ClusterMonitoring.h new file mode 100644 index 0000000..f468396 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/ClusterMonitoring.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "utils/TupleSchema.h" +#include "utils/QueryApplication.h" +#include "utils/Utils.h" +#include "benchmarks/applications/BenchmarkQuery.h" + +class ClusterMonitoring : public BenchmarkQuery { + private: + struct InputSchema { + long timestamp; + long jobId; + long taskId; + long machineId; + int eventType; + int userId; + int category; + int priority; + float cpu; + float ram; + float disk; + int constraints; + + static void parse(InputSchema &tuple, std::string &line) { + std::istringstream iss(line); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + tuple.timestamp = std::stol(words[0]); + tuple.jobId = std::stol(words[1]); + tuple.taskId = std::stol(words[2]); + tuple.machineId = std::stol(words[3]); + tuple.eventType = std::stoi(words[4]); + tuple.userId = std::stoi(words[5]); + tuple.category = std::stoi(words[6]); + tuple.priority = std::stoi(words[7]); + tuple.cpu = std::stof(words[8]); + tuple.ram = std::stof(words[9]); + tuple.disk = std::stof(words[10]); + tuple.constraints = std::stoi(words[11]); + } + }; + + public: + TupleSchema *m_schema = nullptr; + QueryApplication *m_application = nullptr; + std::vector *m_data = nullptr; + bool m_debug = false; + + QueryApplication *getApplication() override { + return m_application; + } + + virtual void createApplication() = 0; + + void loadInMemoryData() { + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + m_data = new std::vector(len); + auto buf = (InputSchema *) m_data->data(); + + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/google-cluster-data/"; + std::ifstream file(filePath + "google-cluster-data.txt"); + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); + std::string line; + unsigned long idx = 0; + while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { + InputSchema::parse(buf[idx], line); + idx++; + } + + if (m_debug) { + std::cout << "timestamp jobId machineId eventType userId category priority cpu ram disk constraints" << std::endl; + for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + printf("[DBG] %09d: %7d %13d %8d %13d %3d %6d %2d %2d %8.3f %5.3f %5.3f %5d \n", + i, buf[i].timestamp, buf[i].jobId, buf[i].taskId, buf[i].machineId, + buf[i].eventType, buf[i].userId, 
buf[i].category, buf[i].priority, + buf[i].cpu, buf[i].ram, buf[i].disk, buf[i].constraints); + } + } + + //std::ifstream file(filePath + "compressed-512-norm.dat", + // std::ios_base::in | std::ios_base::binary); + //try { + // boost::iostreams::filtering_istream in; + // in.push(boost::iostreams::gzip_decompressor()); + // in.push(file); + // for(std::string str; std::getline(in, str); ) { + // std::cout << "Processed line " << str << '\n'; + // } + //} + //catch(const boost::iostreams::gzip_error& e) { + // std::cout << e.what() << '\n'; + //} + }; + + std::vector<char> *getInMemoryData() override { + return m_data; + } + + std::vector<char> *getStaticData() override { + throw std::runtime_error("error: this benchmark does not have static data"); + } + + TupleSchema *getSchema() override { + if (m_schema == nullptr) + createSchema(); + return m_schema; + } + + void createSchema() { + m_schema = new TupleSchema(12, "ClusterMonitoring"); + auto longAttr = AttributeType(BasicType::Long); + auto intAttr = AttributeType(BasicType::Integer); + auto floatAttr = AttributeType(BasicType::Float); + + m_schema->setAttributeType(0, longAttr); /* timestamp: long */ + m_schema->setAttributeType(1, longAttr); /* jobId: long */ + m_schema->setAttributeType(2, longAttr); /* taskId: long */ + m_schema->setAttributeType(3, longAttr); /* machineId: long */ + m_schema->setAttributeType(4, intAttr); /* eventType: int */ + m_schema->setAttributeType(5, intAttr); /* userId: int */ + m_schema->setAttributeType(6, intAttr); /* category: int */ + m_schema->setAttributeType(7, intAttr); /* priority: int */ + m_schema->setAttributeType(8, floatAttr); /* cpu: float */ + m_schema->setAttributeType(9, floatAttr); /* ram: float */ + m_schema->setAttributeType(10, floatAttr);/* disk: float */ + m_schema->setAttributeType(11, intAttr); /* constraints: int */ + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/main.cpp b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/main.cpp new file mode 100644 index 0000000..e67b718 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ClusterMonitoring/main.cpp @@ -0,0 +1,24 @@ +#include <memory> // note: the original angle-bracket headers were lost in the source; <memory> and <stdexcept> are assumed from the code below +#include <stdexcept> + +#include "CM1.cpp" +#include "CM2.cpp" + +// ./cluster_monitoring_checkpoints --circular-size 16777216 --unbounded-size 1048576 (524288) --batch-size 524288 --bundle-size 524288 --query 1 --checkpoint-duration 1000 --disk-block-size 65536 --threads 1 +// ./cluster_monitoring_checkpoints --circular-size 16777216 --unbounded-size 1048576 (524288) --batch-size 524288 --bundle-size 524288 --query 2 --checkpoint-duration 1000 --disk-block-size 131072 --threads 1 +int main(int argc, const char **argv) { + std::unique_ptr<BenchmarkQuery> benchmarkQuery {}; + + SystemConf::getInstance().QUERY_NUM = 1; + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + if (SystemConf::getInstance().QUERY_NUM == 1) { + benchmarkQuery = std::make_unique<CM1>(); + } else if (SystemConf::getInstance().QUERY_NUM == 2) { + benchmarkQuery = std::make_unique<CM2>(); + } else { + throw std::runtime_error("error: invalid benchmark query id"); + } + + return benchmarkQuery->runBenchmark(); +} \ No newline at end of file diff --git a/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB1.cpp b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB1.cpp new file mode 100644 index 0000000..851b919 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB1.cpp @@ -0,0 +1,775 @@ +#include
"benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/expressions/IntConstant.h" +#include "cql/expressions/operations/Division.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "cql/predicates/ComparisonPredicate.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace LRB1Compress { +struct alignas(16) input_tuple_t { + long timestamp; + int _1; + float _2; + int _3; + int _4; + int _5; + int _6; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + float _4; + int _5; +}; +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) BucketComp { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._1) * 10 + + uint16_t(key._2); // todo: is this luck? + return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2; + } +}; +std::vector>> *dcomp; +std::vector> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + (*metadata)[pid] += "ht " + std::to_string(offset); + if (latency != -1) { + if (latency == 0) { + auto value = ((long*)input)[0]; + latency = (int) (value >> 32); + } + (*metadata)[pid] += " " + std::to_string(latency) + " "; + } + if ((*metadata)[pid].size() > 128) + throw std::runtime_error("error: increase the size of the metadata area"); + std::memcpy(output, (void *)(*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + //DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 10; + uint16_t counter : 6; + }; + struct t_2 { + uint16_t groupKey : 10; + uint16_t value : 6; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx]._3, data[idx]._5, + data[idx]._6 / 5280}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto fVal_3 = (uint16_t) data[idx]._2;//fcomp.compress(data[idx]._2); + buf2[idxs[1]++] = {fVal_2, fVal_3}; + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * 
sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector<size_t> idxs (5, 0); + BaseDeltaCompressor bcomp(data[0].timestamp); + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + //DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 10; + uint8_t counter : 6; + }; + struct t_5 { + uint8_t value : 8; + }; + + //writePos = 0; + // compress + size_t n = (end - start) / sizeof(input_tuple_t); + t_1 *buf1 = (t_1 *)(output); + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + //data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + t_5 *buf5 = (t_5 *)(output + (int) (length*0.8)); + uint16_t count_1 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (count_1 < 63 && idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {static_cast<uint16_t>(fVal_1), static_cast<uint8_t>(count_1)}; + fVal_1 = sVal_1; + count_1 = 1; + } + buf5[idxs[4]++] = {static_cast<uint8_t>(data[idx]._2)}; + } + + writePos += idxs[0] * sizeof(t_1); + (*metadata)[pid] = "c0 RLE BD " + std::to_string(data[0].timestamp) + " {uint16_t:10,uint8_t:6} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + (*metadata)[pid] += " c3 S8 " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + (*metadata)[pid] += " c5 S8 " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + (*metadata)[pid] += " c6 S8 " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(t_5)); + writePos += idxs[4] * sizeof(t_5); + (*metadata)[pid] += " c2 NS {uint8_t:8} " + std::to_string(writePos); + if (SystemConf::getInstance().LATENCY_ON) { + auto value = data[0].timestamp; + latency = (int) (value >> 32); + (*metadata)[pid] += " " + std::to_string(latency) + " "; + } + //(*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); + + /*std::cout << "===========compress===========" << std::endl; + for (int i = 0; i < n; i++) { + std::cout << data[i].timestamp << std::endl; + }*/ +} + +// note: the opening lines of this function were garbled in the source; the name is known +// from the setDecompressionFP call below and the preamble mirrors its sibling functions. +void decompressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector<std::string> words{std::istream_iterator<std::string>{iss}, + std::istream_iterator<std::string>{}}; + + auto base = std::stoi(words[3]); + std::vector<size_t> idxs (5); + idxs[0] = std::stoi(words[5]); + idxs[1] = std::stoi(words[8]); + idxs[2] = std::stoi(words[11]); + idxs[3] = std::stoi(words[14]); + idxs[4] = std::stoi(words[18]); + if (SystemConf::getInstance().LATENCY_ON) { + latency = std::stoi(words[19]); + } + + + //BaseDeltaCompressor bcomp(base); + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + struct t_1 { + uint16_t timestamp : 10; + uint8_t counter : 6; + }; + struct t_5 { + uint8_t value : 8; + }; + + auto res = (input_tuple_t*) input; + t_1 *col0 = (t_1 *)(output + 128); + auto *col3 = (uint64_t *)(output + 128 + idxs[0]); + auto *col5 = (uint64_t *)(output + 128 + idxs[1]); + auto *col6 = (uint64_t *)(output + 128 + idxs[2]); + t_5 *col2 = (t_5 *)(output + 128 + idxs[3]); + auto wPos = 0; + auto dataSize = end / sizeof(input_tuple_t); + auto col1Size = idxs[0] / sizeof(t_1); + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col0[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].timestamp = temp.timestamp + base; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + // c3 + auto retVal = (idxs[1]-idxs[0])/sizeof(uint64_t); + simpleComp1.decompress(retVal, 0, dataSize, &res, &input_tuple_t::_3, 1, col3); + + // c5 + retVal = (idxs[2]-idxs[1])/sizeof(uint64_t); + simpleComp1.decompress(retVal, 0, dataSize, &res, &input_tuple_t::_5, 1, col5); + + // c6 + retVal = (idxs[3]-idxs[2])/sizeof(uint64_t); + simpleComp1.decompress(retVal, 0, dataSize, &res, &input_tuple_t::_6, 1, col6, 5280); + + wPos = 0; + for (int idx = 0; idx < dataSize; ++idx) { + auto temp = (int) col2[idx].value; + res[wPos++]._2 = temp; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + writePos = wPos * sizeof(input_tuple_t); + + /*std::cout << "===========decompress===========" << std::endl; + auto n = dataSize; + for (int i = 0; i < n; i++) { + std::cout << res[i].timestamp << std::endl; + }*/ +} + +// note: the name and opening lines of this Gorilla-based variant were garbled in the +// source; the signature mirrors its siblings and the name is an assumption. +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + auto data = (input_tuple_t *)input; + std::vector<size_t> idxs (5); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + GorillaValuesCompressor xorComp; + + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64 bits + first delta in 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + // data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + // gorilla float + // store first float in 64 bits + auto buf5 = (uint64_t *)(output + (int) (length*0.8)); + buf5[idxs[4]++] = data[0]._2; + // calculate trailing and leading zeros for first float + uint64_t *firstV = (uint64_t *)&data[0]._2; + int prevLeadingZeros = __builtin_clzll(*firstV); + int prevTrailingZeros = __builtin_ctzll(*firstV); + uint16_t count_5 = 1; + + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1]._2; + auto current = (float)data[idx]._2; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)&current; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp.compress(xorValue, prevLeadingZeros, prevTrailingZeros); + prevLeadingZeros = leadingZeros; + prevTrailingZeros = trailingZeros; + if (count_5 + appendedValueLength > 64) { + uint8_t split = (64 - count_5); + if (appendedValueLength > 1) { + buf5[idxs[4]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[4]; + count_5 = appendedValueLength - split; + } else { + count_5 += appendedValueLength; + } + buf5[idxs[4]] |= appendedValue << (64 - count_5); + } + } + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int highway; + int direction; + int segment; + float speed; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx]._3, data[idx]._5, + data[idx]._6 / 5280, data[idx]._2}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector<std::string> words{std::istream_iterator<std::string>{iss}, + std::istream_iterator<std::string>{}}; + + std::vector<size_t> idxs (2); + idxs[0] = std::stoi(words[1]); + idxs[1] = std::stoi(words[3]); + auto hashTableSize = std::stoi(words[5]); + if (latency != -1) { + latency = std::stoi(words[6]); + } + + int bDecomp = 1; +
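+ // Aside: LRB groups by a composite key (highway, direction, segment). The dcomp
+ // dictionary above assigns every distinct key a dense small id so that a group
+ // reference fits in ~10 bits, and its raw bucket table is flushed with the
+ // checkpoint so this function can map ids back to keys. A minimal sketch of the
+ // idea (illustrative; the real DictionaryCompressor is an open-addressing table),
+ // reusing the Key/hash/Eq types defined in this namespace:
+ //
+ //   class KeyDictionary {
+ //     std::unordered_map<Key, uint16_t, hash, Eq> m_table;
+ //    public:
+ //     uint16_t compress(const Key &key) {
+ //       auto it = m_table.find(key);
+ //       if (it != m_table.end()) return it->second;
+ //       if (m_table.size() >= 1024)   // must fit the caller's bit-width
+ //         throw std::runtime_error("dictionary overflow");
+ //       auto id = static_cast<uint16_t>(m_table.size());
+ //       m_table.emplace(key, id);
+ //       return id;
+ //     }
+ //   };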
float fDecomp = (float) 1; + struct t_1 { + uint32_t timestamp : 24; + uint16_t counter : 8; + }; + struct t_2 { + uint16_t groupKey : 10; + uint16_t value : 6; + }; + + auto res = (input_tuple_t*) input; + t_1 *col1 = (t_1 *)(output + 128); + t_2 *col2 = (t_2 *)(output + 128 + idxs[0] * sizeof(t_1)); + auto buckets = (Bucket *)(output + 128 + idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2)); + auto wPos = 0; + for (int idx = 0; idx < idxs[0]; ++idx) { + auto temp = col1[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].timestamp = temp.timestamp * bDecomp; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + if (latency != -1) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + wPos = 0; + for (int idx = 0; idx < idxs[1]; ++idx) { + auto temp = col2[idx]; + auto pos = (int)temp.groupKey; + Key key = buckets[pos].key; + auto val = (float)((float)temp.value / fDecomp); + //res[wPos]._1 = 0; + res[wPos]._2 = val; + res[wPos]._3 = key._0; + //res[wPos]._4 = 0; + res[wPos]._5 = key._1; + res[wPos]._6 = key._2 * 5280; + wPos++; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + writePos = wPos * sizeof(input_tuple_t); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(BucketComp); + end = end / sizeof(BucketComp); + DummyFloatCompressor fcomp(1000000); + + if (!isComplete) { + auto inputBuf = (BucketComp *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t speed : 7; + uint16_t counter : 8; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + fcomp.compress(inputBuf[idx].value._1), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t speed : 7; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + fcomp.compress(inputBuf[idx]._4)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + + BaseDeltaCompressor bcomp(1); + auto fdecomp = 1000000; + auto hTable = (BucketComp *) &input[writePos]; + auto outIdx = 0; + auto base = 1; + + if (!isComplete) { + auto outputBuf = (BucketComp *)output; + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t 
speed : 7; + uint16_t counter : 8; + }; + auto inputBuf = (res *) &input[start]; + start = start / sizeof(res); + end = end / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].groupKey >= (*dcomp)[pid]->getTable().max_size()) + throw std::runtime_error("error: the group key is greater than the hashtable size"); + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[inputBuf[idx].groupKey] = {1, 1, (long)inputBuf[idx].timestamp + base, key, + {(float)inputBuf[idx].speed/fdecomp}, inputBuf[idx].counter}; + } + } else { + auto outputBuf = (output_tuple_t *)output; + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t speed : 7; + }; + auto inputBuf = (res *) &input[start]; + start = start / sizeof(res); + end = end / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[outIdx] = {static_cast(inputBuf[idx].timestamp) + base, key._0, key._1, + key._2, (float)inputBuf[idx].speed * fdecomp, 0}; + outIdx++; + } + } +} +}; + +class LRB1 : public LinearRoadBenchmark { + private: + void createApplication() override { + SystemConf::getInstance().SLOTS = 128; + SystemConf::getInstance().PARTIAL_WINDOWS = 550; //320; + //SystemConf::getInstance().HASH_TABLE_SIZE = 256; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + auto segmentExpr = new Division(new ColumnReference(6, BasicType::Integer), new IntConstant(5280)); + + // Configure second query + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("avg"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(2, BasicType::Float); + + std::vector groupByAttributes(3); + groupByAttributes[0] = new ColumnReference(3, BasicType::Integer); + groupByAttributes[1] = new ColumnReference(5, BasicType::Integer); + groupByAttributes[2] = segmentExpr; + + auto window = new WindowDefinition(RANGE_BASED, 300, 1); //(ROW_BASED, 300*80, 1*80); + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + + // Configure third query + auto predicate = new ComparisonPredicate(LESS_OP, new ColumnReference(4), new IntConstant(40)); + Selection *selection = new Selection(predicate); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setCollisionBarrier(8); + genCode->setHaving(selection); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if 
(SystemConf::getInstance().CHECKPOINT_COMPRESSION/* && false*/) { + if (true) { + queries[0]->getBuffer()->setCompressionFP(LRB1Compress::compressInput_); + queries[0]->getBuffer()->setDecompressionFP( + LRB1Compress::decompressInput_); + } else { + std::cout << "No compression is used in the input" << std::endl; + } + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + LRB1Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + LRB1Compress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + LRB1Compress::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + } + if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + m_application->getCheckpointCoordinator()->setCompressionFP(0, LRB1Compress::compress); + m_application->getCheckpointCoordinator()->setDecompressionFP(0, LRB1Compress::decompress); + } + } + + public: + LRB1(bool inMemory = true) { + m_name = "LRB1"; + createSchema(); + createApplication(); + m_fileName = "lrb-data-small-ht.txt"; + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB2.cpp b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB2.cpp new file mode 100644 index 0000000..31e8162 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB2.cpp @@ -0,0 +1,898 @@ +#include "benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/expressions/IntConstant.h" +#include "cql/expressions/operations/Division.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace LRB2Compress { +struct alignas(16) input_tuple_t { + long timestamp; + int _1; + float _2; + int _3; + int _4; + int _5; + int _6; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + int _4; + float _5; + int _6; +}; +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; + int _3; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._2) * 10 + + uint16_t(key._3); // todo: is this luck? 
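+ // Note: this weighted sum only stays collision-free because each key component is a
+ // small integer in the benchmark data (single-digit highway/direction ids and a short
+ // segment range). For arbitrary keys a standard combiner such as
+ //   result = (result * 31) ^ std::hash<int>{}(component);
+ // applied per field would be the safer choice.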
+ return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2 && lhs._3 == rhs._3; + } +}; +std::vector>> *dcomp; +std::vector> *metadata; + +struct dBucket { + Key key; +}; +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{ -1, -1, -1, -1}; + } + } + auto offset = (*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + writePos += offset; + (*metadata)[pid] += "ht " + std::to_string(offset); + if ((*metadata)[pid].size() > 128) + throw std::runtime_error("error: increase the size of the metadata area"); + std::memcpy(output, (void *)(*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 9; + uint16_t counter : 7; + }; + struct t_2 { + uint16_t groupKey : 12; + uint16_t counter : 4; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx]._1, data[idx]._3, data[idx]._5, + data[idx]._6 / 5280}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto sVal_2 = fVal_2; + if (idx < n - 1) { + Key temp2{data[idx+1]._1, data[idx+1]._3, data[idx+1]._5, + data[idx+1]._6 / 5280}; + sVal_2 = (*dcomp)[pid]->compress(temp2); + if (sVal_2 == fVal_2) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (6, 0); + BaseDeltaCompressor bcomp(data[0].timestamp); + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + Simple8 simpleComp4; + //DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 9; + uint16_t counter : 7; + }; + + //writePos = 0; + // compress + size_t n = (end - start) / sizeof(input_tuple_t); + t_1 *buf1 = (t_1 *)(output); + + // simple 8 + auto 
buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*2*0.2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*3*0.2)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + // data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + // simple 8 + auto buf5 = (uint64_t *)(output + (int) (length*4*0.2)); + inOffset = 0; + outOffset = 0; + idxs[4] = simpleComp4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_1, 1, buf5); + + + uint16_t count_1 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {static_cast(fVal_1), static_cast(count_1)}; + fVal_1 = sVal_1; + count_1 = 1; + } + } + writePos += idxs[0] * sizeof(t_1); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (6, 0); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + Simple8 simpleComp4; + + //writePos = 0; + // compress + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*2*0.2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*3*0.2)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + // data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + // simple 8 + auto buf5 = (uint64_t *)(output + (int) (length*4*0.2)); + inOffset = 
0; + outOffset = 0; + idxs[4] = simpleComp4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_1, 1, buf5); + + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + } + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int vehicle; + int highway; + int direction; + int segment; + float speed; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx]._1, data[idx]._3, data[idx]._5, + data[idx]._6 / 5280, data[idx]._2}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(Bucket); + end = end / sizeof(Bucket); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 12; + uint16_t counter : 7; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + + if (!isComplete) { + auto inputBuf = (Bucket *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3, + inputBuf[idx]._4}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + static_cast(inputBuf[idx]._6)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, 
int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + struct res { + uint32_t timestamp : 9; + uint32_t groupKey : 12; + uint32_t counter : 7; + }; + auto inputBuf = (res *) &input[start]; + start = start / sizeof(res); + end = end / sizeof(res); + BaseDeltaCompressor bcomp(1); + auto hTable = (Bucket *) &input[writePos]; + auto outIdx = 0; + + if (!isComplete) { + auto outputBuf = (Bucket *)output; + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].groupKey >= (*dcomp)[pid]->getTable().max_size()) + throw std::runtime_error("error: the group key is greater than the hashtable size"); + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[inputBuf[idx].groupKey] = {1, 1, inputBuf[idx].timestamp, key, + {(float)inputBuf[idx].counter}, inputBuf[idx].counter}; + } + } else { + auto outputBuf = (output_tuple_t *)output; + for (size_t idx = start; idx < end; ++idx) { + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[outIdx] = {inputBuf[idx].timestamp, key._0, key._1, + key._2, key._3, (float)inputBuf[idx].counter, inputBuf[idx].counter}; + outIdx++; + } + } +} +}; + +class LRB2 : public LinearRoadBenchmark { + private: + void createApplication() override { + SystemConf::getInstance().SLOTS = 512; + SystemConf::getInstance().PARTIAL_WINDOWS = 544; + SystemConf::getInstance().HASH_TABLE_SIZE = 2 * 1024; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + auto segmentExpr = new Division(new ColumnReference(6, BasicType::Integer), new IntConstant(5280)); + + // Configure second query + std::vector _aggregationTypes(1); + _aggregationTypes[0] = AggregationTypes::fromString("cnt"); + + std::vector _aggregationAttributes(1); + _aggregationAttributes[0] = new ColumnReference(2, BasicType::Float); + + std::vector _groupByAttributes(4); + _groupByAttributes[0] = new ColumnReference(1, BasicType::Integer); + _groupByAttributes[1] = new ColumnReference(3, BasicType::Integer); + _groupByAttributes[2] = new ColumnReference(5, BasicType::Integer); + _groupByAttributes[3] = segmentExpr; + + auto _window = new WindowDefinition(RANGE_BASED, 30, 1); //(ROW_BASED, 30*1000, 1*1000); + Aggregation + *_aggregation = new Aggregation(*_window, _aggregationTypes, _aggregationAttributes, _groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = _window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *_genCode = new OperatorKernel(true, true, useParallelMerge); + _genCode->setInputSchema(getSchema()); + _genCode->setAggregation(_aggregation); + _genCode->setCustomHashTable(buildCustomHashTable()); + _genCode->setQueryId(0); + _genCode->setup(); + OperatorCode *_cpuCode = _genCode; + + // Print operator + std::cout << _cpuCode->toSExpr() << std::endl; + // Define an ft-operator + auto _queryOperator = new QueryOperator(*_cpuCode, true); + std::vector _operators; + _operators.push_back(_queryOperator); + + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + std::vector> queries(1); + queries[0] = std::make_shared(0, + _operators, + *_window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, //false, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) 
{ + if (true) { + queries[0]->getBuffer()->setCompressionFP(LRB2Compress::compressInput_); + queries[0]->getBuffer()->setDecompressionFP(LRB2Compress::decompressInput); + } else { + std::cout << "No compression is used in the input" << std::endl; + } + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + LRB2Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + LRB2Compress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + LRB2Compress::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + } + if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + m_application->getCheckpointCoordinator()->setCompressionFP(0, LRB2Compress::compress); + m_application->getCheckpointCoordinator()->setDecompressionFP(0, LRB2Compress::decompress); + } + } + + std::string buildCustomHashTable() { + std::string barrier = (SystemConf::getInstance().PARALLEL_MERGE_ON) ? "8" : "220"; + return + "struct Key {\n" + " int _0;\n" + " int _1;\n" + " int _2;\n" + " int _3;\n" + "};\n" + "using KeyT = Key;\n" + "using ValueT = Value;\n" + "\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2 && lhs._3 == rhs._3;\n" + " }\n" + "};\n" + "\n" + "struct CustomHash {\n" + " std::size_t operator()(KeyT t) const {\n" + " std::hash _h;\n" + " return _h(t._0);\n" + " }\n" + "};\n" + "using MyHash = CustomHash;\n" + "\n" + "struct alignas(16) Bucket {\n" + " char state;\n" + " char dirty;\n" + " long timestamp;\n" + " KeyT key;\n" + " ValueT value;\n" + " int counter;\n" + "};\n" + "\n" + "using BucketT = Bucket;\n" + "\n" + "class alignas(64) HashTable {\n" + "private:\n" + " using HashT = MyHash; //std::hash;\n" + " using EqT = HashMapEqualTo;\n" + " using AggrT = Aggregator;\n" + "\n" + " HashT _hasher;\n" + " EqT _eq;\n" + " BucketT* _buckets = nullptr;\n" + " AggrT* _aggrs = nullptr;\n" + " size_t _num_buckets = MAP_SIZE;\n" + " size_t _num_filled = 0;\n" + " size_t _mask = MAP_SIZE-1;\n" + " int _barrier = " + barrier + ";\n" + "public:\n" + " HashTable ();\n" + " HashTable (Bucket*nodes);\n" + " void init ();\n" + " void reset ();\n" + " void clear ();\n" + " void insert (KeyT &key, ValueT &value, long timestamp);\n" + " void insert_or_modify (KeyT &key, ValueT &value, long timestamp);\n" + " bool evict (KeyT &key);\n" + " void insertSlices ();\n" + " void evictSlices ();\n" + " void setValues ();\n" + " void setIntermValues (int pos, long timestamp);\n" + " bool get_value (const KeyT &key, ValueT &result);\n" + " bool get_result (const KeyT &key, ValueT &result);\n" + " bool get_index (const KeyT &key, int &index);\n" + " void deleteHashTable();\n" + " BucketT* getBuckets ();\n" + " size_t getSize() const;\n" + " bool isEmpty() const;\n" + " size_t getNumberOfBuckets() const;\n" + " float load_factor() const;\n" + "};\n" + "\n" + "HashTable::HashTable () {}\n" + "\n" + "HashTable::HashTable (Bucket *nodes) : _buckets(nodes) {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "}\n" 
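+        // (`n && !(n & (n - 1))` is the standard power-of-two test:
+        //  n & (n - 1) clears the lowest set bit, so the expression holds
+        //  only when a single bit is set. MAP_SIZE must pass it for the
+        //  `_hasher(key) & _mask` indexing used throughout the generated
+        //  table to act as a modulo.)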
+ "\n" + "void HashTable::init () {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "\n" + " _buckets = (BucketT*)malloc(_num_buckets * sizeof(BucketT));\n" + " _aggrs = (AggrT*)malloc(_num_buckets * sizeof(AggrT));\n" + " if (!_buckets /*|| !_aggrs*/) {\n" + " free(_buckets);\n" + " /*free(_aggrs);*/\n" + " throw std::bad_alloc();\n" + " }\n" + "\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " _aggrs[i] = AggrT (); // maybe initiliaze this on insert\n" + " _aggrs[i].initialise();\n" + " }\n" + "}\n" + "\n" + "void HashTable::reset () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " //_aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::clear () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " //_buckets[i].counter = 0;\n" + " _aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, key, KEY_SIZE);\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " char tempState;\n" + " int steps = 0;\n" + " for (; i < _num_buckets; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_buckets[i].key._0 == key._0 || _eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " _aggrs[i].initialise();\n" + " return;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_buckets[i].key._0 == key._0 || _eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " 
_buckets[i].counter = 1;\n" + " _aggrs[i].initialise();\n" + " return;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "bool HashTable::evict (KeyT &key) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " printf (\"error: entry not found \\n\");\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::insertSlices () {\n" + " int maxNumOfSlices = INT_MIN;\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " int temp = _aggrs[i].addedElements - _aggrs[i].removedElements;\n" + " if (_buckets[i].state) {\n" + " node n;\n" + "\t\t\t\tn._1 = _buckets[i].value._1;\n" + " _aggrs[i].insert(n);\n" + " _buckets[i].state = 0;\n" + " //_buckets[i].value = ValueT();\n" + " } else if (temp > 0) {\n" + " ValueT val;\n" + " node n;\n" + "\t\t\tn._1 = val._1;\n" + " _aggrs[i].insert(n);\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::evictSlices () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " _aggrs[i].evict();\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setValues () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].query();\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " _buckets[i].counter = 1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setIntermValues (int pos, long timestamp) {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].queryIntermediate (pos);\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "bool HashTable::get_value (const KeyT &key, ValueT &result) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "bool HashTable::get_index (const KeyT &key, int &index) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " int steps = 0;\n" + " index = -1; \n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " return false;\n" + " };\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " return 
false;\n" + " };\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::deleteHashTable() {\n" + " for (size_t bucket=0; bucket<_num_buckets; ++bucket) {\n" + " _buckets[bucket].~BucketT();\n" + " _aggrs->~AggrT();\n" + " }\n" + " free(_buckets);\n" + " free(_aggrs);\n" + "}\n" + "\n" + "BucketT* HashTable::getBuckets () {\n" + " return _buckets;\n" + "}\n" + "\n" + "size_t HashTable::getSize() const {\n" + " return _num_filled;\n" + "}\n" + "\n" + "bool HashTable::isEmpty() const {\n" + " return _num_filled==0;\n" + "}\n" + "\n" + "size_t HashTable::getNumberOfBuckets() const {\n" + " return _num_buckets;\n" + "}\n" + "\n" + "float HashTable::load_factor() const {\n" + " return static_cast(_num_filled) / static_cast(_num_buckets);\n" + "}\n"; + } + + public: + LRB2(bool inMemory = true) { + m_name = "LRB2"; + createSchema(); + createApplication(); + m_fileName = "lrb-data-small-ht.txt"; + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB3.cpp b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB3.cpp new file mode 100644 index 0000000..aab859a --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LRB3.cpp @@ -0,0 +1,1172 @@ +#include "benchmarks/applications/LinearRoadBenchmark/LinearRoadBenchmark.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/expressions/IntConstant.h" +#include "cql/expressions/operations/Division.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace LRB3Compress { +struct alignas(16) input_tuple_t { + long timestamp; + int _1; + float _2; + int _3; + int _4; + int _5; + int _6; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + int _4; + float _5; + int _6; +}; +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; + int _3; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._2) * 10 + + uint16_t(key._3); // todo: is this luck? 
+ return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2 && lhs._3 == rhs._3; + } +}; +std::vector>> *dcomp; +std::vector> *metadata; + +struct dBucket { + Key key; +}; +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{ -1, -1, -1, -1}; + } + } + auto offset = (*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + writePos += offset; + (*metadata)[pid] += "ht " + std::to_string(offset); + if ((*metadata)[pid].size() > 128) + throw std::runtime_error("error: increase the size of the metadata area"); + std::memcpy(output, (void *)(*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 9; + uint16_t counter : 7; + }; + struct t_2 { + uint16_t groupKey : 12; + uint16_t counter : 4; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx]._1, data[idx]._3, data[idx]._5, + data[idx]._6 / 5280}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto sVal_2 = fVal_2; + if (idx < n - 1) { + Key temp2{data[idx+1]._1, data[idx+1]._3, data[idx+1]._5, + data[idx+1]._6 / 5280}; + sVal_2 = (*dcomp)[pid]->compress(temp2); + if (sVal_2 == fVal_2) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (6, 0); + BaseDeltaCompressor bcomp(data[0].timestamp); + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + Simple8 simpleComp4; + //DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 9; + uint16_t counter : 7; + }; + + //writePos = 0; + // compress + size_t n = (end - start) / sizeof(input_tuple_t); + t_1 *buf1 = (t_1 *)(output); + + // simple 8 + auto 
buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*2*0.2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*3*0.2)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + // data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + // simple 8 + auto buf5 = (uint64_t *)(output + (int) (length*4*0.2)); + inOffset = 0; + outOffset = 0; + idxs[4] = simpleComp4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_1, 1, buf5); + + + uint16_t count_1 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {static_cast(fVal_1), static_cast(count_1)}; + fVal_1 = sVal_1; + count_1 = 1; + } + } + writePos += idxs[0] * sizeof(t_1); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (6, 0); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + Simple8 simpleComp4; + + //writePos = 0; + // compress + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_3, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*2*0.2)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_5, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*3*0.2)); + inOffset = 0; + outOffset = 0; + //for (size_t idx = 0; idx < n; idx++) { + // data[idx]._6 /= 5280; + //} + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_6, 1, buf4, 5280); + + // simple 8 + auto buf5 = (uint64_t *)(output + (int) (length*4*0.2)); + inOffset = 
0; + outOffset = 0; + idxs[4] = simpleComp4.compress( + inOffset, outOffset, n, &data, &input_tuple_t::_1, 1, buf5); + + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + } + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int vehicle; + int highway; + int direction; + int segment; + float speed; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx]._1, data[idx]._3, data[idx]._5, + data[idx]._6 / 5280, data[idx]._2}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(Bucket); + end = end / sizeof(Bucket); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 12; + uint16_t counter : 7; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + + if (!isComplete) { + auto inputBuf = (Bucket *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3, + inputBuf[idx]._4}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + static_cast(inputBuf[idx]._6)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, 
int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + struct res { + uint32_t timestamp : 9; + uint32_t groupKey : 12; + uint32_t counter : 7; + }; + auto inputBuf = (res *) &input[start]; + start = start / sizeof(res); + end = end / sizeof(res); + BaseDeltaCompressor bcomp(1); + auto hTable = (Bucket *) &input[writePos]; + auto outIdx = 0; + + if (!isComplete) { + auto outputBuf = (Bucket *)output; + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].groupKey >= (*dcomp)[pid]->getTable().max_size()) + throw std::runtime_error("error: the group key is greater than the hashtable size"); + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[inputBuf[idx].groupKey] = {1, 1, inputBuf[idx].timestamp, key, + {(float)inputBuf[idx].counter}, inputBuf[idx].counter}; + } + } else { + auto outputBuf = (output_tuple_t *)output; + for (size_t idx = start; idx < end; ++idx) { + Key key = hTable[inputBuf[idx].groupKey].key; + outputBuf[outIdx] = {inputBuf[idx].timestamp, key._0, key._1, + key._2, key._3, (float)inputBuf[idx].counter, inputBuf[idx].counter}; + outIdx++; + } + } +} +}; + +namespace LRB3Compress_ { +struct alignas(16) input_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + int _4; + float _5; + int _6; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + float _4; + int _5; +}; +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._1) * 10 + + uint16_t(key._2); // todo: is this luck? 
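+    // Same decimal-weighting heuristic as in LRB2Compress, but here all
+    // three fields come from the first query's output (expressway,
+    // direction, segment in the usual LRB value ranges), so the weights
+    // come much closer to a collision-free encoding.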
+ return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2; + } +}; +std::vector>> *dcomp; +std::vector> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + (*metadata)[pid] += "ht " + std::to_string(offset); + if ((*metadata)[pid].size() > 128) + throw std::runtime_error("error: increase the size of the metadata area"); + std::memcpy(output, (void *)(*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (3); + BaseDeltaCompressor bcomp(data[0].timestamp); + struct t_1 { + uint16_t timestamp : 9; + uint16_t counter : 7; + }; + struct t_2 { + uint16_t groupKey : 10; + uint16_t counter : 6; + }; + struct t_3 { + uint32_t value : 23; + uint16_t counter : 9; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.66)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx]._2, data[idx]._3, data[idx]._4}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto sVal_2 = fVal_2; + if (idx < n - 1) { + Key temp2{data[idx+1]._2, data[idx+1]._3, data[idx+1]._4}; + sVal_2 = (*dcomp)[pid]->compress(temp2); + if (sVal_2 == fVal_2) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + auto fVal_3 = data[idx]._1; + auto sVal_3 = fVal_3; + if (idx < n - 1 && + fVal_3 == (sVal_3 = data[idx + 1]._1)) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + std::memcpy((void *)(output + idxs[0] * sizeof(t_1) + + idxs[1] * sizeof(t_2)), + (void *)buf3, idxs[2] * sizeof(t_3)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2) + + idxs[2] * sizeof(t_3); + (*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + + " r2 " + std::to_string(idxs[2]) + " "; +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + 
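+    // The (start == 0, end == -1) call is the snapshot path: instead of
+    // encoding buckets it copies the dictionary table itself into the
+    // checkpoint, so that recovery can map compressed group keys back to
+    // their full keys.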
std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(Bucket); + end = end / sizeof(Bucket); + DummyFloatCompressor fcomp(1000000); + + if (!isComplete) { + auto inputBuf = (Bucket *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t vehicles : 7; + uint16_t counter : 8; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + fcomp.compress(inputBuf[idx].value._1), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint16_t timestamp : 9; + uint16_t groupKey : 8; + uint16_t speed : 7; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + fcomp.compress(inputBuf[idx]._4)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + throw std::runtime_error("error: the decompression function is not implemented"); +} +}; + +class LRB3 : public LinearRoadBenchmark { + private: + void createApplication() override { + SystemConf::getInstance().SLOTS = 512; + SystemConf::getInstance().PARTIAL_WINDOWS = 544; + SystemConf::getInstance().HASH_TABLE_SIZE = 2 * 1024; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + auto segmentExpr = new Division(new ColumnReference(6, BasicType::Integer), new IntConstant(5280)); + + // Configure second query + std::vector _aggregationTypes(1); + _aggregationTypes[0] = AggregationTypes::fromString("cnt"); + + std::vector _aggregationAttributes(1); + _aggregationAttributes[0] = new ColumnReference(2, BasicType::Float); + + std::vector _groupByAttributes(4); + _groupByAttributes[0] = new ColumnReference(1, BasicType::Integer); + _groupByAttributes[1] = new ColumnReference(3, BasicType::Integer); + _groupByAttributes[2] = new ColumnReference(5, BasicType::Integer); + _groupByAttributes[3] = segmentExpr; + + auto _window = new WindowDefinition(RANGE_BASED, 30, 1); //(ROW_BASED, 30*1000, 1*1000); + Aggregation + *_aggregation = new Aggregation(*_window, _aggregationTypes, _aggregationAttributes, _groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = _window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *_genCode = new OperatorKernel(true, true, useParallelMerge); + _genCode->setInputSchema(getSchema()); + _genCode->setAggregation(_aggregation); + _genCode->setCustomHashTable(buildCustomHashTable()); + _genCode->setQueryId(0); + _genCode->setup(); + OperatorCode *_cpuCode = _genCode; + + // Print operator + std::cout << _cpuCode->toSExpr() << std::endl; + // Define an 
ft-operator + auto _queryOperator = new QueryOperator(*_cpuCode, true); + std::vector _operators; + _operators.push_back(_queryOperator); + + + // Configure third query + //SystemConf::getInstance().PARTIAL_WINDOWS = 512; // Change this according to the previous operator + auto config3 = + new QueryConfig(8 * SystemConf::getInstance().CIRCULAR_BUFFER_SIZE, + SystemConf::getInstance().BATCH_SIZE, + SystemConf::getInstance().BUNDLE_SIZE, 2 * 1024, 1024); + std::vector aggregationTypes_(1); + aggregationTypes_[0] = AggregationTypes::fromString("cnt"); + + std::vector aggregationAttributes_(1); + aggregationAttributes_[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes_(3); + groupByAttributes_[0] = new ColumnReference(2, BasicType::Integer); + groupByAttributes_[1] = new ColumnReference(3, BasicType::Integer); + groupByAttributes_[2] = new ColumnReference(4, BasicType::Integer); + + auto window_ = new WindowDefinition(ROW_BASED, 1024, 1024); + Aggregation + *aggregation_ = new Aggregation(*window_, aggregationTypes_, aggregationAttributes_, groupByAttributes_); + + TupleSchema *schema_ = &(((OperatorKernel *) _cpuCode)->getOutputSchema()); + + // Set up code-generated operator + OperatorKernel *genCode_ = new OperatorKernel(true); + genCode_->setInputSchema(schema_); + genCode_->setAggregation(aggregation_); + genCode_->setQueryId(1); + genCode_->setup(config3); + OperatorCode *cpuCode_ = genCode_; + + // Define an ft-operator + auto queryOperator_ = new QueryOperator(*cpuCode_, true); + std::vector operators_; + operators_.push_back(queryOperator_); + + + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + std::vector> queries(2); + queries[0] = std::make_shared(0, + _operators, + *_window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, //false + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + queries[1] = std::make_shared(1, operators_, *window_, schema_, m_timestampReference, + true, + false, + true, + false, //useParallelMerge, + 0, false, config3); + queries[0]->connectTo(queries[1].get()); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(LRB3Compress::compressInput_); + queries[1]->getBuffer()->setCompressionFP(LRB3Compress_::compressInput); + queries[0]->getBuffer()->setDecompressionFP(LRB3Compress::decompressInput); + queries[1]->getBuffer()->setDecompressionFP(LRB3Compress_::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + LRB3Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + LRB3Compress_::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + LRB3Compress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + LRB3Compress::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + + LRB3Compress_::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + LRB3Compress_::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + } + if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + 
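+      // One (de)compression pair per query id: the two-operator pipeline
+      // keeps separate dictionaries (LRB3Compress for the windowed
+      // aggregation, LRB3Compress_ for the downstream count), so the
+      // checkpoint coordinator needs both registrations.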
m_application->getCheckpointCoordinator()->setCompressionFP(0, LRB3Compress::compress); + m_application->getCheckpointCoordinator()->setCompressionFP(1, LRB3Compress_::compress); + m_application->getCheckpointCoordinator()->setDecompressionFP(0, LRB3Compress::decompress); + m_application->getCheckpointCoordinator()->setDecompressionFP(1, LRB3Compress_::decompress); + } + } + + std::string buildCustomHashTable() { + std::string barrier = (SystemConf::getInstance().PARALLEL_MERGE_ON) ? "8" : "220"; + return + "struct Key {\n" + " int _0;\n" + " int _1;\n" + " int _2;\n" + " int _3;\n" + "};\n" + "using KeyT = Key;\n" + "using ValueT = Value;\n" + "\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2 && lhs._3 == rhs._3;\n" + " }\n" + "};\n" + "\n" + "struct CustomHash {\n" + " std::size_t operator()(KeyT t) const {\n" + " std::hash _h;\n" + " return _h(t._0);\n" + " }\n" + "};\n" + "using MyHash = CustomHash;\n" + "\n" + "struct alignas(16) Bucket {\n" + " char state;\n" + " char dirty;\n" + " long timestamp;\n" + " KeyT key;\n" + " ValueT value;\n" + " int counter;\n" + "};\n" + "\n" + "using BucketT = Bucket;\n" + "\n" + "class alignas(64) HashTable {\n" + "private:\n" + " using HashT = MyHash; //std::hash;\n" + " using EqT = HashMapEqualTo;\n" + " using AggrT = Aggregator;\n" + "\n" + " HashT _hasher;\n" + " EqT _eq;\n" + " BucketT* _buckets = nullptr;\n" + " AggrT* _aggrs = nullptr;\n" + " size_t _num_buckets = MAP_SIZE;\n" + " size_t _num_filled = 0;\n" + " size_t _mask = MAP_SIZE-1;\n" + " int _barrier = " + barrier + ";\n" + "public:\n" + " HashTable ();\n" + " HashTable (Bucket*nodes);\n" + " void init ();\n" + " void reset ();\n" + " void clear ();\n" + " void insert (KeyT &key, ValueT &value, long timestamp);\n" + " void insert_or_modify (KeyT &key, ValueT &value, long timestamp);\n" + " bool evict (KeyT &key);\n" + " void insertSlices ();\n" + " void evictSlices ();\n" + " void setValues ();\n" + " void setIntermValues (int pos, long timestamp);\n" + " bool get_value (const KeyT &key, ValueT &result);\n" + " bool get_result (const KeyT &key, ValueT &result);\n" + " bool get_index (const KeyT &key, int &index);\n" + " void deleteHashTable();\n" + " BucketT* getBuckets ();\n" + " size_t getSize() const;\n" + " bool isEmpty() const;\n" + " size_t getNumberOfBuckets() const;\n" + " float load_factor() const;\n" + "};\n" + "\n" + "HashTable::HashTable () {}\n" + "\n" + "HashTable::HashTable (Bucket *nodes) : _buckets(nodes) {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "}\n" + "\n" + "void HashTable::init () {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "\n" + " _buckets = (BucketT*)malloc(_num_buckets * sizeof(BucketT));\n" + " _aggrs = (AggrT*)malloc(_num_buckets * sizeof(AggrT));\n" + " if (!_buckets /*|| !_aggrs*/) {\n" + " free(_buckets);\n" + " /*free(_aggrs);*/\n" + " throw std::bad_alloc();\n" + " }\n" + "\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " _aggrs[i] = AggrT (); // maybe initiliaze this on insert\n" + " _aggrs[i].initialise();\n" + " }\n" + "}\n" + "\n" + "void HashTable::reset () {\n" + " for (auto 
i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " //_aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::clear () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " //_buckets[i].counter = 0;\n" + " _aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, key, KEY_SIZE);\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " char tempState;\n" + " int steps = 0;\n" + " for (; i < _num_buckets; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_buckets[i].key._0 == key._0 || _eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " _aggrs[i].initialise();\n" + " return;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_buckets[i].key._0 == key._0 || _eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " _aggrs[i].initialise();\n" + " return;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " printf(\"Too many collisions, increase the size...\\n\");\n" + " exit(1);\n" + " };\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "bool HashTable::evict (KeyT &key) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " printf (\"error: 
entry not found \\n\");\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::insertSlices () {\n" + " int maxNumOfSlices = INT_MIN;\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " int temp = _aggrs[i].addedElements - _aggrs[i].removedElements;\n" + " if (_buckets[i].state) {\n" + " node n;\n" + "\t\t\t\tn._1 = _buckets[i].value._1;\n" + " _aggrs[i].insert(n);\n" + " _buckets[i].state = 0;\n" + " //_buckets[i].value = ValueT();\n" + " } else if (temp > 0) {\n" + " ValueT val;\n" + " node n;\n" + "\t\t\tn._1 = val._1;\n" + " _aggrs[i].insert(n);\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::evictSlices () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " _aggrs[i].evict();\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setValues () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].query();\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " _buckets[i].counter = 1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setIntermValues (int pos, long timestamp) {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].queryIntermediate (pos);\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "bool HashTable::get_value (const KeyT &key, ValueT &result) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "bool HashTable::get_index (const KeyT &key, int &index) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " int steps = 0;\n" + " index = -1; \n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " return false;\n" + " };\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " steps++;\n" + " if (steps == _barrier ) {\n" + " return false;\n" + " };\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::deleteHashTable() {\n" + " for (size_t bucket=0; bucket<_num_buckets; ++bucket) {\n" + " _buckets[bucket].~BucketT();\n" + " _aggrs->~AggrT();\n" + " }\n" + " free(_buckets);\n" + " free(_aggrs);\n" + "}\n" + "\n" + "BucketT* HashTable::getBuckets () {\n" + " return _buckets;\n" + "}\n" + "\n" + "size_t HashTable::getSize() const {\n" + " return _num_filled;\n" + "}\n" + "\n" + "bool HashTable::isEmpty() const {\n" + " return _num_filled==0;\n" + "}\n" + "\n" + "size_t HashTable::getNumberOfBuckets() const {\n" + " return _num_buckets;\n" + "}\n" + "\n" + "float HashTable::load_factor() const {\n" + " return static_cast(_num_filled) / static_cast(_num_buckets);\n" + "}\n"; + } 
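// [editor's sketch, not part of the patch] The customHashtable string above is
// generated C++ for an open-addressing table with linear probing: a lookup
// walks from the hashed slot to the end of the bucket array and then wraps to
// the front before giving up. Note that angle-bracket template arguments were
// lost in this rendering; e.g. load_factor() should read
//   return static_cast<float>(_num_filled) / static_cast<float>(_num_buckets);
// A minimal standalone version of the probe loop, assuming a power-of-two
// table size so that _mask == _num_buckets - 1, would be:
//
//   template <typename BucketT, typename KeyT>
//   int probe(const BucketT *buckets, size_t size, size_t mask, const KeyT &key) {
//     size_t ind = std::hash<KeyT>{}(key) & mask;
//     for (size_t i = ind; i < size; ++i)   // probe towards the end...
//       if (!buckets[i].state || buckets[i].key == key) return (int) i;
//     for (size_t i = 0; i < ind; ++i)      // ...then wrap around to the front
//       if (!buckets[i].state || buckets[i].key == key) return (int) i;
//     return -1;                            // table is full
//   }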
+ + public: + LRB3(bool inMemory = true) { + m_name = "LRB3"; + createSchema(); + createApplication(); + m_fileName = "lrb-data-small-ht.txt"; + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LinearRoadBenchmark.h b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LinearRoadBenchmark.h new file mode 100644 index 0000000..7ff8455 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/LinearRoadBenchmark.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "utils/TupleSchema.h" +#include "utils/QueryApplication.h" +#include "utils/Utils.h" +#include "benchmarks/applications/BenchmarkQuery.h" + +class LinearRoadBenchmark : public BenchmarkQuery { + private: + struct InputSchema { + long timestamp; + int vehicle; + float speed; + int highway; + int lane; + int direction; + int position; + + static void parse(InputSchema &tuple, std::string &line) { + std::istringstream iss(line); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + tuple.timestamp = std::stol(words[0]); + tuple.vehicle = std::stoi(words[1]); + tuple.speed = std::stof(words[2]); + tuple.highway = std::stoi(words[3]); + tuple.lane = std::stoi(words[4]); + tuple.direction = std::stoi(words[5]); + tuple.position = std::stoi(words[6]); + } + }; + + public: + TupleSchema *m_schema = nullptr; + QueryApplication *m_application = nullptr; + std::vector *m_data = nullptr; + bool m_debug = false; + std::string m_fileName; + + QueryApplication *getApplication() override { + return m_application; + } + + virtual void createApplication() = 0; + + void loadInMemoryData() { + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + m_data = new std::vector(len); + auto buf = (InputSchema *) m_data->data(); + + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/lrb/"; + std::ifstream file(filePath + m_fileName); + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); + std::string line; + unsigned long idx = 0; + while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { + InputSchema::parse(buf[idx], line); + idx++; + } + + if (m_debug) { + std::cout << "timestamp vehicle speed highway lane direction position" << std::endl; + for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + printf("[DBG] %09d: %7d %8d %5.3f %13d %3d %6d %2d \n", + i, buf[i].timestamp, buf[i].vehicle, + buf[i].speed, buf[i].highway, buf[i].lane, + buf[i].direction, buf[i].position); + } + } + }; + + std::vector *getInMemoryData() override { + return m_data; + } + + std::vector *getStaticData() override { + throw std::runtime_error("error: this benchmark does not have static data"); + } + + TupleSchema *getSchema() override { + if (m_schema == nullptr) + createSchema(); + return m_schema; + } + + void createSchema() { + m_schema = new TupleSchema(7, "LinearRoadBenchmark"); + auto longAttr = AttributeType(BasicType::Long); + auto intAttr = AttributeType(BasicType::Integer); + auto floatAttr = AttributeType(BasicType::Float); + + m_schema->setAttributeType(0, longAttr); /* timestamp: long */ + m_schema->setAttributeType(1, intAttr); /* vehicle: int */ + m_schema->setAttributeType(2, floatAttr); /* speed: float */ + m_schema->setAttributeType(3, intAttr); /* highway: int */ + m_schema->setAttributeType(4, intAttr); /* lane: int */ + 
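// [editor's note, not part of the patch] The template arguments in this header
// were eaten by the rendering. InputSchema::parse above uses the standard
// whitespace-tokenizing idiom, which with the arguments restored reads:
//
//   std::istringstream iss(line);
//   std::vector<std::string> words{std::istream_iterator<std::string>{iss},
//                                  std::istream_iterator<std::string>{}};
//
// and the data member below is std::vector<char> *m_data, a raw byte buffer
// that loadInMemoryData() reinterprets as an array of InputSchema tuples.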
m_schema->setAttributeType(5, intAttr);  /* direction: int */
+    m_schema->setAttributeType(6, intAttr);  /* position:  int */
+  }
+};
diff --git a/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/main.cpp b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/main.cpp
new file mode 100644
index 0000000..dc282f5
--- /dev/null
+++ b/test/benchmarks/applicationsWithCheckpoints/LinearRoadBenchmark/main.cpp
@@ -0,0 +1,57 @@
+#include
+#include
+
+#include "LRB1.cpp"
+#include "LRB2.cpp"
+#include "LRB3.cpp"
+
+// --unbounded-size 8388608 --circular-size 16777216 --batch-size 524288 --bundle-size 524288 --query 1 --hashtable-size 256 --checkpoint-duration 1000 --disk-block-size 16777216 --create-merge true --parallel-merge true --threads 1
+// ./linear_road_benchmark_checkpoints --unbounded-size 4194304 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --query 1 --hashtable-size 256 --checkpoint-duration 1000 --disk-block-size 16777216 --create-merge true --threads 1
+// ./linear_road_benchmark_checkpoints --unbounded-size 16777216 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --query 2 --checkpoint-duration 1000 --disk-block-size 8388608 --create-merge true --parallel-merge true --threads 1
+int main(int argc, const char **argv) {
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
+
+  auto t1 = std::chrono::high_resolution_clock::now();
+  SystemConf::getInstance().QUERY_NUM = 2;
+  BenchmarkQuery::parseCommandLineArguments(argc, argv);
+
+  if (SystemConf::getInstance().QUERY_NUM == 1) {
+    benchmarkQuery = std::make_unique<LRB1>();
+  } else if (SystemConf::getInstance().QUERY_NUM == 2) {
+    benchmarkQuery = std::make_unique<LRB2>();
+  } else if (SystemConf::getInstance().QUERY_NUM == 3) {
+    benchmarkQuery = std::make_unique<LRB3>();
+  } else {
+    throw std::runtime_error("error: invalid benchmark query id");
+  }
+
+  auto t2 = std::chrono::high_resolution_clock::now();
+  auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
+  std::cout << "Setup duration: " + std::to_string(time_span.count()) << std::endl;
+
+  //if (!SystemConf::getInstance().FAILURE_ON) {
+  return benchmarkQuery->runBenchmark();
+  /*} else {
+    try {
+      benchmarkQuery->runBenchmark();
+      //std::system("pkill -9 -f "
+      //            "/home/george/LightSaber/cmake-build-debug/test/benchmarks/applicationsWithCheckpoints/linear_road_benchmark_checkpoints");
+    } catch (std::exception& e) {
+      std::cerr << "Exception caught : " << e.what() << std::endl;
+    }
+
+    std::this_thread::sleep_for(std::chrono::milliseconds (500));
+    SystemConf::getInstance().RECOVER = true;
+    std::unique_ptr<BenchmarkQuery> recoverQuery {};
+    if (SystemConf::getInstance().QUERY_NUM == 1) {
+      recoverQuery = std::make_unique<LRB1>();
+    } else if (SystemConf::getInstance().QUERY_NUM == 2) {
+      recoverQuery = std::make_unique<LRB2>();
+    } else if (SystemConf::getInstance().QUERY_NUM == 3) {
+      recoverQuery = std::make_unique<LRB3>();
+    } else {
+      throw std::runtime_error("error: invalid benchmark query id");
+    }
+    return recoverQuery->runBenchmark();
+  }*/
+}
\ No newline at end of file
diff --git a/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ME1.cpp b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ME1.cpp
new file mode 100644
index 0000000..cf656e4
--- /dev/null
+++ b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ME1.cpp
@@ -0,0 +1,512 @@
+#include "benchmarks/applications/ManufacturingEquipment/ManufacturingEquipment.h"
+#include "compression/Compressor.h"
+#include "cql/expressions/ColumnReference.h"
+#include
"cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "snappy.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +// --unbounded-size 32768 --circular-size 16777216 --batch-size 262144 --bundle-size 262144 --disk-block-size 32768 --latency tru --checkpoint-compression true --persist-input true --lineage true --threads 10 +namespace ME1Compress { +struct alignas(16) input_tuple_t { + long timestamp; + long messageIndex; + int mf01; //Electrical Power Main Phase 1 + int mf02; //Electrical Power Main Phase 2 + int mf03; //Electrical Power Main Phase 3 + int pc13; //Anode Current Drop Detection Cell 1 + int pc14; //Anode Current Drop Detection Cell 2 + int pc15; //Anode Current Drop Detection Cell 3 + unsigned int pc25; //Anode Voltage Drop Detection Cell 1 + unsigned int pc26; //Anode Voltage Drop Detection Cell 2 + unsigned int pc27; //Anode Voltage Drop Detection Cell 3 + unsigned int res; + int bm05 = 0; + int bm06 = 0; +}; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (4); + BaseDeltaCompressor bcomp(data[0].timestamp); + struct t_1 { + uint8_t timestamp : 6; + uint16_t counter : 10; + }; + struct t_2 { + //uint16_t mf01 : 12; + uint8_t mf01 : 4; + uint8_t counter : 4; + }; + struct t_3 { + //uint16_t mf02 : 12; + uint8_t mf02 : 4; + uint8_t counter : 4; + }; + struct t_4 { + //uint16_t mf03 : 12; + uint8_t mf03 : 4; + uint8_t counter : 4; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.25)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.5)); + t_3 *buf4 = (t_3 *)(output + (int) (length*0.75)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint8_t count_3 = 1; + uint8_t count_4 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = (uint16_t)data[idx].mf01; + auto sVal_2 = fVal_2; + if (idx < n - 1 && + fVal_2 == (sVal_2 = (uint16_t)data[idx + 1].mf01)) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint8_t)fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + auto fVal_3 = (uint16_t)data[idx].mf02; + auto sVal_3 = fVal_3; + if (idx < n - 1 && + fVal_3 == (sVal_3 = (uint16_t)data[idx + 1].mf02)) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint8_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + } + + auto fVal_4 = (uint16_t)data[idx].mf03; + auto sVal_4 = fVal_4; + if (idx < n - 1 && + fVal_4 == (sVal_4 = (uint16_t)data[idx + 1].mf03)) { + count_4++; + } else { + buf4[idxs[3]++] = {(uint8_t)fVal_4, count_4}; + fVal_4 = sVal_4; + count_4 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + std::memcpy((void *)(output + idxs[0] * sizeof(t_1) + + idxs[1] * sizeof(t_2)), + (void *)buf3, idxs[2] * sizeof(t_3)); + std::memcpy((void *)(output + idxs[0] * sizeof(t_1) + + idxs[1] * sizeof(t_2) + idxs[2] * sizeof(t_3)), + (void 
*)buf4, idxs[3] * sizeof(t_4)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2) + + idxs[2] * sizeof(t_3) + idxs[3] * sizeof(t_4); +} + +inline void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + if (start == end || end < start) { + return; + } + + // Input Buffer + auto data = (input_tuple_t *)input; + std::vector idxs (4, 0); + uint16_t count_0 = 1; + BaseDeltaCompressor comp_0(data[0].timestamp); + auto temp_0 = comp_0.compress(data[0].timestamp); + struct t_0 { + uint8_t _0 : 6; + uint16_t counter : 10; + }; + uint8_t count_1 = 1; + auto temp_1 = (uint16_t)data[0].mf01; + struct t_1 { + uint16_t _1 : 10; + uint8_t counter : 4; + }; + uint8_t count_2 = 1; + auto temp_2 = (uint16_t)data[0].mf02; + struct t_2 { + uint16_t _2 : 12; + uint8_t counter : 4; + }; + uint8_t count_3 = 1; + auto temp_3 = (uint16_t)data[0].mf03; + struct t_3 { + uint16_t _3 : 12; + uint8_t counter : 4; + }; + // output buffers + int barriers[4]; + barriers[0] = (int)(length*0.000000); + t_0 *buf0 = (t_0 *) (output + barriers[0]); + barriers[1] = (int)(length*0.250000); + t_1 *buf1 = (t_1 *) (output + barriers[1]); + barriers[2] = (int)(length*0.500000); + t_2 *buf2 = (t_2 *) (output + barriers[2]); + barriers[3] = (int)(length*0.750000); + t_3 *buf3 = (t_3 *) (output + barriers[3]); + size_t n = (end - start) / sizeof(input_tuple_t); + + for (size_t idx = 0; idx < n; idx++) { + // apply compression + //if (comp_0.check(data[idx].timestamp)) { + // std::cout << "warning: falling back to the original compression scheme"<< std::endl; + // clear = true; + // return; + //} + auto res_0 = comp_0.compress(data[idx].timestamp); + // apply RLE + if (temp_0 != res_0 || count_0 >= 1023.000000) { + buf0[idxs[0]++] = {temp_0, count_0}; + count_0 = 0; + temp_0 = res_0; + } else { + count_0++; + } + //if (!CanTypeFitValue(data[idx].mf01)) { +// std::cout << "warning: falling back to the original compression scheme"<< std::endl; +// clear = true; +// return; +// } + uint16_t res_1 = (uint16_t) data[idx].mf01; + // apply RLE + if (temp_1 != res_1 || count_1 >= 15.000000) { + buf1[idxs[1]++] = {temp_1, count_1}; + count_1 = 0; + temp_1 = res_1; + } else { + count_1++; + } + /*if (!CanTypeFitValue(data[idx]._3)) { + std::cout << "warning: falling back to the original compression scheme"<< std::endl; + clear = true; + return; + }*/ + uint16_t res_2 = (uint16_t) data[idx].mf02; + // apply RLE + if (temp_2 != res_2 || count_2 >= 15.000000) { + buf2[idxs[2]++] = {temp_2, count_2}; + count_2 = 0; + temp_2 = res_2; + } else { + count_2++; + } + /*if (!CanTypeFitValue(data[idx]._4)) { + std::cout << "warning: falling back to the original compression scheme"<< std::endl; + clear = true; + return; + }*/ + uint16_t res_3 = (uint16_t) data[idx].mf03; + // apply RLE + if (temp_3 != res_3 || count_3 >= 15.000000) { + buf3[idxs[3]++] = {temp_3, count_3}; + count_3 = 0; + temp_3 = res_3; + } else { + count_3++; + } + } + if (count_0 != 0) { + buf0[idxs[0]++] = {temp_0, count_0}; + } + if (count_1 != 0) { + buf1[idxs[1]++] = {temp_1, count_1}; + } + if (count_2 != 0) { + buf2[idxs[2]++] = {temp_2, count_2}; + } + if (count_3 != 0) { + buf3[idxs[3]++] = {temp_3, count_3}; + } + // copy results and set output pointers + writePos += idxs[0] * sizeof(t_0); + if (writePos > barriers[1]) {throw std::runtime_error("error: larger barriers needed");} + std::memcpy((void *)(output + 
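// [editor's sketch, not part of the patch] compressInput/compressInput_ above
// combine base-delta encoding (each timestamp is stored as an offset from the
// batch's first timestamp) with run-length encoding (a {value, counter} record
// is emitted only when the value changes or the counter saturates). Stripped
// of the generated-code details, the per-column loop is essentially:
//
//   uint16_t run = 0;
//   auto prev = comp.compress(data[0].timestamp);   // delta vs. the base
//   for (size_t i = 0; i < n; ++i) {
//     auto cur = comp.compress(data[i].timestamp);
//     if (cur != prev || run >= MAX_RUN) {          // the run ends here
//       buf[k++] = {prev, run}; run = 0; prev = cur;
//     } else { ++run; }
//   }
//   if (run != 0) buf[k++] = {prev, run};           // flush the last run
//
// With a 6-bit delta and a 10-bit counter, each run of equal 8-byte column
// values shrinks to a single 2-byte record.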
writePos), (void *)buf0, idxs[0] * sizeof(t_0)); + writePos += idxs[1] * sizeof(t_1); + if (writePos > barriers[2]) {throw std::runtime_error("error: larger barriers needed");} + std::memcpy((void *)(output + writePos), (void *)buf1, idxs[1] * sizeof(t_1)); + writePos += idxs[2] * sizeof(t_2); + if (writePos > barriers[3]) {throw std::runtime_error("error: larger barriers needed");} + std::memcpy((void *)(output + writePos), (void *)buf2, idxs[2] * sizeof(t_2)); + writePos += idxs[3] * sizeof(t_3); + if (writePos > length) {throw std::runtime_error("error: larger barriers needed");} +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (4); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + //GorillaValuesCompressor xorComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.25)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::mf01, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.5)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::mf02, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.75)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::mf03, 1, buf4); + + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + } + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t) + + idxs[1] * sizeof(uint64_t)), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t) + + idxs[1] * sizeof(uint64_t) + idxs[2] * sizeof(uint64_t)), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t) + + idxs[2] * sizeof(uint64_t) + idxs[3] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int mf01; + int mf02; + int mf03; + }; + auto out = 
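// [editor's note, not part of the patch] compressGenInput above stores the
// first timestamp verbatim, the first delta in 14 bits, and every later
// timestamp as a Gorilla-style delta-of-delta, which is zero (a single bit)
// for a perfectly regular stream:
//
//   int64_t delta = t[i] - t[i-1];
//   int64_t dod   = delta - (t[i-1] - t[i-2]);   // 0 => emit one '0' bit
//
// The integer columns instead go through Simple8-style packing, which fits as
// many consecutive small integers as possible into each 64-bit word (e.g. 16
// values per word when they all fit in 4 bits).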
(res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx].mf01, data[idx].mf02, data[idx].mf02}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void onlyCompressInputLossless(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + + size_t output_length; + auto buf1 = (uint64_t *)input; + snappy::RawCompress((const char *)(buf1), end, (char*)(output), &output_length); + writePos += output_length; +} + +inline void filterAndCompress(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + if (start == end || end < start) { + return; + } + + struct tempS { + long timestamp; + int mf01; + int mf02; + int mf03; + }; + // Input Buffer + auto data = (input_tuple_t *)input; + auto buf = (tempS *) (output + (int)(end*0.5)); + std::vector idxs (4, 0); + size_t n = (end - start) / sizeof(input_tuple_t); + + for (size_t idx = 0; idx < n; idx++) { + buf[idx] = {data[idx].timestamp, data[idx].mf01, data[idx].mf02, data[idx].mf03}; + } + size_t output_length; + snappy::RawCompress((const char *)(buf),n*sizeof(tempS), (char*)(output), &output_length); + writePos += output_length; +} +}; + +class ME1 : public ManufacturingEquipment { + private: + void createApplication() override { + //SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = 4096; + SystemConf::getInstance().SLOTS = 128; + SystemConf::getInstance().PARTIAL_WINDOWS = 2*128; // change this depending on the batch size + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + std::vector aggregationTypes(3); + aggregationTypes[0] = AggregationTypes::fromString("avg"); + aggregationTypes[1] = AggregationTypes::fromString("avg"); + aggregationTypes[2] = AggregationTypes::fromString("avg"); + + std::vector aggregationAttributes(3); + aggregationAttributes[0] = new ColumnReference(2, BasicType::Integer); + aggregationAttributes[1] = new ColumnReference(3, BasicType::Integer); + aggregationAttributes[2] = new ColumnReference(4, BasicType::Integer); + + std::vector groupByAttributes; + + auto window = new WindowDefinition(RANGE_BASED, 60, 1); + + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + // reuse previous generated file + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge, true); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector 
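// [editor's note, not part of the patch] Two remarks on the fallback paths
// above: noCompressInput copies mf02 into the last output field twice, where
// mf03 is presumably intended; and the lossless paths use Snappy's raw API,
// whose round trip (destination sized via MaxCompressedLength) looks like:
//
//   #include <snappy.h>
//   std::vector<char> out(snappy::MaxCompressedLength(n));
//   size_t outLen;
//   snappy::RawCompress(src, n, out.data(), &outLen);           // n input bytes
//   std::vector<char> back(n);                                  // n is known here
//   bool ok = snappy::RawUncompress(out.data(), outLen, back.data());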
operators; + operators.push_back(queryOperator); + + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + if (SystemConf::getInstance().ADAPTIVE_CHANGE_DATA) { + queries[0]->getBuffer()->setCompressionFP(ME1Compress::compressGenInput); + //queries[0]->getBuffer()->setCompressionFP(ME1Compress::compressInput_); + } else { + queries[0]->getBuffer()->setCompressionFP(ME1Compress::compressInput_); + } + queries[0]->getBuffer()->setDecompressionFP(ME1Compress::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + } + + public: + ME1(bool inMemory = true) { + m_name = "ME1"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ManufacturingEquipment.h b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ManufacturingEquipment.h new file mode 100644 index 0000000..6a8dcae --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/ManufacturingEquipment.h @@ -0,0 +1,162 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "boost/date_time/posix_time/posix_time.hpp" + +#include "utils/TupleSchema.h" +#include "utils/QueryApplication.h" +#include "utils/Utils.h" +#include "benchmarks/applications/BenchmarkQuery.h" + +class ManufacturingEquipment : public BenchmarkQuery { + private: + struct alignas(64) InputSchema { + long timestamp; + long messageIndex; + int mf01; //Electrical Power Main Phase 1 + int mf02; //Electrical Power Main Phase 2 + int mf03; //Electrical Power Main Phase 3 + int pc13; //Anode Current Drop Detection Cell 1 + int pc14; //Anode Current Drop Detection Cell 2 + int pc15; //Anode Current Drop Detection Cell 3 + unsigned int pc25; //Anode Voltage Drop Detection Cell 1 + unsigned int pc26; //Anode Voltage Drop Detection Cell 2 + unsigned int pc27; //Anode Voltage Drop Detection Cell 3 + unsigned int res; + int bm05 = 0; + int bm06 = 0; + /*bool bm05 = false; + bool bm06 = false; + bool bm07 = false; + bool bm08 = false; + bool bm09 = false; + bool bm10 = false;*/ + + static void parse(InputSchema &tuple, std::string &line, boost::posix_time::ptime &myEpoch) { + std::istringstream iss(line); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + + const std::locale + loc = std::locale(std::locale::classic(), new boost::posix_time::time_input_facet("%Y-%m-%dT%H:%M:%S%f")); + std::istringstream is(words[0]); + is.imbue(loc); + boost::posix_time::ptime myTime; + is >> myTime; + boost::posix_time::time_duration myTimeFromEpoch = myTime - myEpoch; + + tuple.timestamp = myTimeFromEpoch.total_milliseconds() / 1000; + tuple.messageIndex = std::stol(words[1]); + tuple.mf01 = std::stoi(words[2]); + tuple.mf02 = std::stoi(words[3]); + tuple.mf03 = std::stoi(words[4]); + tuple.pc13 = std::stoi(words[5]); + tuple.pc14 = std::stoi(words[6]); + tuple.pc15 = std::stoi(words[7]); + tuple.pc25 = std::stoi(words[8]); + tuple.pc26 = std::stoi(words[9]); + tuple.pc27 = std::stoi(words[10]); + tuple.res 
= std::stoi(words[11]); + } + }; + + public: + TupleSchema *m_schema = nullptr; + QueryApplication *m_application = nullptr; + std::vector *m_data = nullptr; + bool m_debug = false; + + QueryApplication *getApplication() override { + return m_application; + } + + virtual void createApplication() = 0; + + void loadInMemoryData() { + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + m_data = new std::vector(len); + auto buf = (InputSchema *) m_data->data(); + + const std::string cell = "2012-02-22T16:46:28.9670320+00:00"; + const std::locale + loc = std::locale(std::locale::classic(), new boost::posix_time::time_input_facet("%Y-%m-%dT%H:%M:%S%f")); + std::istringstream is(cell); + is.imbue(loc); + boost::posix_time::ptime myEpoch; + is >> myEpoch; + //std::cout << myEpoch << std::endl; + + std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/manufacturing_equipment/"; + std::ifstream file(filePath + "DEBS2012-small.txt"); + if (!file.good()) + throw std::runtime_error("error: input file does not exist, check the path."); + std::string line; + unsigned long idx = 0; + while (std::getline(file, line) && idx < len / sizeof(InputSchema)) { + InputSchema::parse(buf[idx], line, myEpoch); + idx++; + } + + if (m_debug) { + std::cout + << "timestamp messageIndex mf01 mf02 mf03 pc13 pc14 pc15 pc25 pc26 pc27 res bm05 bm06 bm07 bm08 bm09 bm10" + << std::endl; + for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + printf("[DBG] %06d: %09d %09d %6d %6d %6d %6d %6d %6d %6d %6d %6d %6d" + " %1d %1d \n", + i, buf[i].timestamp, buf[i].messageIndex, buf[i].mf01, + buf[i].mf02, buf[i].mf03, buf[i].pc13, buf[i].pc14, buf[i].pc15, buf[i].pc25, + buf[i].pc26, buf[i].pc27, buf[i].res, buf[i].bm05, buf[i].bm06 + //buf[i].bm07, buf[i].bm08, buf[i].bm09, buf[i].bm10 + ); + } + } + }; + + std::vector *getInMemoryData() override { + return m_data; + } + + std::vector *getStaticData() override { + throw std::runtime_error("error: this benchmark does not have static data"); + } + + TupleSchema *getSchema() override { + if (m_schema == nullptr) + createSchema(); + return m_schema; + } + + void createSchema() { + m_schema = new TupleSchema(14, "ManufactoringEquipment"); + auto longAttr = AttributeType(BasicType::Long); + auto intAttr = AttributeType(BasicType::Integer); + auto boolAttr = AttributeType(BasicType::Char); + + m_schema->setAttributeType(0, longAttr); + m_schema->setAttributeType(1, longAttr); + m_schema->setAttributeType(2, intAttr); + m_schema->setAttributeType(3, intAttr); + m_schema->setAttributeType(4, intAttr); + m_schema->setAttributeType(5, intAttr); + m_schema->setAttributeType(6, intAttr); + m_schema->setAttributeType(7, intAttr); + m_schema->setAttributeType(8, intAttr); + m_schema->setAttributeType(9, intAttr); + m_schema->setAttributeType(10, intAttr); + m_schema->setAttributeType(11, intAttr); + m_schema->setAttributeType(12, intAttr); + m_schema->setAttributeType(13, intAttr); + //m_schema->setAttributeType(14, boolAttr ); + //m_schema->setAttributeType(15, boolAttr ); + //m_schema->setAttributeType(16, boolAttr ); + //m_schema->setAttributeType(17, boolAttr ); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/main.cpp b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/main.cpp new file mode 100644 index 0000000..f1c76fd --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/ManufacturingEquipment/main.cpp @@ -0,0 +1,19 @@ +#include +#include + +#include "ME1.cpp" + +int main(int 
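// [editor's note, not part of the patch] ManufacturingEquipment.h above parses
// the DEBS 2012 ISO-8601 timestamps with a boost::posix_time::time_input_facet
// and converts them to seconds relative to a fixed epoch:
//
//   const std::locale loc(std::locale::classic(),
//       new boost::posix_time::time_input_facet("%Y-%m-%dT%H:%M:%S%f"));
//   std::istringstream is(words[0]);
//   is.imbue(loc);                       // the locale owns the facet
//   boost::posix_time::ptime t;
//   is >> t;
//   tuple.timestamp = (t - myEpoch).total_milliseconds() / 1000;
//
// (The schema name "ManufactoringEquipment" in createSchema is a typo for
// "ManufacturingEquipment".)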
argc, const char **argv) {
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
+
+  SystemConf::getInstance().QUERY_NUM = 1;
+  BenchmarkQuery::parseCommandLineArguments(argc, argv);
+
+  if (SystemConf::getInstance().QUERY_NUM == 1) {
+    benchmarkQuery = std::make_unique<ME1>();
+  } else {
+    throw std::runtime_error("error: invalid benchmark query id");
+  }
+
+  return benchmarkQuery->runBenchmark();
+}
\ No newline at end of file
diff --git a/test/benchmarks/applicationsWithCheckpoints/Nexmark/NBQ5.cpp b/test/benchmarks/applicationsWithCheckpoints/Nexmark/NBQ5.cpp
new file mode 100644
index 0000000..5d7960f
--- /dev/null
+++ b/test/benchmarks/applicationsWithCheckpoints/Nexmark/NBQ5.cpp
@@ -0,0 +1,729 @@
+#include
+
+#include "benchmarks/applications/Nexmark/Nexmark.h"
+#include "cql/expressions/ColumnReference.h"
+#include "cql/operators/Aggregation.h"
+#include "cql/operators/AggregationType.h"
+#include "cql/operators/codeGeneration/OperatorKernel.h"
+#include "snappy.h"
+#include "utils/Query.h"
+#include "utils/QueryOperator.h"
+#include "utils/WindowDefinition.h"
+
+namespace NBQ5Compress {
+struct alignas(16) input_tuple_t {
+  long timestamp;
+  long id;
+  long itemName;
+  long description;
+  long initialBid;
+  long reserve;
+  long expires;
+  long seller;
+  long category;
+  long padding_0;
+  long padding_1;
+  long padding_2;
+  long padding_3;
+  long padding_4;
+  long padding_5;
+  long padding_6;
+};
+
+std::vector<std::string> *metadata;
+
+void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) {
+  if (start == 0 && end == -1) {
+    auto offset = 0;
+    writePos += offset;
+    return;
+  }
+  auto data = (input_tuple_t *)input;
+  std::vector<size_t> idxs (2);
+  BaseDeltaCompressor bcomp1(data[0].timestamp);
+  BaseDeltaCompressor bcomp2(data[0].id);
+  struct t_1 {
+    uint16_t timestamp : 6;
+    uint16_t counter : 10;
+  };
+  struct t_2 {
+    uint16_t id : 9;
+    uint8_t counter : 7;
+  };
+
+  //writePos = 0;
+  // compress
+  for (auto &i : idxs) {
+    i = 0;
+  }
+  t_1 *buf1 = (t_1 *)(output);
+  t_2 *buf2 = (t_2 *)(output + (int) (length*0.4));
+  size_t n = (end - start) / sizeof(input_tuple_t);
+  uint16_t count_1 = 1;  // wide enough for the 10-bit run counter
+  uint16_t count_2 = 1;  // wide enough for the 7-bit run counter
+  for (size_t idx = 0; idx < n; idx++) {
+    auto fVal_1 = bcomp1.compress(data[idx].timestamp);
+    auto sVal_1 = fVal_1;
+    if (idx < n - 1 && count_1 < 1024 && fVal_1 ==
+        (sVal_1 = bcomp1.compress(data[idx + 1].timestamp))) {
+      count_1++;
+    } else {
+      buf1[idxs[0]++] = {fVal_1, count_1};
+      fVal_1 = sVal_1;
+      count_1 = 1;
+    }
+
+    auto fVal_2 = bcomp2.compress(data[idx].id);
+    auto sVal_2 = fVal_2;
+    if (idx < n - 1 && count_2 < 512 && fVal_2 ==
+        (sVal_2 = bcomp2.compress(data[idx + 1].id))) {
+      count_2++;
+    } else {
+      buf2[idxs[1]++] = {fVal_2, count_2};
+      fVal_2 = sVal_2;
+      count_2 = 1;
+    }
+  }
+
+  writePos += idxs[0] * sizeof(t_1);
+  //(*metadata)[pid] = "c0 RLE BD " + std::to_string(data[0].timestamp) + " {uint16_t:6,uint16_t:10} " + std::to_string(writePos);
+  std::memcpy((void *)(output + writePos), (void *)buf2,
+              idxs[1] * sizeof(t_2));
+  writePos += idxs[1] * sizeof(t_2);
+  //(*metadata)[pid] = "c1 RLE BD " + std::to_string(data[0].id) + " {uint16_t:7,uint16_t:9} " + std::to_string(writePos);
+
+  if (SystemConf::getInstance().LATENCY_ON) {
+    auto value = data[0].timestamp;
+    latency = (int) (value >> 32);
+    (*metadata)[pid] += " " + std::to_string(latency) + " ";
+  }
+  //(*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " ";
+  //if ((*metadata)[pid].size() > 128) {
+  // throw
std::runtime_error("error: increase the metadata size"); + //} + //std::memcpy((void *)(output - 128), (*metadata)[pid].data(), (*metadata)[pid].size()); + //(*metadata)[pid].clear(); +} + +struct tempV { + int _1; +}; +std::vector tempVec[20]; +bool isFirst[20] = {false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false}; +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (2); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.5)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp.compress( + inOffset, outOffset, n, &data, &input_tuple_t::id, 1, buf2); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + } + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t) + + idxs[2] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + long id; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx].id}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + + throw std::runtime_error("error: fix decompression.."); + auto base = std::stoi(words[3]); + std::vector idxs (3); + idxs[0] = std::stoi(words[5]); + idxs[1] = std::stoi(words[9]); + if (SystemConf::getInstance().LATENCY_ON) { + latency = std::stoi(words[10]); + } + + + //BaseDeltaCompressor bcomp(base); + struct t_1 
{ + uint16_t timestamp : 6; + uint16_t counter : 10; + }; + struct t_2 { + uint16_t id : 9; + uint8_t counter : 7; + }; + + auto res = (input_tuple_t*) input; + t_1 *col0 = (t_1 *)(output + 128); + auto *col7 = (t_2 *)(output + 128 + idxs[0]); + auto wPos = 0; + auto dataSize = end / sizeof(input_tuple_t); + auto col1Size = idxs[0] / sizeof(t_1); + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col0[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].timestamp = temp.timestamp + base; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + // c0 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col7[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].id = temp.id; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + + writePos = wPos * sizeof(input_tuple_t); + + /*std::cout << "===========decompress===========" << std::endl; + auto n = dataSize; + for (int i = 0; i aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("cnt"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(1, BasicType::Long); + + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + OperatorCode *cpuCode; + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + //genCode->setCustomHashTable(customHashtable); + genCode->setPostWindowOperation(postOperation, postCondition, (useParallelMerge) ? 
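// [editor's note, not part of the patch] The operation strings spliced in
// below are generated-code fragments for NEXMark-style query 5 (hot items):
// the merge steps combine two partial per-window hash tables, summing the bid
// counts of keys present in both while tracking the running maximum count, and
// postCondition then keeps only result rows whose count equals that maximum.
// The kernel of the final pass is simply:
//
//   int max = INT_MIN;
//   for (int i = 0; i < mapSize; ++i)
//     if (buckets[i].state == 1 && buckets[i].value._1 > max)
//       max = buckets[i].value._1;   // hottest auction so far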
parallelMergeOperation : mergeOperation); + genCode->setQueryId(0); + genCode->setup(); + cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(NBQ5Compress::compressInput); + queries[0]->getBuffer()->setDecompressionFP(NBQ5Compress::decompressInput); + NBQ5Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + //NBQ5Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + //m_application->getCheckpointCoordinator()->setCompressionFP(0, NBQ5Compress::compress); + } + } + + std::string parallelMergeOperation = + " int _max = INT_MIN;\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (tempCompleteWindowsRes[idx].state == 1 && _max < tempCompleteWindowsRes[idx].value._1) /* Skip empty slot */\n" + " _max = tempCompleteWindowsRes[idx].value._1; \n" + " }\n"; + + std::string mergeOperation = + " int _max = INT_MIN;\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (openingWindowsRes[wid][idx].state != 1) /* Skip empty slot */\n" + " continue;\n" + " isFound = map2.get_index(openingWindowsRes[wid][idx].key, posInB2);\n" + " if (posInB2 < 0) {\n" + " printf(\"error: open-adress hash table is full \\n\");\n" + " exit(1);\n" + " }\n" + " if (!isFound) { \n" + " _max = (_max > openingWindowsRes[wid][idx].value._1) ? _max : openingWindowsRes[wid][idx].value._1;\n" + " } else { // merge values based on the number of aggregated values and their types! \n" + " int temp = openingWindowsRes[wid][idx].value._1+partialRes[wid2][posInB2].value._1;\n" + " _max = (_max > temp) ? _max : temp;\n" + " }\n" + " }\n" + "\n" + " /* Iterate over the remaining tuples in the second table. 
*/\n" + " for (int idx = 0; idx < mapSize; idx++) {\n" + " if (partialRes[wid2][idx].state == 1 && _max < partialRes[wid2][idx].value._1) /* Skip empty slot */\n" + " _max = partialRes[wid2][idx].value._1;\n" + " }\n"; + + std::string postOperation = "\tint _max = INT_MIN;\n" + "\tfor (int i = 0; i < mapSize; i++) {\n" + "\t\tif (aggrStructures[pid].getBuckets()[i].state == 1 && _max < aggrStructures[pid].getBuckets()[i].value._1)\n" + "\t\t\t_max = aggrStructures[pid].getBuckets()[i].value._1;\n" + "\t}\n"; + + std::string postCondition = "completeWindowsResults[completeWindowsPointer]._2 == _max"; + + std::string customHashtable = "using KeyT = long;\n" + "using ValueT = Value;\n" + "\n" + "struct MyHash{\n" + " std::size_t operator()(KeyT m) const {\n" + " std::hash hashVal;\n" + " return hashVal(m%1000);\n" + " }\n" + "};\n" + "struct HashMapEqualTo {\n" + " constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {\n" + " return lhs == rhs;\n" + " }\n" + "};\n" + "\n" + "struct alignas(16) Bucket {\n" + " char state;\n" + " char dirty;\n" + " long timestamp;\n" + " KeyT key;\n" + " ValueT value;\n" + " int counter;\n" + "};\n" + "\n" + "using BucketT = Bucket;\n" + "\n" + "class alignas(64) HashTable {\n" + "private:\n" + " using HashT = MyHash; //std::hash;\n" + " using EqT = HashMapEqualTo;\n" + " using AggrT = Aggregator;\n" + "\n" + " HashT _hasher;\n" + " EqT _eq;\n" + " BucketT* _buckets = nullptr;\n" + " AggrT* _aggrs = nullptr;\n" + " size_t _num_buckets = MAP_SIZE;\n" + " size_t _num_filled = 0;\n" + " size_t _mask = MAP_SIZE-1;\n" + "public:\n" + " HashTable ();\n" + " HashTable (Bucket*nodes);\n" + " void init ();\n" + " void reset ();\n" + " void clear ();\n" + " void insert (KeyT &key, ValueT &value, long timestamp);\n" + " void insert_or_modify (KeyT &key, ValueT &value, long timestamp);\n" + " bool evict (KeyT &key);\n" + " void insertSlices ();\n" + " void evictSlices ();\n" + " void setValues ();\n" + " void setIntermValues (int pos, long timestamp);\n" + " bool get_value (const KeyT &key, ValueT &result);\n" + " bool get_result (const KeyT &key, ValueT &result);\n" + " bool get_index (const KeyT &key, int &index);\n" + " void deleteHashTable();\n" + " BucketT* getBuckets ();\n" + " size_t getSize() const;\n" + " bool isEmpty() const;\n" + " size_t getNumberOfBuckets() const;\n" + " float load_factor() const;\n" + "};\n" + "\n" + "HashTable::HashTable () {}\n" + "\n" + "HashTable::HashTable (Bucket *nodes) : _buckets(nodes) {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "}\n" + "\n" + "void HashTable::init () {\n" + " if (!(_num_buckets && !(_num_buckets & (_num_buckets - 1)))) {\n" + " throw std::runtime_error (\"error: the size of the hash table has to be a power of two\\n\");\n" + " }\n" + "\n" + " _buckets = (BucketT*)_mm_malloc(_num_buckets * sizeof(BucketT), 64);\n" + " _aggrs = (AggrT*)_mm_malloc(_num_buckets * sizeof(AggrT), 64);\n" + " if (!_buckets /*|| !_aggrs*/) {\n" + " free(_buckets);\n" + " /*free(_aggrs);*/\n" + " throw std::bad_alloc();\n" + " }\n" + "\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " _aggrs[i] = AggrT (); // maybe initiliaze this on insert\n" + " _aggrs[i].initialise();\n" + " }\n" + "}\n" + "\n" + "void HashTable::reset () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " 
//_aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::clear () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " _buckets[i].state = 0;\n" + " _buckets[i].dirty = 0;\n" + " //_buckets[i].counter = 0;\n" + " _aggrs[i].initialise();\n" + " }\n" + " _num_filled = 0;\n" + "}\n" + "\n" + "void HashTable::insert (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key; //std::memcpy(&_buckets[i].key, key, KEY_SIZE);\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (!_buckets[i].state || _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "void HashTable::insert_or_modify (KeyT &key, ValueT &value, long timestamp) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " char tempState;\n" + " for (; i < _num_buckets; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " return;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " tempState = _buckets[i].state;\n" + " if (tempState && _eq(_buckets[i].key, key)) {\n" + "\t\t\t\t_buckets[i].value._1 = _buckets[i].value._1+value._1;\n" + " _buckets[i].counter++;\n" + " return;\n" + " }\n" + " if (!tempState && (_eq(_buckets[i].key, key) || _buckets[i].dirty == 0)) { // first insert -- keep track of previous inserted value\n" + " _buckets[i].state = 1;\n" + " _buckets[i].dirty = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + " _buckets[i].key = key;\n" + " _buckets[i].value = value;\n" + " _buckets[i].counter = 1;\n" + " return;\n" + " }\n" + " }\n" + " throw std::runtime_error (\"error: the hashtable is full \\n\");\n" + "}\n" + "\n" + "bool HashTable::evict (KeyT &key) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " _buckets[i].state = 0;\n" + " return true;\n" + " }\n" + " }\n" + " printf (\"error: entry not found \\n\");\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::insertSlices () {\n" + " int maxNumOfSlices = INT_MIN;\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " int temp = _aggrs[i].addedElements - _aggrs[i].removedElements;\n" + " if (_buckets[i].state) {\n" + " node n;\n" + "\t\t\t\tn._1 = _buckets[i].value._1;\n" + " _aggrs[i].insert(n);\n" + " _buckets[i].state = 0;\n" + " //_buckets[i].value = ValueT();\n" + " } else if (temp > 0) {\n" + " ValueT val;\n" + 
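// [editor's note, not part of the patch] insertSlices/evictSlices in this
// generated code are the incremental-aggregation hooks: each bucket owns an
// Aggregator over fixed-size slices, so closing a slice absorbs the bucket's
// partial value, sliding the window evicts the oldest slice, and query()
// recombines the live slices into a window result without rescanning tuples:
//
//   agg.insert(partial);     // slice closes: absorb the bucket's partial count
//   agg.evict();             // window slides: drop the oldest slice
//   auto res = agg.query();  // combine the remaining slices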
" node n;\n" + "\t\t\tn._1 = val._1;\n" + " _aggrs[i].insert(n);\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::evictSlices () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " _aggrs[i].evict();\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setValues () {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].query();\n" + " _buckets[i].state = 1;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " _buckets[i].counter = 1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "void HashTable::setIntermValues (int pos, long timestamp) {\n" + " for (auto i = 0; i < _num_buckets; ++i) {\n" + " if (_aggrs[i].addedElements - _aggrs[i].removedElements > 0) {\n" + " auto res = _aggrs[i].queryIntermediate (pos);\n" + " _buckets[i].state = 1;\n" + " _buckets[i].timestamp = timestamp;\n" + "\t\t\t_buckets[i].value._1 = res._1;\n" + " }\n" + " }\n" + "}\n" + "\n" + "bool HashTable::get_value (const KeyT &key, ValueT &result) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " result = _buckets[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "bool HashTable::get_index (const KeyT &key, int &index) {\n" + " size_t ind = _hasher(key) & _mask, i = ind;\n" + " index = -1;\n" + " for (; i < _num_buckets; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " }\n" + " for (i = 0; i < ind; i++) {\n" + " if (_buckets[i].state && _eq(_buckets[i].key, key)) {\n" + " index = i;\n" + " return true;\n" + " }\n" + " if (_buckets[i].state == 0 && index == -1) {\n" + " index = i;\n" + " }\n" + " }\n" + " return false;\n" + "}\n" + "\n" + "void HashTable::deleteHashTable() {\n" + " for (size_t bucket=0; bucket<_num_buckets; ++bucket) {\n" + " _buckets[bucket].~BucketT();\n" + " _aggrs->~AggrT();\n" + " }\n" + " free(_buckets);\n" + " free(_aggrs);\n" + "}\n" + "\n" + "BucketT* HashTable::getBuckets () {\n" + " return _buckets;\n" + "}\n" + "\n" + "size_t HashTable::getSize() const {\n" + " return _num_filled;\n" + "}\n" + "\n" + "bool HashTable::isEmpty() const {\n" + " return _num_filled==0;\n" + "}\n" + "\n" + "size_t HashTable::getNumberOfBuckets() const {\n" + " return _num_buckets;\n" + "}\n" + "\n" + "float HashTable::load_factor() const {\n" + " return static_cast(_num_filled) / static_cast(_num_buckets);\n" + "}\n"; + + public: + explicit NBQ5(bool inMemory = true, bool startApp = true) { + m_name = "NBQ5"; + createSchema(); + if (inMemory) + loadInMemoryData(); + if (startApp) + createApplication(); + } +}; \ No newline at end of file diff --git a/test/benchmarks/applicationsWithCheckpoints/Nexmark/Nexmark.h b/test/benchmarks/applicationsWithCheckpoints/Nexmark/Nexmark.h new file mode 100644 index 0000000..379b9cb --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/Nexmark/Nexmark.h @@ -0,0 +1,317 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include 
"../BenchmarkQuery.h" +#include "utils/QueryApplication.h" +#include "utils/TupleSchema.h" +#include "utils/Utils.h" + +class Nexmark : public BenchmarkQuery { + private: + struct InputSchema { + long timestamp; + long id; + long itemName; + long description; + long initialBid; + long reserve; + long expires; + long seller; + long category; + long padding_0; + long padding_1; + long padding_2; + long padding_3; + long padding_4; + long padding_5; + long padding_6; + + static void parse(InputSchema &tuple, std::string &line) { + std::istringstream iss(line); + std::vector words{std::istream_iterator{iss}, + std::istream_iterator{}}; + tuple.timestamp = std::stol(words[0]); + tuple.id = std::stol(words[1]); + tuple.itemName = std::stol(words[2]); + tuple.description = std::stol(words[3]); + tuple.initialBid = std::stol(words[4]); + tuple.reserve = std::stol(words[5]); + tuple.expires = std::stol(words[6]); + tuple.seller = std::stol(words[7]); + tuple.category = std::stol(words[8]); + } + }; + + /** + * We start the ids at specific values to help ensure the queries find a match + * even on small synthesized dataset sizes. + */ + const long FIRST_AUCTION_ID = 1000L; + const long FIRST_PERSON_ID = 1000L; + const long FIRST_CATEGORY_ID = 10L; + /** Proportions of people/auctions/bids to synthesize. */ + const int PERSON_PROPORTION = 1; + + const int AUCTION_PROPORTION = 3; + const int BID_PROPORTION = 46; + const int PROPORTION_DENOMINATOR = + PERSON_PROPORTION + AUCTION_PROPORTION + BID_PROPORTION; + + /** + * Keep the number of categories small so the example queries will find + * results even with a small batch of events. + */ + const int NUM_CATEGORIES = 5; + /** Number of yet-to-be-created people and auction ids allowed. */ + const int AUCTION_ID_LEAD = 10; + /** + * Fraction of people/auctions which may be 'hot' sellers/bidders/auctions are + * 1 over these values. + */ + const int HOT_SELLER_RATIO = 100; + + /* + * Extra parameters + * */ + long outOfOrderGroupSize = 1; + long firstEventNumber = 0; + long firstEventId = 0; + /** Number of yet-to-be-created people and auction ids allowed. */ + const int PERSON_ID_LEAD = 10; + /** Average idealized size of a 'new person' event, in bytes. */ + int avgPersonByteSize = 200; + /** Average idealized size of a 'new auction' event, in bytes. */ + int avgAuctionByteSize = 500; + /** Average idealized size of a 'bid' event, in bytes. */ + int avgBidByteSize = 100; + /** Ratio of bids to 'hot' auctions compared to all other auctions. */ + int hotAuctionRatio = 2; + /** Ratio of auctions for 'hot' sellers compared to all other people. */ + int hotSellersRatio = 4; + /** Ratio of bids for 'hot' bidders compared to all other people. */ + int hotBiddersRatio = 4; + /** Window size, in seconds, for queries 3, 5, 7 and 8. */ + long windowSizeSec = 10; + /** Sliding window period, in seconds, for query 5. */ + long windowPeriodSec = 5; + /** Number of seconds to hold back events according to their reported + * timestamp. */ + long watermarkHoldbackSec = 0; + /** Average number of auction which should be inflight at any time, per + * generator. */ + int numInFlightAuctions = 100; + /** Maximum number of people to consider as active for placing auctions or + * bids. */ + int numActivePeople = 1000; + /** Initial overall event rate. */ + int firstEventRate = 10000; + /** Next overall event rate. 
*/ + int nextEventRate = 10000; + /** Events per second. */ + const int eventsPerSec = 1000; + + long lastBase0AuctionId(long eventId) { + long epoch = eventId / PROPORTION_DENOMINATOR; + long offset = eventId % PROPORTION_DENOMINATOR; + if (offset < PERSON_PROPORTION) { + // About to generate a person. + // Go back to the last auction in the last epoch. + epoch--; + offset = AUCTION_PROPORTION - 1; + } else if (offset >= PERSON_PROPORTION + AUCTION_PROPORTION) { + // About to generate a bid. + // Go back to the last auction generated in this epoch. + offset = AUCTION_PROPORTION - 1; + } else { + // About to generate an auction. + offset -= PERSON_PROPORTION; + } + return epoch * AUCTION_PROPORTION + offset; + } + + long lastBase0PersonId(long eventId) { + long epoch = eventId / PROPORTION_DENOMINATOR; + long offset = eventId % PROPORTION_DENOMINATOR; + if (offset >= PERSON_PROPORTION) { + // About to generate an auction or bid. + // Go back to the last person generated in this epoch. + offset = PERSON_PROPORTION - 1; + } + // About to generate a person. + return epoch * PERSON_PROPORTION + offset; + } + + long nextBase0PersonId(long eventId) { + // Choose a random person from any of the 'active' people, plus a few + // 'leads'. By limiting to 'active' we ensure the density of bids or + // auctions per person does not decrease over time for long running jobs. By + // choosing a person id ahead of the last valid person id we will make + // newPerson and newAuction events appear to have been swapped in time. + // todo: fix this + std::random_device rd; + std::mt19937_64 eng(rd()); + + long numPeople = lastBase0PersonId(eventId) + 1; + long activePeople = std::min(numPeople, (long)numActivePeople); + + std::uniform_int_distribution<long> distr(0, activePeople + PERSON_ID_LEAD); + long n = distr(eng); + return numPeople - activePeople + n; + } + + long nextEventNumber(long numEvents) { return firstEventNumber + numEvents; } + + long nextAdjustedEventNumber(long numEvents) { + long n = outOfOrderGroupSize; + long eventNumber = nextEventNumber(numEvents); + long base = (eventNumber / n) * n; + long offset = (eventNumber * 953) % n; + return base + offset; + } + + long getNextEventId(long eventsCountSoFar) { + return firstEventId + nextAdjustedEventNumber(eventsCountSoFar); + } + + long getNextAuctionId(long eventsCountSoFar) { + return FIRST_AUCTION_ID + nextAdjustedEventNumber(eventsCountSoFar); + } + + public: + TupleSchema *m_schema = nullptr; + QueryApplication *m_application = nullptr; + std::vector<char> *m_data = nullptr; + bool m_debug = false; + + QueryApplication *getApplication() override { return m_application; } + + virtual void createApplication() = 0; + + void loadInMemoryData() { + std::random_device rd; + std::mt19937_64 eng(rd()); + std::uniform_int_distribution<long> distr(0, 1000000); + + std::random_device rd_; + std::mt19937_64 eng_(rd_()); + std::uniform_real_distribution<> dbl(0.0, 1.0); + + std::random_device _rd; + std::mt19937_64 _eng(_rd()); + std::uniform_int_distribution<int> _distr(HOT_SELLER_RATIO); + + std::random_device _rd_; + std::mt19937_64 _eng_(_rd_()); + std::uniform_int_distribution<int> _distr_(0, NUM_CATEGORIES); + + size_t len = SystemConf::getInstance().BUNDLE_SIZE; + m_data = new std::vector<char>(len); + auto buf = (InputSchema *)m_data->data(); + + std::unordered_set<long> set; + + std::string line; + unsigned long idx = 0; + long timestamp = 0; + while (idx < len / sizeof(InputSchema)) { + auto eventsCountSoFar = idx; + auto newEventId = getNextEventId(eventsCountSoFar); +
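+ /* Advance the synthetic event time by one second after every eventsPerSec generated tuples. */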
+ if ((int)idx % eventsPerSec == 0) { + timestamp++; + } + auto id = lastBase0AuctionId(newEventId) + FIRST_AUCTION_ID; + set.insert(id); + auto initialBid = std::round(std::pow(10.0, dbl(eng_) * 6.0) * 100.0); + auto itemName = distr(eng); + auto description = distr(eng); + auto reserve = + initialBid + std::round(std::pow(10.0, dbl(eng_) * 6.0) * 100.0); + long seller; + // Here P(auction will be for a hot seller) = 1 - 1/hotSellersRatio. + if (_distr(_eng) > 0) { + // Choose the first person in the batch of last HOT_SELLER_RATIO people. + seller = (lastBase0PersonId(newEventId) / HOT_SELLER_RATIO) * + HOT_SELLER_RATIO; + } else { + seller = nextBase0PersonId(newEventId); + } + seller += FIRST_PERSON_ID; + auto category = FIRST_CATEGORY_ID + _distr_(_eng_); + auto expires = timestamp + distr(eng); + + line = std::to_string(timestamp) + " " + std::to_string(id) + " " + + std::to_string(itemName) + " " + std::to_string(description) + + " " + std::to_string(initialBid) + " " + std::to_string(reserve) + + " " + std::to_string(expires) + " " + std::to_string(seller) + + " " + std::to_string(category); + InputSchema::parse(buf[idx], line); + if (m_startTimestamp == 0) { + m_startTimestamp = buf[0].timestamp; + } + m_endTimestamp = buf[idx].timestamp; + idx++; + } + + std::cout << "Distinct keys " << set.size() << std::endl; + + if (m_debug) { + std::cout << "timestamp id itemName description initialBid reserve " + "expires seller category" + << std::endl; + for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) { + printf("[DBG] %09lu: %7ld %13ld %8ld %13ld %3ld %6ld %2ld %6ld %6ld \n", + i, buf[i].timestamp, (long)buf[i].id, (long)buf[i].itemName, + (long)buf[i].description, buf[i].initialBid, buf[i].reserve, + (long)buf[i].expires, (long)buf[i].seller, + (long)buf[i].category); + } + } + }; + + std::vector<char> *getInMemoryData() override { return m_data; } + + TupleSchema *getSchema() override { + if (m_schema == nullptr) createSchema(); + return m_schema; + } + + std::vector<char> *getStaticData() override { + throw std::runtime_error("error: this benchmark does not have static data"); + } + + void createSchema() { + m_schema = new TupleSchema(16, "Nexmark"); // 9, "Nexmark"); + auto longAttr = AttributeType(BasicType::Long); + + m_schema->setAttributeType(0, longAttr); /* timestamp: long */ + m_schema->setAttributeType(1, longAttr); /* id: long */ + m_schema->setAttributeType(2, longAttr); /* itemName: long */ + m_schema->setAttributeType(3, longAttr); /* description: long */ + m_schema->setAttributeType(4, longAttr); /* initialBid: long */ + m_schema->setAttributeType(5, longAttr); /* reserve: long */ + m_schema->setAttributeType(6, longAttr); /* expires: long */ + m_schema->setAttributeType(7, longAttr); /* seller: long */ + m_schema->setAttributeType(8, longAttr); /* category: long */ + m_schema->setAttributeType(9, longAttr); /* padding: long */ + m_schema->setAttributeType(10, longAttr); /* padding: long */ + m_schema->setAttributeType(11, longAttr); /* padding: long */ + m_schema->setAttributeType(12, longAttr); /* padding: long */ + m_schema->setAttributeType(13, longAttr); /* padding: long */ + m_schema->setAttributeType(14, longAttr); /* padding: long */ + m_schema->setAttributeType(15, longAttr); /* padding: long */ + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/Nexmark/main.cpp b/test/benchmarks/applicationsWithCheckpoints/Nexmark/main.cpp new file mode 100644 index 0000000..079aa38 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/Nexmark/main.cpp
@@ -0,0 +1,18 @@ +#include <memory> + +#include "NBQ5.cpp" + +int main(int argc, const char **argv) { + std::unique_ptr<BenchmarkQuery> benchmarkQuery {}; + + SystemConf::getInstance().QUERY_NUM = 1; + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + if (SystemConf::getInstance().QUERY_NUM == 1) { + benchmarkQuery = std::make_unique<NBQ5>(); + } else { + throw std::runtime_error("error: invalid benchmark query id"); + } + + return benchmarkQuery->runBenchmark(); +} \ No newline at end of file diff --git a/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG1.cpp b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG1.cpp new file mode 100644 index 0000000..48a0a5e --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG1.cpp @@ -0,0 +1,449 @@ +#include "benchmarks/applications/SmartGrid/SmartGrid.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "snappy.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace SG1Compress { +struct alignas(16) input_tuple_t { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; + +std::vector<std::string> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector<size_t> idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint32_t value : 22; + uint16_t counter : 10; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = (uint16_t) bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = fcomp.compress(data[idx].value); + auto sVal_2 = fVal_2; + if (idx < n - 1 && + fVal_2 == (sVal_2 = fcomp.compress(data[idx + 1].value))) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint16_t)fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector<size_t> idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint32_t value : 20; + uint16_t counter : 12; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + }
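+ /* Columnar staging, as in compressInput above: RLE timestamps are built at the front of the scratch buffer and RLE values at offset length/2; the value column is then compacted directly behind the timestamp column. */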
+ t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint16_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = (uint16_t) bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && count_1 < 255 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = fcomp.compress(data[idx].value); + auto sVal_2 = fVal_2; + if (idx < n - 1 && count_2 < 4095 && + fVal_2 == (sVal_2 = fcomp.compress(data[idx + 1].value))) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint8_t)fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + //buf2[idxs[1]++] = {(uint16_t)std::round(data[idx].value*1)}; + } + + writePos += idxs[0] * sizeof(t_1); + (*metadata)[pid] = "c0 BS " + std::to_string(data[0].timestamp) + " {uint8_t:8;uint8_t:8} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[1] * sizeof(t_2); + (*metadata)[pid] += "c1 FM 1000 {uint32_t:20;uint16_t:12} " + std::to_string(writePos); + + if (SystemConf::getInstance().LATENCY_ON) { + auto value = data[0].timestamp; + latency = (int) (value >> 32); + (*metadata)[pid] += " " + std::to_string(latency) + " "; + } + //(*metadata)[pid] = "r0 " + std::to_string(idxs[0]) + " r1 " + std::to_string(idxs[1]) + " "; + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), (*metadata)[pid].size()); + (*metadata)[pid].clear(); +} + +void decompressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) { + // parse metadata + std::string meta; + if (copy) { + std::memcpy(output, input, end); + for (size_t i = 0; i < 128; i++) { + meta += input[i]; + } + } else { + for (size_t i = 0; i < 128; i++) { + meta += output[i]; + } + } + + std::istringstream iss(meta); + std::vector<std::string> words{std::istream_iterator<std::string>{iss}, + std::istream_iterator<std::string>{}}; + + auto base = std::stoi(words[2]); + auto mul = std::stoi(words[7]); + std::vector<size_t> idxs (2); + idxs[0] = std::stoi(words[4]); + idxs[1] = std::stoi(words[9]); + if (SystemConf::getInstance().LATENCY_ON) { + latency = std::stoi(words[10]); + } + + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint32_t value : 20; + uint16_t counter : 12; + }; + + auto res = (input_tuple_t*) input; + t_1 *col0 = (t_1 *)(output + 128); + auto *col1 = (t_2 *)(output + 128 + idxs[0]); + auto wPos = 0; + auto dataSize = end / sizeof(input_tuple_t); + auto col1Size = idxs[0] / sizeof(t_1); + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col0[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].timestamp = temp.timestamp + base; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + res[0].timestamp = Utils::pack(latency, (int)res[0].timestamp); + } + + // c1 + wPos = 0; + for (int idx = 0; idx < col1Size; ++idx) { + auto temp = col1[idx]; + for (int it = 0; it < temp.counter; ++it) { + res[wPos++].value = (float) temp.value * mul; + if (wPos * sizeof(input_tuple_t) > SystemConf::getInstance().BATCH_SIZE) { + throw
std::runtime_error("error: the write position exceeds the batch size"); + } + } + } + + writePos = wPos * sizeof(input_tuple_t); + + /*std::cout << "===========decompress===========" << std::endl; + auto n = dataSize; + for (int i = 0; i tempVec[20]; +bool isFirst[20] = {false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false}; +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (2); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + tempVec[pid][idx]._1 = data[idx].value; + } + + // simple 8 + auto tempData = tempVec[pid].data(); + auto buf2 = (uint64_t *)(output + (int) (length*0.33)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp.compress( + inOffset, outOffset, n, &tempData, &tempV::_1, 1, buf2); + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res1 { + long timestamp; + }; + struct res2 { + float value; + }; + size_t n = (end - start) / sizeof(input_tuple_t); + auto out1 = (res1*) output; + auto out2 = (res2*) (output + (int)(n * sizeof(res1))); + for (size_t idx = 0; idx < n; idx++) { + out1[idx] = {data[idx].timestamp}; + out2[idx] = {data[idx].value}; + } + writePos = n * sizeof(res1) + n * sizeof(res2); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void filterInput(char *input, int start, int end, char *output, int startOutput, int &writePos) { + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + long _pad; + __uint128_t userId; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + size_t outIdx = startOutput; + for (size_t idx = 0; idx < n; idx++) { + std::memcpy(&out[idx], 
&data[idx], 16); + } + writePos = n * sizeof(res); +} + +void onlyCompressInputLossless(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (1); + for (auto &i : idxs) { + i = 0; + } + + size_t output_length; + auto buf1 = (uint64_t *)input; + snappy::RawCompress((const char *)(buf1), end, (char*)(output), &output_length); + writePos += output_length; +} +}; + +class SG1 : public SmartGrid { + private: + void createApplication() override { + SystemConf::getInstance().PARTIAL_WINDOWS = 3800; + + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("avg"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes; + + auto window = new WindowDefinition(RANGE_BASED, 3600, 1); //ROW_BASED, 85*400, 1*400); + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << genCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, false, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + +#if defined(RDMA_INPUT) + //queries[0]->getBuffer()->setFilterFP(SG1Compress::filterInput); +#endif + + if (persistInput && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(SG1Compress::compressInput_); + queries[0]->getBuffer()->setDecompressionFP(SG1Compress::decompressInput_); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + SG1Compress::metadata = new std::vector>(SystemConf::getInstance().WORKER_THREADS, ""); + //m_application->getCheckpointCoordinator()->setCompressionFP(0, SG1Compress::compress); + } + } + + public: + SG1(bool inMemory = true) { + m_name = "SG1"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG2.cpp b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG2.cpp new file mode 100644 index 0000000..19d90da --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG2.cpp @@ -0,0 +1,571 @@ +#include 
"benchmarks/applications/SmartGrid/SmartGrid.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace SG2Compress { +struct alignas(16) input_tuple_t { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + float _4; + int _5; +}; + +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._2) * 10 + uint16_t(key._2); + return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2; + } +}; +std::vector>> *dcomp; + +struct dBucket { + Key key; +}; +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{ -1, -1, -1}; + } + } + writePos += (*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (3); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint16_t groupKey : 10; + uint16_t counter : 6; + }; + struct t_3 { + uint32_t value : 22; + uint16_t counter : 10; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.66)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx].plug, data[idx].household, + data[idx].house}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto sVal_2 = fVal_2; + if (idx < n - 1) { + Key temp2{data[idx+1].plug, data[idx+1].household, + data[idx+1].house}; + sVal_2 = (*dcomp)[pid]->compress(temp2); + if (sVal_2 == fVal_2) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + auto fVal_3 = fcomp.compress(data[idx].value); + auto sVal_3 = fVal_3; + if 
(idx < n - 1 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1].value))) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + std::memcpy((void *)(output + idxs[0] * sizeof(t_1) + + idxs[1] * sizeof(t_2)), + (void *)buf3, idxs[2] * sizeof(t_3)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2) + + idxs[2] * sizeof(t_3); +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (5); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint8_t _1 : 6; + uint8_t _2 : 6; + uint8_t _3 : 4; + }; + struct t_5 { + uint32_t value : 22; + uint16_t counter : 10; + }; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + t_1 *buf1 = (t_1 *)(output); + uint8_t count_1 = 1; // as the first delta is stored in 14 bits + + // simple 8 + auto buf2 = (t_2 *)(output + (int) (length*0.2)); + /*auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::plug, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::household, 1, buf2); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::house, 1, buf2);*/ + + // store first float in 64 bits + t_5 *buf5 = (t_5 *)(output + (int) (length*0.8)); + uint16_t count_5 = 1; + + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + buf2[idxs[1]++] = {static_cast(data[idx].plug), static_cast(data[idx].household), static_cast(data[idx].house)}; + + auto fVal_3 = fcomp.compress(data[idx].value); + auto sVal_3 = fVal_3; + if (idx < n - 1 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1].value))) { + count_5++; + } else { + buf5[idxs[4]++] = {(uint16_t)fVal_3, count_5}; + fVal_3 = sVal_3; + count_5 = 1; + } + } + writePos += idxs[0] * sizeof(t_1); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(t_2); + /*std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t);*/ + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(t_5)); + writePos += idxs[4] * sizeof(t_5); +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool 
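/* clear: when set, any per-worker compressor state is reset before compressing */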
&clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (5); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + GorillaValuesCompressor xorComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::plug, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::household, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::house, 1, buf4); + + // gorilla float + // store first float in 64 bits + auto buf5 = (uint64_t *)(output + (int) (length*0.8)); + buf5[idxs[4]++] = data[0].value; + // calculate trailing and leading zeros for first float + uint64_t *firstV = (uint64_t *)&data[0].value; + int prevLeadingZeros = __builtin_clzll(*firstV); + int prevTrailingZeros = __builtin_ctzll(*firstV); + uint16_t count_5 = 1; + + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].value; + auto current = (float)data[idx].value; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp.compress(xorValue, prevLeadingZeros, prevTrailingZeros); + prevLeadingZeros = leadingZeros; + prevTrailingZeros = trailingZeros; + if (count_5 + appendedValueLength > 64) { + uint8_t split = (64 - count_5); + if (appendedValueLength > 1) { + buf5[idxs[4]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[4]; + count_5 = appendedValueLength - split; + } else { + count_5 += appendedValueLength; + } + buf5[idxs[4]] |= appendedValue << (64 - count_5); + } + } + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + 
writePos += idxs[4] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int plug; + int household; + int house; + float value; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx].plug, + data[idx].household, data[idx].house, data[idx].value}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(Bucket); + end = end / sizeof(Bucket); + DummyFloatCompressor fcomp(1000000); + + if (!isComplete) { + auto inputBuf = (Bucket *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint32_t timestamp : 8; + uint32_t groupKey : 10; + uint32_t value : 20; + uint32_t counter : 10; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + fcomp.compress(inputBuf[idx].value._1), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint32_t timestamp : 8; + uint32_t groupKey : 10; + uint32_t value : 20; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + fcomp.compress(inputBuf[idx]._4)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + throw std::runtime_error("error: the decompression function is not implemented"); +} +}; + +class SG2 : public SmartGrid { + private: + void createApplication() override { + SystemConf::getInstance().PARTIAL_WINDOWS = 144; //3600; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + // Configure first query + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("avg"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes(3); + groupByAttributes[0] = new ColumnReference(3, BasicType::Integer); + groupByAttributes[1] = new ColumnReference(4, BasicType::Integer); + groupByAttributes[2] 
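/* group-by key = (plug, household, house), columns 3-5 of the SmartGrid schema */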
= new ColumnReference(5, BasicType::Integer); + + auto window = new WindowDefinition(RANGE_BASED, 128, 1); //ROW_BASED, 36*1000, 1*1000); + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setCollisionBarrier(28); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << genCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(SG2Compress::compressInput_); + queries[0]->getBuffer()->setDecompressionFP(SG2Compress::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + SG2Compress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + SG2Compress::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + } + if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + m_application->getCheckpointCoordinator()->setCompressionFP(0, SG2Compress::compress); + m_application->getCheckpointCoordinator()->setDecompressionFP(0, SG2Compress::decompress); + } + } + + public: + SG2(bool inMemory = true) { + m_name = "SG2"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG3.cpp b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG3.cpp new file mode 100644 index 0000000..407f462 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SG3.cpp @@ -0,0 +1,1115 @@ +#include + +#include "benchmarks/applications/SmartGrid/SmartGrid.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "cql/predicates/ComparisonPredicate.h" +#include "utils/Query.h" +#include "utils/QueryConfig.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +namespace SG1Compress_ { +struct alignas(16) input_tuple_t { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if 
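/* a (start, end) of (0, -1) is a state-flush call; unlike the dictionary-based SG2 compressors, this RLE compressor keeps no state to dump */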
(start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint32_t value : 22; + uint16_t counter : 10; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = (uint16_t) bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = fcomp.compress(data[idx].value); + auto sVal_2 = fVal_2; + if (idx < n - 1 && + fVal_2 == (sVal_2 = fcomp.compress(data[idx + 1].value))) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint16_t)fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + //uint16_t value : 10; + //uint16_t counter : 6; + uint32_t value : 20; + uint16_t counter : 12; + }; + /*struct t_2 { + uint8_t value : 4; + uint8_t counter : 4; + };*/ + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = (uint16_t) bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = data[idx].value;//(int)std::round(data[idx].value*100); + auto sVal_2 = fVal_2; + if (idx < n - 1 && + fVal_2 == (sVal_2 = data[idx].value)) {//(int)std::round(data[idx+1].value*100))) { + count_2++; + } else { + buf2[idxs[1]++] = {(uint8_t)fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + //buf2[idxs[1]++] = {(uint16_t)std::round(data[idx].value*1)}; + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); +} + +struct tempV { + int _1; +}; +std::vector tempVec[20]; +bool isFirst[20] = {false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false}; +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + 
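/* idxs[i] counts the 64-bit words emitted for column i */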
std::vector idxs (2); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + if (!isFirst[pid]) { + tempVec[pid].resize(n); + isFirst[pid] = true; + } + + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + tempVec[pid][idx]._1 = data[idx].value; + } + + // simple 8 + auto tempData = tempVec[pid].data(); + auto buf2 = (uint64_t *)(output + (int) (length*0.33)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp.compress( + inOffset, outOffset, n, &tempData, &tempV::_1, 1, buf2); + + std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[0] * sizeof(uint64_t) + idxs[1] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res1 { + long timestamp; + }; + struct res2 { + float value; + }; + size_t n = (end - start) / sizeof(input_tuple_t); + auto out1 = (res1*) output; + auto out2 = (res2*) (output + (int)(n * sizeof(res1))); + for (size_t idx = 0; idx < n; idx++) { + out1[idx] = {data[idx].timestamp}; + out2[idx] = {data[idx].value}; + } + writePos = n * sizeof(res1) + n * sizeof(res2); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} +}; +namespace SG2Compress_ { +struct alignas(16) input_tuple_t { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; +struct alignas(16) output_tuple_t { + long timestamp; + int _1; + int _2; + int _3; + float _4; + int _5; +}; + +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._2) * 10 + uint16_t(key._2); + return result; + } +}; +struct Eq { + constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const { + return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2; + } +}; +std::vector>> *dcomp; + +struct dBucket { + Key key; +}; +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = 
-1) { + if (start == 0 && end == -1) { + auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{ -1, -1, -1}; + } + } + writePos += (*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (3); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint16_t groupKey : 10; + uint16_t counter : 6; + }; + struct t_3 { + uint32_t value : 22; + uint16_t counter : 10; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.33)); + t_3 *buf3 = (t_3 *)(output + (int) (length*0.66)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + uint16_t count_3 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + Key temp{data[idx].plug, data[idx].household, + data[idx].house}; + auto fVal_2 = (*dcomp)[pid]->compress(temp); + auto sVal_2 = fVal_2; + if (idx < n - 1) { + Key temp2{data[idx+1].plug, data[idx+1].household, + data[idx+1].house}; + sVal_2 = (*dcomp)[pid]->compress(temp2); + if (sVal_2 == fVal_2) { + count_2++; + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + } else { + buf2[idxs[1]++] = {fVal_2, count_2}; + fVal_2 = sVal_2; + count_2 = 1; + } + + auto fVal_3 = fcomp.compress(data[idx].value); + auto sVal_3 = fVal_3; + if (idx < n - 1 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1].value))) { + count_3++; + } else { + buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3}; + fVal_3 = sVal_3; + count_3 = 1; + } + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + std::memcpy((void *)(output + idxs[0] * sizeof(t_1) + + idxs[1] * sizeof(t_2)), + (void *)buf3, idxs[2] * sizeof(t_3)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2) + + idxs[2] * sizeof(t_3); +} + +void compressInput_(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (5); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint8_t _1 : 6; + uint8_t _2 : 6; + uint8_t _3 : 4; + }; + struct t_5 { + uint32_t value : 22; + uint16_t counter : 10; + }; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + t_1 *buf1 = (t_1 *)(output); + uint8_t count_1 = 1; // as the first delta is stored in 14 bits + + // simple 8 + auto buf2 = (t_2 *)(output + (int) (length*0.2)); + /*auto buf2 = (uint64_t *)(output + 
(int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::plug, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::household, 1, buf2); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::house, 1, buf2);*/ + + // store first float in 64 bits + t_5 *buf5 = (t_5 *)(output + (int) (length*0.8)); + uint16_t count_5 = 1; + + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + buf2[idxs[1]++] = {static_cast(data[idx].plug), static_cast(data[idx].household), static_cast(data[idx].house)}; + + auto fVal_3 = fcomp.compress(data[idx].value); + auto sVal_3 = fVal_3; + if (idx < n - 1 && + fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1].value))) { + count_5++; + } else { + buf5[idxs[4]++] = {(uint16_t)fVal_3, count_5}; + fVal_3 = sVal_3; + count_5 = 1; + } + } + writePos += idxs[0] * sizeof(t_1); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(t_2); + /*std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t);*/ + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(t_5)); + writePos += idxs[4] * sizeof(t_5); +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + if (clear) { + clear = false; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (5); + GorillaTimestampCompressor gorillaComp; + Simple8 simpleComp1; + Simple8 simpleComp2; + Simple8 simpleComp3; + GorillaValuesCompressor xorComp; + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + size_t n = (end - start) / sizeof(input_tuple_t); + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + // simple 8 + auto buf2 = (uint64_t *)(output + (int) (length*0.2)); + int32_t inOffset = 0; + int32_t outOffset = 0; + idxs[1] = simpleComp1.compress( + inOffset, outOffset, n, &data, &input_tuple_t::plug, 1, buf2); + + // simple 8 + auto buf3 = (uint64_t *)(output + (int) (length*0.4)); + inOffset = 0; + outOffset = 0; + idxs[2] = simpleComp2.compress( + inOffset, outOffset, n, &data, &input_tuple_t::household, 1, buf3); + + // simple 8 + auto buf4 = (uint64_t *)(output + (int) (length*0.6)); + inOffset = 0; + outOffset = 0; + idxs[3] = simpleComp3.compress( + inOffset, outOffset, n, &data, &input_tuple_t::house, 1, buf4); + + // gorilla float + // store first float in 64 bits + auto buf5 = 
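/* each later value is XORed with its predecessor and only the bits between the leading/trailing zeros are appended */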
(uint64_t *)(output + (int) (length*0.8)); + buf5[idxs[4]++] = data[0].value; + // calculate trailing and leading zeros for first float + uint64_t *firstV = (uint64_t *)&data[0].value; + int prevLeadingZeros = __builtin_clzll(*firstV); + int prevTrailingZeros = __builtin_ctzll(*firstV); + uint16_t count_5 = 1; + + for (size_t idx = 0; idx < n; idx++) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + if (idx > 0) { + auto prev = (float)data[idx - 1].value; + auto current = (float)data[idx].value; + uint64_t *a = (uint64_t *)&prev; + uint64_t *b = (uint64_t *)¤t; + uint64_t xorValue = *a ^ *b; + auto [appendedValue, appendedValueLength, leadingZeros, + trailingZeros] = xorComp.compress(xorValue, prevLeadingZeros, prevTrailingZeros); + prevLeadingZeros = leadingZeros; + prevTrailingZeros = trailingZeros; + if (count_5 + appendedValueLength > 64) { + uint8_t split = (64 - count_5); + if (appendedValueLength > 1) { + buf5[idxs[4]] |= + appendedValue >> (appendedValueLength - split); + } + ++idxs[4]; + count_5 = appendedValueLength - split; + } else { + count_5 += appendedValueLength; + } + buf5[idxs[4]] |= appendedValue << (64 - count_5); + } + } + writePos += idxs[0] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(uint64_t)); + writePos += idxs[1] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf3, idxs[2] * sizeof(uint64_t)); + writePos += idxs[2] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf4, idxs[3] * sizeof(uint64_t)); + writePos += idxs[3] * sizeof(uint64_t); + std::memcpy((void *)(output + writePos), + (void *)buf5, idxs[4] * sizeof(uint64_t)); + writePos += idxs[4] * sizeof(uint64_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + int plug; + int household; + int house; + float value; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t idx = 0; idx < n; idx++) { + out[idx] = {data[idx].timestamp, data[idx].plug, + data[idx].household, data[idx].house, data[idx].value}; + } + writePos = n * sizeof(res); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + auto offset = (*dcomp)[pid]->getTable().max_size() * + (*dcomp)[pid]->getTable().bucket_size(); + std::memcpy(output + writePos, (void *)(*dcomp)[pid]->getTable().buckets(), offset); + writePos += offset; + return; + } + + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + start = start / sizeof(Bucket); + end = end / sizeof(Bucket); + DummyFloatCompressor fcomp(1000000); + + if (!isComplete) { + auto inputBuf = 
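/* partial windows are checkpointed as raw hash-table buckets; only occupied (state != 0) slots are emitted */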
(Bucket *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint32_t timestamp : 8; + uint32_t groupKey : 10; + uint32_t value : 20; + uint32_t counter : 10; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + if (inputBuf[idx].state) { + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(inputBuf[idx].key), + fcomp.compress(inputBuf[idx].value._1), + static_cast(inputBuf[idx].counter)}; + } + } + writePos = outIdx * sizeof(res); + } else { + auto inputBuf = (output_tuple_t *)input; + BaseDeltaCompressor bcomp(inputBuf[0].timestamp); + struct res { + uint32_t timestamp : 8; + uint32_t groupKey : 10; + uint32_t value : 20; + }; + auto outputBuf = (res *)output; + auto outIdx = writePos / sizeof(res); + for (size_t idx = start; idx < end; ++idx) { + Key temp{inputBuf[idx]._1, inputBuf[idx]._2, inputBuf[idx]._3}; + outputBuf[outIdx++] = {bcomp.compress(inputBuf[idx].timestamp), + (*dcomp)[pid]->compress(temp), + fcomp.compress(inputBuf[idx]._4)}; + } + writePos = outIdx * sizeof(res); + } +} + +void decompress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + throw std::runtime_error("error: the decompression function is not implemented"); +} +}; +namespace SG3Compress { +struct alignas(16) input_tuple_t_1 { + long timestamp; + float _1; + int _2; +}; + +struct alignas(16) input_tuple_t_2 { + long timestamp; + int _1; + int _2; + int _3; + float _4; + float _5; + int _6; +}; + +void compressInput1(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t_1 *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000000); + struct t_1 { + uint16_t timestamp : 8; + uint16_t counter : 8; + }; + struct t_2 { + uint32_t value : 22; + uint16_t counter : 10; + }; + + //writePos = 0; + // compress + for (auto &i : idxs) { + i = 0; + } + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t_1); + uint16_t count_1 = 1; + uint8_t count_2 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = (uint16_t) bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + auto fVal_2 = fcomp.compress(data[idx]._1); + buf2[idxs[1]++] = {(uint16_t)fVal_2, count_2}; + } + std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2); +} + +void decompressInput1(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool ©, long latency = -1) { + throw std::runtime_error("error: the decompression function is not implemented"); +} + +struct Value { + float _1; +}; +struct Key { + int _0; + int _1; + int _2; +}; +using KeyT = Key; +using ValueT = Value; +struct alignas(16) Bucket { + char state; + char dirty; + long timestamp; + KeyT key; + ValueT value; + int counter; +}; +struct hash { + std::size_t operator()(const Key &key) const { + uint64_t result = uint16_t(key._0) * 100 + uint16_t(key._2) * 10 + 
+    return result;
+  }
+};
+struct Eq {
+  constexpr bool operator()(const KeyT& lhs, const KeyT& rhs) const {
+    return lhs._0 == rhs._0 && lhs._1 == rhs._1 && lhs._2 == rhs._2;
+  }
+};
+std::vector>> *dcomp;
+
+struct dBucket {
+  Key key;
+};
+void compressInput2(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) {
+  if (start == 0 && end == -1) {
+    auto buf = (dBucket*) (output + writePos);
+    auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets();
+    for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) {
+      if (bucket[idx].state) {
+        buf[idx] = dBucket{bucket[idx].key};
+      } else {
+        buf[idx] = dBucket{ -1, -1, -1};
+      }
+    }
+    writePos += (*dcomp)[pid]->getTable().max_size() * sizeof(dBucket);
+    return;
+  }
+  if (clear) {
+    (*dcomp)[pid]->clear();
+    clear = false;
+  }
+
+  auto data = (input_tuple_t_2 *)input;
+  std::vector<size_t> idxs (3);
+  BaseDeltaCompressor bcomp(data[0].timestamp);
+  DummyFloatCompressor fcomp(1000000);
+  struct t_1 {
+    uint16_t timestamp : 8;
+    uint16_t counter : 8;
+  };
+  struct t_2 {
+    uint16_t groupKey : 10;
+    uint16_t counter : 6;
+  };
+  struct t_3 {
+    uint32_t value : 22;
+    uint16_t counter : 10;
+  };
+
+  //writePos = 0;
+  // compress
+  for (auto &i : idxs) {
+    i = 0;
+  }
+  t_1 *buf1 = (t_1 *)(output);
+  t_2 *buf2 = (t_2 *)(output + (int) (length*0.33));
+  t_3 *buf3 = (t_3 *)(output + (int) (length*0.66));
+  size_t n = (end - start) / sizeof(input_tuple_t_2);
+  uint16_t count_1 = 1;
+  uint8_t count_2 = 1;
+  uint16_t count_3 = 1;
+  for (size_t idx = 0; idx < n; idx++) {
+    auto fVal_1 = bcomp.compress(data[idx].timestamp);
+    auto sVal_1 = fVal_1;
+    if (idx < n - 1 && fVal_1 ==
+        (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) {
+      count_1++;
+    } else {
+      buf1[idxs[0]++] = {(uint16_t)fVal_1, count_1};
+      fVal_1 = sVal_1;
+      count_1 = 1;
+    }
+
+    Key temp{data[idx]._1, data[idx]._2, data[idx]._3};
+    auto fVal_2 = (*dcomp)[pid]->compress(temp);
+    auto sVal_2 = fVal_2;
+    if (idx < n - 1) {
+      Key temp2{data[idx+1]._1, data[idx+1]._2, data[idx+1]._3};
+      sVal_2 = (*dcomp)[pid]->compress(temp2);
+      if (sVal_2 == fVal_2) {
+        count_2++;
+      } else {
+        buf2[idxs[1]++] = {fVal_2, count_2};
+        fVal_2 = sVal_2;
+        count_2 = 1;
+      }
+    } else {
+      buf2[idxs[1]++] = {fVal_2, count_2};
+      fVal_2 = sVal_2;
+      count_2 = 1;
+    }
+
+    auto fVal_3 = fcomp.compress(data[idx]._4);
+    auto sVal_3 = fVal_3;
+    if (idx < n - 1 &&
+        fVal_3 == (sVal_3 = fcomp.compress(data[idx + 1]._4))) {
+      count_3++;
+    } else {
+      buf3[idxs[2]++] = {(uint16_t)fVal_3, count_3};
+      fVal_3 = sVal_3;
+      count_3 = 1;
+    }
+  }
+  std::memcpy((void *)(output + idxs[0] * sizeof(t_1)), (void *)buf2,
+              idxs[1] * sizeof(t_2));
+  std::memcpy((void *)(output + idxs[0] * sizeof(t_1) +
+                       idxs[1] * sizeof(t_2)),
+              (void *)buf3, idxs[2] * sizeof(t_3));
+  writePos += idxs[0] * sizeof(t_1) + idxs[1] * sizeof(t_2) +
+              idxs[2] * sizeof(t_3);
+}
+void decompressInput2(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) {
+  throw std::runtime_error("error: the decompression function is not implemented");
+}
+};
+
+class SG3 : public SmartGrid {
+ private:
+  void createApplication() override {
+    SystemConf::getInstance().PARTIAL_WINDOWS = 3800;
+
+    bool persistInput = SystemConf::getInstance().PERSIST_INPUT;
+    bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON;
+
+    // Configure first query
+    std::vector<AggregationType> aggregationTypes1(1);
+    aggregationTypes1[0] =
AggregationTypes::fromString("avg"); + + std::vector aggregationAttributes1(1); + aggregationAttributes1[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes1; + + auto window1 = new WindowDefinition(RANGE_BASED, 128, 1); //ROW_BASED, 85*400, 1*400); + Aggregation *aggregation1 = new Aggregation(*window1, aggregationTypes1, aggregationAttributes1, groupByAttributes1); + +#if defined(TCP_INPUT) + bool replayTimestamps1 = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps1 = false; +#else + bool replayTimestamps1 = window1->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode1 = new OperatorKernel(true); + genCode1->setInputSchema(getSchema()); + genCode1->setAggregation(aggregation1); + genCode1->setQueryId(0); + genCode1->setup(); + OperatorCode *cpuCode1 = genCode1; + + // Print operator + std::cout << genCode1->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator1 = new QueryOperator(*cpuCode1, true); + std::vector operators1; + operators1.push_back(queryOperator1); + + // Configure second query + std::vector aggregationTypes2(2); + aggregationTypes2[0] = AggregationTypes::fromString("avg"); + aggregationTypes2[1] = AggregationTypes::fromString("cnt"); + + std::vector aggregationAttributes2(2); + aggregationAttributes2[0] = new ColumnReference(1, BasicType::Float); + aggregationAttributes2[1] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes2(3); + groupByAttributes2[0] = new ColumnReference(3, BasicType::Integer); + groupByAttributes2[1] = new ColumnReference(4, BasicType::Integer); + groupByAttributes2[2] = new ColumnReference(5, BasicType::Integer); + + auto window2 = new WindowDefinition(RANGE_BASED, 128, 1); //ROW_BASED, 36*1000, 1*1000); + Aggregation *aggregation2 = new Aggregation(*window2, aggregationTypes2, aggregationAttributes2, groupByAttributes2); + +#if defined(TCP_INPUT) + replayTimestamps1 = false; + bool replayTimestamps2 = false; +#elif defined(RDMA_INPUT) + replayTimestamps1 = false; + bool replayTimestamps2 = false; +#else + replayTimestamps1 = window1->isRangeBased(); + bool replayTimestamps2 = window2->isRangeBased(); +#endif + + // Set up code-generated operator + OperatorKernel *genCode2 = new OperatorKernel(true, true, useParallelMerge); + genCode2->setInputSchema(getSchema()); + genCode2->setAggregation(aggregation2); + genCode2->setCollisionBarrier(28); + genCode2->setQueryId(1); + genCode2->setup(); + OperatorCode *cpuCode2 = genCode2; + + // Print operator + std::cout << genCode2->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator2 = new QueryOperator(*cpuCode2, true); + std::vector operators2; + operators2.push_back(queryOperator2); + + // Configure third query + auto config3 = + new QueryConfig(128 * SystemConf::getInstance()._MB, + 256 * SystemConf::getInstance()._KB, + 256 * SystemConf::getInstance()._KB, 1, 4000); + bool persistJoinInput = false; + auto window3 = new WindowDefinition(RANGE_BASED, 1, 1); //ROW_BASED, 1, 1); + TupleSchema *schema3 = &cpuCode1->getOutputSchema(); + + auto window4 = new WindowDefinition(RANGE_BASED, 1, 1); //ROW_BASED, 470, 470); + TupleSchema *schema4 = &cpuCode2->getOutputSchema(); + + auto predicate3 = new ComparisonPredicate(LESS_OP, new ColumnReference(1), new ColumnReference(4)); + auto join = new ThetaJoin(*schema3, *schema4, predicate3); + join->setQueryId(2); + join->setup(window3, window4, config3->getCircularBufferSize()); + + // Define an ft-operator + auto queryOperator3 
= new QueryOperator(*join, true); + std::vector operators3; + operators3.push_back(queryOperator3); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(3); + queries[0] = std::make_shared(0, + operators1, + *window1, + m_schema, + m_timestampReference, + true, + replayTimestamps1, + !replayTimestamps1, + false, + 0, false, nullptr, !SystemConf::getInstance().RECOVER); + queries[1] = std::make_shared(1, + operators2, + *window2, + m_schema, + m_timestampReference, + true, + replayTimestamps2, + !replayTimestamps2, useParallelMerge, + 0, persistInput); + queries[2] = std::make_shared(2, + operators3, + *window3, + schema3, + *window4, + schema4, + m_timestampReference, + true, + false, + true, + false, + 0, persistJoinInput, config3); + queries[0]->connectTo(queries[2].get()); + queries[1]->connectTo(queries[2].get()); + + //queries[0]->markForCheckpoint(false); + //queries[1]->markForCheckpoint(false); + //queries[2]->markForCheckpoint(false); + + if (persistInput && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(SG1Compress_::compressInput); + queries[1]->getBuffer()->setCompressionFP(SG2Compress_::compressInput_); + queries[2]->getBuffer()->setCompressionFP(SG3Compress::compressInput1); + queries[2]->getSecondBuffer()->setCompressionFP(SG3Compress::compressInput2); + + queries[0]->getBuffer()->setDecompressionFP(SG1Compress_::decompressInput); + queries[1]->getBuffer()->setDecompressionFP(SG2Compress_::decompressInput); + queries[2]->getBuffer()->setDecompressionFP(SG3Compress::decompressInput1); + queries[2]->getSecondBuffer()->setDecompressionFP(SG3Compress::decompressInput2); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + //std::vector rates {1, 4}; + //m_application->setupRates(rates); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + SG2Compress_::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + SG2Compress_::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + SG3Compress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + SG3Compress::dcomp->emplace_back( + std::make_unique>( + SystemConf::getInstance().HASH_TABLE_SIZE)); + } + } + if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + m_application->getCheckpointCoordinator()->setCompressionFP(1, SG2Compress_::compress); + m_application->getCheckpointCoordinator()->setDecompressionFP(1, SG2Compress_::decompress); + } + } + + public: + SG3(bool inMemory = true) { + m_name = "SG3"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; diff --git a/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SmartGrid.h b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SmartGrid.h new file mode 100644 index 0000000..9ac6a3f --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/SmartGrid.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/TupleSchema.h" +#include "utils/QueryApplication.h" +#include "utils/Utils.h" +#include "benchmarks/applications/BenchmarkQuery.h" + +class 
SmartGrid : public BenchmarkQuery {
+ private:
+  long normalisedTimestamp = -1;
+  struct InputSchema {
+    long timestamp;
+    float value;
+    int property;
+    int plug;
+    int household;
+    int house;
+    int padding;
+
+    static void parse(InputSchema &tuple, std::string &line, long &normalisedTimestamp) {
+      std::istringstream iss(line);
+      std::vector<std::string> words{std::istream_iterator<std::string>{iss},
+                                     std::istream_iterator<std::string>{}};
+      if (normalisedTimestamp == -1)
+        normalisedTimestamp = std::stol(words[0]);
+
+      tuple.timestamp = std::stol(words[0]) - normalisedTimestamp;
+      tuple.value = std::stof(words[1]);
+      tuple.property = std::stoi(words[2]);
+      tuple.plug = std::stoi(words[3]);
+      tuple.household = std::stoi(words[4]);
+      tuple.house = std::stoi(words[5]);
+    }
+  };
+
+ public:
+  TupleSchema *m_schema = nullptr;
+  QueryApplication *m_application = nullptr;
+  std::vector<char> *m_data = nullptr;
+  bool m_debug = false;
+
+  QueryApplication *getApplication() override {
+    return m_application;
+  }
+
+  virtual void createApplication() = 0;
+
+  void loadInMemoryData() {
+    size_t len = SystemConf::getInstance().BUNDLE_SIZE;
+    m_data = new std::vector<char>(len);
+    auto buf = (InputSchema *) m_data->data();
+
+    std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/smartgrid/";
+    std::ifstream file(filePath + "smartgrid-data.txt");
+    if (!file.good())
+      throw std::runtime_error("error: input file does not exist, check the path.");
+    std::string line;
+    unsigned long idx = 0;
+    while (std::getline(file, line) && idx < len / sizeof(InputSchema)) {
+      InputSchema::parse(buf[idx], line, normalisedTimestamp);
+      idx++;
+    }
+
+    if (m_debug) {
+      std::cout << "timestamp value property plug household house" << std::endl;
+      for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) {
+        printf("[DBG] %09lu: %7ld %5.3f %5d %5d %5d %5d \n",
+               i, buf[i].timestamp, buf[i].value, buf[i].property, buf[i].plug,
+               buf[i].household, buf[i].house);
+      }
+    }
+  };
+
+  std::vector<char> *getInMemoryData() override {
+    return m_data;
+  }
+
+  std::vector<char> *getStaticData() override {
+    throw std::runtime_error("error: this benchmark does not have static data");
+  }
+
+  TupleSchema *getSchema() override {
+    if (m_schema == nullptr)
+      createSchema();
+    return m_schema;
+  }
+
+  void createSchema() {
+    m_schema = new TupleSchema(7, "SmartGrid");
+    auto longAttr = AttributeType(BasicType::Long);
+    auto intAttr = AttributeType(BasicType::Integer);
+    auto floatAttr = AttributeType(BasicType::Float);
+
+    m_schema->setAttributeType(0, longAttr);  /* timestamp: long */
+    m_schema->setAttributeType(1, floatAttr); /* value: float */
+    m_schema->setAttributeType(2, intAttr);   /* property: int */
+    m_schema->setAttributeType(3, intAttr);   /* plug: int */
+    m_schema->setAttributeType(4, intAttr);   /* household: int */
+    m_schema->setAttributeType(5, intAttr);   /* house: int */
+    m_schema->setAttributeType(6, intAttr);   /* padding: int */
+  }
+};
diff --git a/test/benchmarks/applicationsWithCheckpoints/SmartGrid/main.cpp b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/main.cpp
new file mode 100644
index 0000000..e3b551a
--- /dev/null
+++ b/test/benchmarks/applicationsWithCheckpoints/SmartGrid/main.cpp
@@ -0,0 +1,27 @@
+#include
+#include
+
+#include "SG1.cpp"
+#include "SG2.cpp"
+#include "SG3.cpp"
+
+//
+// ./smartgrid_checkpoints --query 2 --hashtable-size 512 --unbounded-size 1048576 --circular-size 16777216 --bundle-size 524288 --slots 128 --batch-size 524288 --unbounded-size 4194304 --checkpoint-duration 1000 --disk-block-size 4194304 --threads 1
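+//
+// --query selects the benchmark variant constructed in main() below:
+// 1 -> SG1, 2 -> SG2 (the default), 3 -> SG3 (the three-query plan with
+// the theta-join); any other value throws "invalid benchmark query id".
+// For example: ./smartgrid_checkpoints --query 3 --threads 4
+//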
+int main(int argc, const char **argv) {
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
+
+  SystemConf::getInstance().QUERY_NUM = 2;
+  BenchmarkQuery::parseCommandLineArguments(argc, argv);
+
+  if (SystemConf::getInstance().QUERY_NUM == 1) {
+    benchmarkQuery = std::make_unique<SG1>();
+  } else if (SystemConf::getInstance().QUERY_NUM == 2) {
+    benchmarkQuery = std::make_unique<SG2>();
+  } else if (SystemConf::getInstance().QUERY_NUM == 3) {
+    benchmarkQuery = std::make_unique<SG3>();
+  } else {
+    throw std::runtime_error("error: invalid benchmark query id");
+  }
+
+  return benchmarkQuery->runBenchmark();
+}
\ No newline at end of file
diff --git a/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/YSB.cpp b/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/YSB.cpp
new file mode 100644
index 0000000..ca4f108
--- /dev/null
+++ b/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/YSB.cpp
@@ -0,0 +1,618 @@
+#include "benchmarks/applications/YahooBenchmark/YahooBenchmark.h"
+#include "compression/Compressor.h"
+#include "cql/expressions/ColumnReference.h"
+#include "cql/expressions/IntConstant.h"
+#include "cql/operators/Aggregation.h"
+#include "cql/operators/AggregationType.h"
+#include "cql/operators/codeGeneration/OperatorKernel.h"
+#include "cql/predicates/ComparisonPredicate.h"
+#include "snappy.h"
+#include "utils/Query.h"
+#include "utils/QueryOperator.h"
+#include "utils/WindowDefinition.h"
+
+namespace YSBCompress {
+struct alignas(16) input_tuple_t {
+  long timestamp;
+  long _1;
+  __uint128_t _2;
+  __uint128_t _3;
+  __uint128_t _4;
+  long _5;
+  long _6;
+  __uint128_t _7;
+  __uint128_t _8;
+  __uint128_t _9;
+};
+struct hash {
+  std::size_t operator()(const __uint128_t &key) const {
+    std::hash<int> hasher;
+    return hasher((int)key);
+  }
+};
+struct Eq {
+  constexpr bool operator()(const __uint128_t &k1,
+                            const __uint128_t &k2) const {
+    return k1 == k2;
+  }
+};
+using Key = __uint128_t;
+using Value = uint16_t;
+std::vector>> *dcomp;
+
+struct dBucket {
+  __uint128_t key;
+};
+
+struct HMEqualTo {
+  constexpr bool operator()(const Key& lhs, const Key& rhs) const {
+    return lhs == rhs;
+  }
+};
+struct UInt128Hash {
+  UInt128Hash() = default;
+  inline std::size_t operator()(__uint128_t data) const {
+    const __uint128_t __mask = static_cast<uint64_t>(-1);
+    const std::size_t __a = (std::size_t)(data & __mask);
+    const std::size_t __b = (std::size_t)((data & (__mask << 64)) >> 64);
+    auto hasher = std::hash<size_t>();
+    return hasher(__a) + hasher(__b);
+  }
+};
+using MyHash = UInt128Hash;
+
+
+using std::numeric_limits;
+
+template <typename T, typename U>
+bool CanTypeFitValue(const U value) {
+  const intmax_t botT = intmax_t(numeric_limits<T>::min() );
+  const intmax_t botU = intmax_t(numeric_limits<U>::min() );
+  const uintmax_t topT = uintmax_t(numeric_limits<T>::max() );
+  const uintmax_t topU = uintmax_t(numeric_limits<U>::max() );
+  auto b = !( (botT > botU && value < static_cast<U> (botT)) || (topT < topU && value > static_cast<U> (topT)) );
+  if (!b) {
+    std::cout << "can't fit" << std::endl;
+  }
+  return b;
+}
+template <typename In, typename Out>
+class BaseDeltaCompressor2 {
+ private:
+  In m_base;
+
+ public:
+  BaseDeltaCompressor2(In base) : m_base(base) {}
+  inline Out compress(In &input) { return (Out) std::abs(m_base - input); }
+  inline bool check(In &input) {
+    auto res = input - m_base;
+    auto b = !CanTypeFitValue<Out>(res);
+    return b;
+  }
+  inline std::string getBase() {
+    return std::to_string(m_base);
+  }
+};
+
+#include
+static const int numOfWorkers = 20;
+static const int numOfCols = 2;
+static std::unique_ptr> dcomp2[numOfWorkers][numOfCols];
+static
std::string metadata[numOfWorkers][numOfCols]; +static bool isFirst [numOfWorkers] = {true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true}; +inline void compressInput1(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency) { + if (start == 0 && end == -1) { + // write metadata + return; + } + if (isFirst[pid]) { + dcomp2[pid][1] = std::make_unique>(1024); + isFirst[pid] = false; + } + if (clear) { + dcomp2[pid][1]->clear(); + clear = false; + } + + if (start == end || end < start) { + return; + } + + // Input Buffer + auto data = (input_tuple_t *)input; + std::vector idxs (2, 0); + uint16_t count_0 = 1; + BaseDeltaCompressor2 comp_0(data[0].timestamp); + auto temp_0 = comp_0.compress(data[0].timestamp); + struct t_0 { + uint8_t _0 : 4; + uint16_t counter : 10; + }; + struct t_1 { + uint16_t _1 : 10; + }; + // output buffers + int barriers[2]; + barriers[0] = (int)(length*0.000000); + t_0 *buf0 = (t_0 *) (output + barriers[0]); + barriers[1] = (int)(length*0.500000); + t_1 *buf1 = (t_1 *) (output + barriers[1]); + size_t n = (end - start) / sizeof(input_tuple_t); + + for (size_t idx = 0; idx < n; idx++) { + if ( data[idx]._6 == 0 ) + { + // apply compression + if (comp_0.check(data[idx].timestamp)) { + std::cout << "warning: falling back to the original compression scheme"<< std::endl; + clear = true; + return; + } + auto res_0 = comp_0.compress(data[idx].timestamp); + // apply RLE + if (temp_0 != res_0 || count_0 >= 1023.000000) { + buf0[idxs[0]++] = {temp_0, count_0}; + count_0 = 0; + temp_0 = res_0; + } else { + count_0++; + } + auto res_1 = dcomp2[pid][1]->compress(data[idx]._4); + buf1[idxs[1]++] = {res_1}; + } + } + if (count_0 != 0) { + buf0[idxs[0]++] = {temp_0, count_0}; + } + // copy results and set output pointers + writePos += idxs[0] * sizeof(t_0); + if (writePos > barriers[1]) {throw std::runtime_error("error: larger barriers needed");} + std::memcpy((void *)(output + writePos), (void *)buf0, idxs[0] * sizeof(t_0)); + writePos += idxs[1] * sizeof(t_1); + if (writePos > length) {throw std::runtime_error("error: larger barriers needed");} + //write metadata + writePos = 0; + metadata[pid][0] = ""; + metadata[pid][0] += "0 ""RLE ""BD "+comp_0.getBase()+" ""{long:2;uint16_t:10;} " + std::to_string(writePos) + " "; + writePos += idxs[0] * sizeof(t_0); + metadata[pid][0] += std::to_string(writePos) + " "; + auto endPtr = idxs[0] * sizeof(t_0)+idxs[1] * sizeof(t_1); + auto dcompSize = dcomp2[pid][1]->getTable().bucket_size() * dcomp2[pid][1]->getTable().max_size(); + metadata[pid][0] += "4 ""D "+std::to_string(endPtr)+" "+std::to_string(endPtr+dcompSize)+" ""{__uint128_t:10;} " + std::to_string(writePos) + " "; + writePos += idxs[1] * sizeof(t_1); + metadata[pid][0] += std::to_string(writePos) + " "; + if (metadata[pid][0].size() > 128) { throw std::runtime_error("error: increase the size of metadata"); } + std::memcpy((void *)(output - 128), (void *)metadata[pid][0].data(), metadata[pid][0].size()); +} + +void compressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + // static hashtable that never changes! 
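+    // (sketch of what the disabled block below would do, given the dBucket
+    // and dictionary-table definitions above: walk every slot of the
+    // worker's table, copy occupied keys into a dBucket array on the
+    // output, and mark empty slots with the sentinel (__uint128_t) -1; it
+    // stays commented out because the YSB ad_id dictionary is static, so
+    // there is nothing to snapshot at checkpoint time.)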
+ /*auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{(__uint128_t) -1}; + } + }*/ + writePos += 0; //(*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + return; + } + if (clear) { + (*dcomp)[pid]->clear(); + clear = false; + } + + BaseDeltaCompressor bcomp(1000); + struct res { + uint16_t timestamp : 4; + uint16_t user_id : 10; + }; + + auto data = (input_tuple_t *)input; + res *buf = (res *)(output); + size_t n = (end - start) / sizeof(input_tuple_t); + writePos = 0; + for (size_t idx = 0; idx < n; idx++) { + if (data[idx]._6 == 0) { + buf[writePos++] = {bcomp.compress(data[idx].timestamp), + (*dcomp)[pid]->compress(data[idx]._4)}; + } + } + writePos = writePos * sizeof(res); +} + +void compressGenInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + // static hashtable that never changes! + /*auto buf = (dBucket*) (output + writePos); + auto bucket = (Bucket *)(*dcomp)[pid]->getTable().buckets(); + for (size_t idx = 0; idx < (*dcomp)[pid]->getTable().max_size(); ++idx) { + if (bucket[idx].state) { + buf[idx] = dBucket{bucket[idx].key}; + } else { + buf[idx] = dBucket{(__uint128_t) -1}; + } + }*/ + writePos += 0; //(*dcomp)[pid]->getTable().max_size() * sizeof(dBucket); + return; + } + if (clear) { + //(*dcomp)[pid]->clear(); + clear = false; + } + + auto data = (input_tuple_t *)input; + int idxs[2] = {0, 0}; + GorillaTimestampCompressor gorillaComp; + size_t n = (end - start) / sizeof(input_tuple_t); + + // gorilla timestamp + auto buf1 = (uint64_t *)output; + uint8_t count_1 = 14; // as the first delta is stored in 14 bits + // store first timestamp in 64bits + first delta int 14 bits + buf1[idxs[0]++] = data[0].timestamp; + int64_t newDelta = data[1].timestamp - data[0].timestamp; + buf1[idxs[0]] = newDelta << (64 - count_1); + + auto *buf2 = (__uint128_t *)(output + (int)(length* 0.5)); + writePos = 0; + + for (size_t idx = 0; idx < n; idx++) { + if (data[idx]._6 == 0) { + if (idx > 1) { + auto [deltaD, deltaLength] = gorillaComp.compress( + data[idx].timestamp, data[idx - 1].timestamp, + data[idx - 2].timestamp); + if (count_1 + deltaLength > 64) { + uint8_t split = (64 - count_1); + if (deltaLength > 1) { + buf1[idxs[0]] |= deltaD >> (deltaLength - split); + } + ++idxs[0]; + count_1 = deltaLength - split; + } else { + count_1 += deltaLength; + } + buf1[idxs[0]] |= deltaD << (64 - count_1); + } + buf2[idxs[1]++] = data[idx]._4;//(*dcomp)[pid]->compress(data[idx]._4); + } + } + size_t output_length; + snappy::RawCompress((const char *)(buf2), + idxs[1] * sizeof(__int128_t), (char*)(output + idxs[0] * sizeof(uint64_t)), + &output_length); + //std::memcpy((void *)(output + idxs[0] * sizeof(uint64_t)), (void *)buf2, + // idxs[1] * sizeof(uint16_t)); + writePos += idxs[0] * sizeof(uint64_t) + output_length;//idxs[1] * sizeof(uint16_t); +} + +void noCompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + auto offset = 0; + writePos += offset; + return; + } + auto data = (input_tuple_t *)input; + struct res { + long timestamp; + __uint128_t userId; + }; + auto out = (res*) output; + size_t n = (end - start) / sizeof(input_tuple_t); + for (size_t 
idx = 0; idx < n; idx++) {
+    out[idx] = {data[idx].timestamp, data[idx]._4};
+  }
+  writePos = n * sizeof(res);
+}
+
+void decompressInput(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &copy, long latency = -1) {
+  throw std::runtime_error("error: the decompression function is not implemented");
+}
+
+void filterInput(char *input, int start, int end, char *output, int startOutput, int &writePos) {
+  auto data = (input_tuple_t *)input;
+  struct res {
+    long timestamp;
+    long _pad;
+    __uint128_t userId;
+  };
+  auto out = (res*) output;
+  size_t n = (end - start) / sizeof(input_tuple_t);
+  size_t outIdx = startOutput;
+  for (size_t idx = 0; idx < n; idx++) {
+    //if (data[idx]._6 == 0) {
+    //  out[outIdx++] = {data[idx].timestamp, 0, data[idx]._4};
+    //}
+    //std::memcpy(&out[idx], &data[idx], 64);
+  }
+  writePos = n * sizeof(res);
+}
+
+void onlyCompressInputLossless(int pid, char *input, int start, int end, char *output, int &writePos, int length, bool &clear, long latency = -1) {
+  if (start == 0 && end == -1) {
+    auto offset = 0;
+    writePos += offset;
+    return;
+  }
+  auto data = (input_tuple_t *)input;
+  std::vector<size_t> idxs (1);
+  for (auto &i : idxs) {
+    i = 0;
+  }
+
+  size_t output_length;
+  auto buf1 = (uint64_t *)input;
+  snappy::RawCompress((const char *)(buf1), end, (char*)(output), &output_length);
+  writePos += output_length;
+}
+};
+
+class YSB : public YahooBenchmark {
+ private:
+  TupleSchema *createStaticSchema() {
+    if (m_is64)
+      return createStaticSchema_64();
+    else
+      return createStaticSchema_128();
+  }
+
+  TupleSchema *createStaticSchema_64() {
+    auto staticSchema = new TupleSchema(2, "Campaigns");
+    auto longAttr = AttributeType(BasicType::Long);
+    //auto longLongAttr = AttributeType(BasicType::LongLong);
+
+    staticSchema->setAttributeType(0, longAttr); /* ad_id: long */
+    staticSchema->setAttributeType(1, longAttr); /* campaign_id: long */
+    return staticSchema;
+  }
+
+  TupleSchema *createStaticSchema_128() {
+    auto staticSchema = new TupleSchema(2, "Campaigns");
+    auto longLongAttr = AttributeType(BasicType::LongLong);
+
+    staticSchema->setAttributeType(0, longLongAttr); /* ad_id: longLong */
+    staticSchema->setAttributeType(1, longLongAttr); /* campaign_id: longLong */
+    return staticSchema;
+  }
+
+  std::string getStaticHashTable(size_t adsNum) {
+    auto tableSize = std::to_string(Utils::getPowerOfTwo(adsNum));
+    std::string s;
+    std::string type;
+    if (m_is64)
+      type = "long";
+    else
+      type = "__uint128_t";
+    s.append(
+        "\n"
+        "struct interm_node {\n"
+        "    long timestamp;\n"
+        "    " + type + " ad_id;\n"
+        "    " + type + " campaign_id;\n"
+        "};\n"
+        "struct static_node {\n"
+        "    " + type + " key;\n"
+        "    " + type + " value;\n"
+        "};\n"
+        "class staticHashTable {\n"
+        "private:\n"
+        "    int size = "+tableSize+";\n"
+        "    int mask = size-1;\n"
+        "    static_node *table;\n");
+
+    if (m_is64)
+      s.append("    std::hash<long> hashVal;\n");
+    else
+      s.append("    MyHash hashVal;\n");
+
+    s.append(
+        "public:\n"
+        "    staticHashTable (static_node *table);\n"
+        "    bool get_value (const " + type + " key, " + type + " &result);\n"
+        "};\n"
+        "staticHashTable::staticHashTable (static_node *table) {\n"
+        "    this->table = table;\n"
+        "}\n"
+        "bool staticHashTable::get_value (const " + type +
+        " key, " + type + " &result) {\n"
+        "    int ind = hashVal(key) & mask;\n"
+        "    int i = ind;\n"
+        "    for (; i < this->size; i++) {\n"
+        "        if (this->table[i].key == key) {\n"
+        "            result = this->table[i].value;\n"
+        "            return true;\n"
+        "        }\n"
+        "    }\n"
+        "    for (i = 0; i < ind; i++) {\n"
+        "        if
(this->table[i].key == key) {\n" + " result = this->table[i].value;\n" + " return true;\n" + " }\n" + " }\n" + " return false;\n" + "}\n\n" + ); + return s; + } + + std::string getStaticComputation(WindowDefinition *window) { + std::string s; + if (m_is64) { + if (window->isRowBased()) + s.append("if (data[bufferPtr]._5 == 0) {\n"); + + s.append(" long joinRes;\n"); + s.append( + " bool joinFound = staticMap.get_value(data[bufferPtr]._3, joinRes);\n" + " if (joinFound) {\n" + " interm_node tempNode = {data[bufferPtr].timestamp, data[bufferPtr]._3, joinRes};\n" + " curVal._1 = 1;\n" + " curVal._2 = tempNode.timestamp;\n" + " aggrStructures[pid].insert_or_modify(tempNode.campaign_id, curVal, tempNode.timestamp);\n" + " }\n"); + if (window->isRowBased()) + s.append("}\n"); + } else { + if (window->isRowBased()) + s.append("if (data[bufferPtr]._6 == 0) {\n"); + + s.append(" __uint128_t joinRes;\n"); + s.append( + " bool joinFound = staticMap.get_value(data[bufferPtr]._4, joinRes);\n" + " if (joinFound) {\n" + " interm_node tempNode = {data[bufferPtr].timestamp, data[bufferPtr]._4, joinRes};\n" + " curVal._1 = 1;\n" + " curVal._2 = tempNode.timestamp;\n" + " aggrStructures[pid].insert_or_modify(tempNode.campaign_id, curVal, tempNode.timestamp);\n" + " }\n"); + if (window->isRowBased()) + s.append("}\n"); + } + return s; + } + + std::string getStaticInitialization() { + std::string s; + s.append( + "static_node *sBuf = (static_node *) staticBuffer;\n" + "staticHashTable staticMap (sBuf);\n" + ); + return s; + } + + void createApplication() override { + SystemConf::getInstance().SLOTS = 128; + SystemConf::getInstance().PARTIAL_WINDOWS = 32; + SystemConf::getInstance().HASH_TABLE_SIZE = Utils::getPowerOfTwo(SystemConf::getInstance().CAMPAIGNS_NUM); + auto adsNum = Utils::getPowerOfTwo(SystemConf::getInstance().CAMPAIGNS_NUM * 10); + //SystemConf::getInstance().CHECKPOINT_INTERVAL = 1000L; + //SystemConf::getInstance().CHECKPOINT_ON = + // SystemConf::getInstance().CHECKPOINT_INTERVAL > 0; + + bool useParallelMerge = SystemConf::getInstance().PARALLEL_MERGE_ON; + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + + int incr = (m_is64) ? 
0 : 1; + + auto window = new WindowDefinition(RANGE_BASED, 100, 100); + + // Configure selection predicate + auto predicate = new ComparisonPredicate(EQUAL_OP, new ColumnReference(5 + incr), new IntConstant(0)); + Selection *selection = new Selection(predicate); + + // Configure projection + std::vector expressions(2); + // Always project the timestamp + expressions[0] = new ColumnReference(0); + expressions[1] = new ColumnReference(3 + incr); + Projection *projection = new Projection(expressions, true); + + // Configure static hashjoin + auto staticSchema = createStaticSchema(); + auto joinPredicate = new ComparisonPredicate(EQUAL_OP, new ColumnReference(1), new ColumnReference(0)); + StaticHashJoin *staticJoin = new StaticHashJoin(joinPredicate, + projection->getOutputSchema(), + *staticSchema, + getStaticData(), + getStaticInitialization(), + getStaticHashTable(adsNum), + getStaticComputation(window)); + + // Configure aggregation + std::vector aggregationTypes(2); + aggregationTypes[0] = AggregationTypes::fromString("cnt"); + aggregationTypes[1] = AggregationTypes::fromString("max"); + + std::vector aggregationAttributes(2); + aggregationAttributes[0] = new ColumnReference(1 + incr, BasicType::Float); + aggregationAttributes[1] = new ColumnReference(0, BasicType::Float); + + std::vector groupByAttributes(1); + if (m_is64) + groupByAttributes[0] = new ColumnReference(3, BasicType::Long); + else + groupByAttributes[0] = new ColumnReference(4, BasicType::LongLong); + + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + +#if defined(TCP_INPUT) + bool replayTimestamps = false; +#elif defined(RDMA_INPUT) + bool replayTimestamps = false; +#else + bool replayTimestamps = window->isRangeBased(); +#endif + OperatorCode *cpuCode; + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true, true, useParallelMerge, true); + genCode->setInputSchema(getSchema()); + genCode->setSelection(selection); + //genCode->setProjection(projection); + genCode->setStaticHashJoin(staticJoin); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Define an ft-operator + auto queryOperator = new QueryOperator(*cpuCode, true); + std::vector operators; + operators.push_back(queryOperator); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + queries[0] = std::make_shared(0, + operators, + *window, + m_schema, + m_timestampReference, + true, + replayTimestamps, + !replayTimestamps, + useParallelMerge, + 0, persistInput, nullptr, !SystemConf::getInstance().RECOVER); + +#if defined(RDMA_INPUT) + //queries[0]->getBuffer()->setFilterFP(YSBCompress::filterInput); +#endif + + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(YSBCompress::compressInput); + queries[0]->getBuffer()->setDecompressionFP(YSBCompress::decompressInput); + } + + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + YSBCompress::dcomp = new std::vector>>(); + for (int w = 0; w < SystemConf::getInstance().WORKER_THREADS; ++w) { + YSBCompress::dcomp->emplace_back( + 
std::make_unique>(adsNum)); + } + } + /*if (SystemConf::getInstance().CHECKPOINT_ON && SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + m_application->getCheckpointCoordinator()->setCompressionFP(0, YSBCompress::compress); + }*/ + } + + public: + YSB(bool inMemory = true) { + m_name = "YSB"; + createSchema(); + if (inMemory) + loadInMemoryData(); + createApplication(); + } +}; \ No newline at end of file diff --git a/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/main.cpp b/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/main.cpp new file mode 100644 index 0000000..940fd62 --- /dev/null +++ b/test/benchmarks/applicationsWithCheckpoints/YahooBenchmark/main.cpp @@ -0,0 +1,19 @@ +#include + +#include "YSB.cpp" + +// ./yahoo_benchmark_checkpoints --circular-size 16777216 --slots 128 --batch-size 524288 --bundle-size 524288 --checkpoint-duration 1000 --threads 1 +int main(int argc, const char **argv) { + std::unique_ptr benchmarkQuery {}; + + SystemConf::getInstance().QUERY_NUM = 1; + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + if (SystemConf::getInstance().QUERY_NUM == 1) { + benchmarkQuery = std::make_unique(); + } else { + throw std::runtime_error("error: invalid benchmark query id"); + } + + return benchmarkQuery->runBenchmark(); +} \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/BenchmarkUtils.h b/test/benchmarks/kafka-flink/BenchmarkUtils.h new file mode 100644 index 0000000..a1dc90e --- /dev/null +++ b/test/benchmarks/kafka-flink/BenchmarkUtils.h @@ -0,0 +1,802 @@ +#pragma once + +#include + +#include +#include +#include + +#include "utils/SystemConf.h" +#include "cql/operators/HashFunctions.h" +#include "cql/operators/HashTable.h" + +bool autoConsume = true; +const bool compress = true; +const int mergeDivider = 4; + +// for checkpointing +std::atomic checkpointCounter = 0; +std::atomic releaseBarrier = 0; +std::atomic pushBarriers = false; +bool useCheckpoints = false; +const bool debug = false; + +struct QueryByteBuffer { + int m_id; + size_t m_capacity; + size_t m_position; + long m_latencyMark = -1; + long m_originalPosition = 0; + ByteBuffer m_buffer; + bool m_compressed = false; + long m_watermark = 0; + bool m_hasBarrier = false; + + QueryByteBuffer(int id, size_t capacity) : m_id(id), m_capacity(capacity), + m_position(0L), m_buffer(capacity) {} + + [[nodiscard]] int getBufferId() const { + return m_id; + } + + ByteBuffer &getBuffer() { + return m_buffer; + } + + [[nodiscard]] size_t getCapacity() const { + return m_capacity; + } + + void setPosition(size_t pos) { + m_position = pos; + } + + void clear() { + std::fill(m_buffer.begin(), m_buffer.end(), 0); + } + + [[nodiscard]] size_t getPosition() const { + return m_position; + } + + bool tryToMerge(std::shared_ptr &buffer) { + if (m_position + buffer->getPosition() <= m_capacity/mergeDivider) { + std::memcpy(m_buffer.data()+m_position, buffer->m_buffer.data(), buffer->m_position); + m_position += buffer->getPosition(); + m_watermark = std::max(m_watermark, buffer->m_watermark); + m_latencyMark = std::min(m_latencyMark, buffer->m_latencyMark); + return true; + } else { + return false; + } + } + + long getLong(size_t index) { + auto p = (long *) m_buffer.data(); + return p[index]; + } + + void putLong(size_t index, long value) { + auto p = (long *) m_buffer.data(); + p[index] = value; + } + + void putBytes(char *value, size_t length) { + if (m_position + length > m_capacity) { + throw std::runtime_error("error: increase the size of the QueryByteBuffer (" + 
std::to_string(m_capacity) + ")"); + } + std::memcpy(m_buffer.data() + m_position, value, length); + m_position += length; + } +}; + +struct MemoryPool { + const int m_numberOfThreads; + std::atomic count{}; + std::vector>> m_pool; + explicit MemoryPool(int workers = SystemConf::getInstance().WORKER_THREADS) + : m_numberOfThreads(workers), + m_pool(m_numberOfThreads){ + + /*int pid = 0; + for (auto &q : m_pool) { + auto maxSize = 64; + std::vector> tempVec(maxSize); + for (int cnt = 0; cnt < maxSize; cnt++) { + tempVec[cnt] = newInstance(pid); + } + for (int cnt = 0; cnt < maxSize; cnt++) { + free(tempVec[cnt]->m_id, tempVec[cnt]); + } + pid++; + }*/ + }; + + std::shared_ptr newInstance(int pid) { + if (pid >= m_numberOfThreads) + throw std::runtime_error("error: invalid pid for creating an unbounded buffer: " + std::to_string(pid) + " >= " + std::to_string(m_numberOfThreads)); + std::shared_ptr buffer; + bool hasRemaining = m_pool[pid].try_pop(buffer); + if (!hasRemaining) { + count.fetch_add(1); + buffer = std::make_shared(QueryByteBuffer(pid, SystemConf::getInstance().BLOCK_SIZE)); + } + return buffer; + } + + void free(int pid, std::shared_ptr &buffer) { + if (buffer.use_count() > 1) { + //std::cout << "warning: in worker " + std::to_string(pid) + " the buffer has multiple owners: " + std::to_string(buffer.use_count()) << std::endl; + return; + } + //buffer->clear(); + buffer->m_watermark = 0; + buffer->m_hasBarrier = false; + buffer->m_compressed = false; + buffer->m_originalPosition = 0; + buffer->setPosition(0); + m_pool[pid].push(buffer); + } + + void freeUnsafe(int pid, std::shared_ptr &buffer) { + buffer->m_watermark = 0; + buffer->m_hasBarrier = false; + buffer->m_compressed = false; + buffer->m_originalPosition = 0; + buffer->setPosition(0); + m_pool[pid].push(buffer); + } +}; + +using BoundedQueue = tbb::concurrent_bounded_queue>; +using BoundedQueuePtr = std::shared_ptr; +using Queue = tbb::concurrent_queue>; +using QueuePtr = std::shared_ptr; +using LatQueue = tbb::concurrent_queue; +using LatQueuePtr = std::shared_ptr; + +/* + * + * Queries + * + * */ + +namespace YSBQuery { +struct InputSchema { + long timestamp; + long padding_0; + __uint128_t user_id; + __uint128_t page_id; + __uint128_t ad_id; + long ad_type; + long event_type; + __uint128_t ip_address; + __uint128_t padding_1; + __uint128_t padding_2; +}; + +struct IntermSchema { + long timestamp; + long padding_0; + __uint128_t ad_id; + __uint128_t campaing_id; + __uint128_t padding_1; +}; + +struct OutputSchema { + long timestamp; + __uint128_t ad_id; + int count; +}; + +MurmurHash3<__uint128_t, 16> m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + //return m_hash(event.ad_id) % partitions; + //return ((int) event.ad_id) % partitions; + const __uint128_t __mask = static_cast(-1); + const std::size_t __a = (std::size_t)(event.ad_id & __mask); + const std::size_t __b = (std::size_t)((event.ad_id & (__mask << 64)) >> 64); + auto hasher = std::hash(); + return (hasher(__a) + hasher(__b)) % partitions; + } + std::size_t operator()(const __uint128_t &key) const { + const __uint128_t __mask = static_cast(-1); + const std::size_t __a = (std::size_t)(key & __mask); + const std::size_t __b = (std::size_t)((key & (__mask << 64)) >> 64); + auto hasher = std::hash(); + return hasher(__a) + hasher(__b); + } +}; +struct Eq { + constexpr bool operator()(const __uint128_t &k1, + const __uint128_t &k2) const { + return k1 == k2; + } +}; +using Key = __uint128_t; +using Value = __uint128_t; 
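+// A minimal illustration of the partitioning scheme above (assuming a
+// 64-bit std::size_t): the 128-bit ad_id is folded into two 64-bit halves
+// that are hashed independently and summed, so with e.g. 8 partitions
+//
+//   InputSchema e{};
+//   e.ad_id = (static_cast<__uint128_t>(7) << 64) | 42;  // high=7, low=42
+//   int p = hash::partition(e, 8);  // == (h(42) + h(7)) % 8
+//
+// every record with a given ad_id maps to the same downstream queue.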
+ + +struct static_node { + __uint128_t key; + __uint128_t value; +}; +struct staticHashTable { + int size = 1024; + int mask = size - 1; + static_node *table; + hash hashVal; + + public: + explicit staticHashTable(static_node *table) { this->table = table; } + bool get_value (const __uint128_t key, __uint128_t &result) const { + int ind = hashVal(key) & mask; + int i = ind; + for (; i < this->size; i++) { + if (this->table[i].key == key) { + result = this->table[i].value; + return true; + } + } + for (i = 0; i < ind; i++) { + if (this->table[i].key == key) { + result = this->table[i].value; + return true; + } + } + return false; + } +}; +std::vector *m_staticData[32]; +staticHashTable *staticMap[32]; + +bool isFirst[32] = {true,true,true,true,true,true,true,true, + true,true,true,true,true,true,true,true, + true,true,true,true,true,true,true,true, + true,true,true,true,true,true,true,true}; + +long m_timestampOffset = 0; + +thread_local long partitionOffset[16] {0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0}; + +struct StatelessOp { + static inline void process(int pid, long watermark, char *input, int inLength, std::shared_ptr &output) { + // initialize input + auto data = (InputSchema *) input; + inLength = inLength / sizeof (InputSchema); + + /*for (unsigned long i = 0; i < inLength; ++i) { + printf("[DBG] %09d: %7d %13ld %8ld %13ld %3ld %6ld %2ld \n", + i, data[i].timestamp, (long) data[i].user_id, (long) data[i].page_id, (long) data[i].ad_id, + data[i].ad_type, data[i].event_type, (long) data[i].ip_address); + }*/ + + // initialize hashtable + if (isFirst[pid]) { + auto *sBuf = (static_node *) m_staticData[pid]; + staticMap[pid] = new staticHashTable(sBuf); + isFirst[pid] = false; + } + + for (int idx = 0; idx < inLength; idx++) { + if (data[idx].event_type == 0) { + __uint128_t joinRes; + bool joinFound = staticMap[pid]->get_value(data[idx].ad_id, joinRes); + if (joinFound) { + IntermSchema tempNode = {data[idx].timestamp + m_timestampOffset, 0, data[idx].ad_id, + joinRes, 0}; + output->putBytes((char *) &tempNode, sizeof(IntermSchema)); + } + } + } + + partitionOffset[0] += inLength; + } + + static inline void sendDownstream( + int pid, + BoundedQueuePtr &outputQueue, + std::vector>> *nextOperatorQueues, + std::vector> &partitionBuffers, + std::shared_ptr &pool) { + if (!nextOperatorQueues) { + std::shared_ptr buffer; + while (outputQueue->try_pop(buffer)) { + if (buffer) { + pool->freeUnsafe(buffer->m_id, buffer); + } + } + } else { + // if outputQueue is full wait until data is pushed downstream + std::shared_ptr buffer; + while (outputQueue->try_pop(buffer)) { + if (buffer) { + auto data = (IntermSchema *)buffer->getBuffer().data(); + size_t length = buffer->getPosition() / sizeof(IntermSchema); + auto partitions = partitionBuffers.size(); + auto watermark = buffer->m_watermark; + auto latencyMark = buffer->m_latencyMark; + /*if (length > 0) { + // assume there is a single partition + auto partition = ((int)data[0].ad_id) % partitions; + auto &parBuf = partitionBuffers[partition]; + if (!parBuf) { + parBuf = pool->newInstance(partition); + } + bool hasSinglePartition = true; + for (size_t idx = 0; idx < length; idx++) { + auto nextPart = ((int)data[idx].ad_id) % partitions; + if (partition != nextPart) { + hasSinglePartition = false; + break; + } + parBuf->putBytes((char *)&data[idx], sizeof(IntermSchema)); + } + if (hasSinglePartition) { + auto &queues = (*nextOperatorQueues)[pid]; + while (!queues[partition]->try_push(parBuf)) { + // std::cout << "warning: partition " + 
std::to_string(par) << " + // is full" << std::endl; + } + parBuf = nullptr; + } else { + parBuf->setPosition(0); + for (size_t idx = 0; idx < length; idx++) { + partition = ((int)data[idx].ad_id) % partitions; + auto &buf = partitionBuffers[partition]; + if (!buf) { + buf = pool->newInstance(partition); + } + buf->putBytes((char *)&data[idx], sizeof(IntermSchema)); + } + + int countPartitions = 0; + auto &queues = (*nextOperatorQueues)[pid]; + for (auto par = 0; par < partitions; par++) { + auto &buf = partitionBuffers[par]; + if (buf) { + if (nextOperatorQueues) { + while (!queues[par]->try_push(buf)) { + // std::cout << "warning: partition " + + // std::to_string(par) << " is full" << std::endl; + } + countPartitions++; + } else { + pool->freeUnsafe(buf->m_id, buf); + } + buf = nullptr; + } + } + if (countPartitions > 1) { + std::cout << "Worker " + std::to_string(pid) + " sent to " + + std::to_string(countPartitions) + + " partitions." + << std::endl; + } + } + }*/ + for (size_t idx = 0; idx < length; idx++) { + auto partition = ((int) data[idx].ad_id) % partitions; + auto &buf = partitionBuffers[partition]; + if (!buf) { + buf = pool->newInstance(partition); + } + buf->putBytes((char *)&data[idx], sizeof(IntermSchema)); + } + + int countPartitions = 0; + auto &queues = (*nextOperatorQueues)[pid]; + for (auto par = 0; par < partitions; par++) { + auto &buf = partitionBuffers[par]; + if (buf) { + buf->m_watermark = watermark; + buf->m_latencyMark = latencyMark; + if (nextOperatorQueues) { + while (!queues[par]->try_push(buf)) { + //std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + } + countPartitions++; + } else { + pool->freeUnsafe(buf->m_id, buf); + } + buf = nullptr; + } + } + // free initial batch + pool->freeUnsafe(buffer->m_id, buffer); + } + } + } + } + + static inline void checkpoint(int pid, int fd, std::shared_ptr &temp) { + ::pwrite(fd, &partitionOffset, 16 * sizeof(long), 0); + fsync(fd); + //std::cout << "Worker " + std::to_string(pid) << " finished its checkpoint" << std::endl; + } +}; + + +struct GroupKey { + __uint128_t key; + long window; + bool operator==(const GroupKey &other) const { + return (key == other.key && window == other.window); + } +}; +struct GroupHash { + MurmurHash ghash; + std::size_t operator()(const GroupKey &key) const { return ghash(key); } +}; + + +thread_local std::unordered_map map; +thread_local int checkpointCnt = 0; +thread_local long checkpointSize = 0; +thread_local int checkpointTimes = 0; + +struct StateFulOp { + static inline void process(int pid, long watermark, char *input, int inLength, std::shared_ptr &output) { + // initialize input + auto data = (IntermSchema *) input; + inLength = inLength / sizeof (IntermSchema); + + for (int idx = 0; idx < inLength; idx++) { + GroupKey tempNode = {data[idx].ad_id, data[idx].timestamp / 100}; + auto elem = map.find(tempNode); + if (elem != map.end()) { + elem->second++; + } else { + map[tempNode] = 1; + } + } + + // do this only with a watermark + if (watermark > 0) { + auto it = map.begin(); + while (it != map.end()) { + if (it->first.window < watermark) { + OutputSchema tempNode = {it->first.window, it->first.key, it->second}; + output->putBytes((char *)&tempNode, sizeof(OutputSchema)); + it = map.erase(it); + } else + it++; + } + map.size(); + } + + //for (const auto &elem : map) { + //} + //map.clear(); + } + + static inline void sendDownstream( + int pid, + BoundedQueuePtr &outputQueue, + std::vector>> *nextOperatorQueues, + std::vector> &partitionBuffers, 
+ std::shared_ptr &pool) { + std::shared_ptr buffer; + while (outputQueue->try_pop(buffer)) { + if (buffer) { + pool->freeUnsafe(buffer->m_id, buffer); + } + } + } + + static inline void checkpoint(int pid, int fd, std::shared_ptr &temp) { + for (const auto &elem : map) { + OutputSchema tempNode = {elem.first.window, elem.first.key, + elem.second}; + temp->putBytes((char *)&tempNode, sizeof(OutputSchema)); + } + ::pwrite(fd, temp->getBuffer().data(), temp->getPosition(), 0); + fsync(fd); + checkpointTimes++; + checkpointCnt++; + checkpointSize += temp->getPosition(); + if (pid == 0 && checkpointTimes == 5) { + std::cout << "Worker " + std::to_string(pid) << " finished its checkpoint: " + std::to_string(temp->getPosition()) + + " bytes with " + std::to_string(map.size()) + " elements" + + "[AVG: " + std::to_string(checkpointSize/checkpointCnt) + "]" << std::endl; + checkpointTimes = 0; + } + temp->setPosition(0); + } +}; + +}; + +namespace CM1Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + long jobId; + long taskId; + long machineId; + int eventType; + int userId; + int category; + int priority; + float cpu; + float ram; + float disk; + int constraints; +}; +struct hash { + static inline int partition(InputSchema &event, int partitions) { + auto hasher = std::hash(); + return (hasher(event.category)) % partitions; + } + std::size_t operator()(const int &key) const { + auto hasher = std::hash(); + return hasher(key); + } +}; +}; + +namespace CM2Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + long jobId; + long taskId; + long machineId; + int eventType; + int userId; + int category; + int priority; + float cpu; + float ram; + float disk; + int constraints; +}; +struct hash { + static inline int partition(InputSchema &event, int partitions) { + auto hasher = std::hash(); + return (hasher(event.jobId)) % partitions; + } + std::size_t operator()(const int &key) const { + auto hasher = std::hash(); + return hasher(key); + } +}; +}; + +namespace SG1Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; +struct hash { + static inline int partition(InputSchema &event, int partitions) { + return 0; + } + std::size_t operator()(const int &key) const { + return 0; + } +}; +}; + +namespace SG2Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; +struct key { + int plug; + int household; + int house; +}; +MurmurHash3 m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + key temp = {event.plug, event.household, event.house}; + return m_hash(temp) % partitions; + } + std::size_t operator()(const InputSchema &event) const { + key temp = {event.plug, event.household, event.house}; + return m_hash(temp); + } +}; +}; + +namespace SG3Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + float value; + int property; + int plug; + int household; + int house; + int padding; +}; +struct key { + int plug; + int household; + int house; +}; +MurmurHash3 m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + key temp = {event.plug, event.household, event.house}; + return m_hash(temp) % partitions; + } + std::size_t operator()(const InputSchema &event) const { + key temp = {event.plug, event.household, 
event.house}; + return m_hash(temp); + } +}; +}; + +namespace LRB1Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + int vehicle; + float speed; + int highway; + int lane; + int direction; + int position; +}; +struct key { + int highway; + int direction; + int segment; +}; +MurmurHash3 m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + key temp = {event.highway, event.direction, event.position / 5280}; + return m_hash(temp) % partitions; + } + std::size_t operator()(const InputSchema &event) const { + key temp = {event.highway, event.direction, event.position / 5280}; + return m_hash(temp); + } +}; +}; + +namespace LRB2Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + int vehicle; + float speed; + int highway; + int lane; + int direction; + int position; +}; +struct key { + int highway; + int vehicle; + int direction; + int segment; +}; +MurmurHash3 m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + key temp = {event.vehicle, event.highway, event.direction, event.position / 5280}; + return m_hash(temp) % partitions; + } + std::size_t operator()(const InputSchema &event) const { + key temp = {event.vehicle, event.highway, event.direction, event.position / 5280}; + return m_hash(temp); + } +}; +}; + +namespace LRB3Query { +std::vector *m_staticData[32]; +struct InputSchema { + long timestamp; + int vehicle; + float speed; + int highway; + int lane; + int direction; + int position; +}; +struct key { + int highway; + int vehicle; + int direction; + int segment; +}; +MurmurHash3 m_hash; + +struct hash { + static inline int partition(InputSchema &event, int partitions) { + key temp = {event.vehicle, event.highway, event.direction, event.position / 5280}; + return m_hash(temp) % partitions; + } + std::size_t operator()(const InputSchema &event) const { + key temp = {event.vehicle, event.highway, event.direction, event.position / 5280}; + return m_hash(temp); + } +}; +}; + +namespace ME1Query { +std::vector *m_staticData[32]; +struct alignas(64) InputSchema { + long timestamp; + long messageIndex; + int mf01; //Electrical Power Main Phase 1 + int mf02; //Electrical Power Main Phase 2 + int mf03; //Electrical Power Main Phase 3 + int pc13; //Anode Current Drop Detection Cell 1 + int pc14; //Anode Current Drop Detection Cell 2 + int pc15; //Anode Current Drop Detection Cell 3 + unsigned int pc25; //Anode Voltage Drop Detection Cell 1 + unsigned int pc26; //Anode Voltage Drop Detection Cell 2 + unsigned int pc27; //Anode Voltage Drop Detection Cell 3 + unsigned int res; + int bm05 = 0; + int bm06 = 0; +}; +struct hash { + static inline int partition(InputSchema &event, int partitions) { + return 0; + } + std::size_t operator()(const int &key) const { + return 0; + } +}; +}; + +namespace NBQ5Query { +std::vector *m_staticData[32]; +struct alignas(16) InputSchema { + long timestamp; + long id; + long itemName; + long description; + long initialBid; + long reserve; + long expires; + long seller; + long category; + long padding_0; + long padding_1; + long padding_2; + long padding_3; + long padding_4; + long padding_5; + long padding_6; +}; +struct hash { + static inline int partition(InputSchema &event, int partitions) { + auto hasher = std::hash(); + return (hasher(event.id)) % partitions; + } + std::size_t operator()(const int &key) const { + auto hasher = std::hash(); + return hasher(key); + } +}; +}; \ No newline at end of file diff --git 
a/test/benchmarks/kafka-flink/CMakeLists.txt b/test/benchmarks/kafka-flink/CMakeLists.txt new file mode 100644 index 0000000..f70d0c6 --- /dev/null +++ b/test/benchmarks/kafka-flink/CMakeLists.txt @@ -0,0 +1,93 @@ +find_package(GTest REQUIRED) +include_directories(${GTEST_INCLUDE_DIRS}) + +# Configure CCache if available +find_program(CCACHE_PROGRAM ccache) +if (CCACHE_PROGRAM) + message("Using CCache...") + #set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + #set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER "/usr/lib/ccache/clang++") +endif () + +SET(CPP_FILES + ../../../src/filesystem/File.cpp + ../../../src/checkpoint/FileBackedCheckpointCoordinator.cpp + ../../../src/checkpoint/BlockManager.cpp + ../../../src/checkpoint/LineageGraph.cpp + ../../../src/cql/expressions/Expression.cpp + ../../../src/dispatcher/ITaskDispatcher.cpp + ../../../src/dispatcher/JoinTaskDispatcher.cpp + ../../../src/dispatcher/TaskDispatcher.cpp + ../../../src/compression/CompressionCodeGenUtils.cpp + ../../../src/compression/CompressionStatistics.cpp + ../../../src/monitors/CompressionMonitor.cpp + ../../../src/monitors/PerformanceMonitor.cpp + ../../../src/monitors/Measurement.cpp + ../../../src/monitors/LatencyMonitor.cpp + ../../../src/processor/TaskProcessor.cpp + ../../../src/result/ResultHandler.cpp + ../../../src/tasks/NumaTaskQueueWrapper.cpp + ../../../src/tasks/WindowBatch.cpp + ../../../src/tasks/Task.cpp + ../../../src/utils/AttributeType.cpp + ../../../src/utils/Query.cpp + ../../../src/utils/QueryApplication.cpp + ../../../src/utils/Utils.cpp + ../../../src/utils/SystemConf.cpp + ) + +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUM -DHAVE_Oo") + +find_package(benchmark REQUIRED) +include_directories(${benchmark_INCLUDE_DIRS}) + +FIND_LIBRARY(tbb NAMES libtbb.so) + +link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) + +# Yahoo Benchmark +add_executable(yahoo_benchmark_flink + main.cpp + ${CPP_FILES} + ) +target_link_options(yahoo_benchmark_flink PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(yahoo_benchmark_flink ${Boost_LIBRARIES}) +endif () +target_include_directories(yahoo_benchmark_flink PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(yahoo_benchmark_flink + ${LIBPMEMOBJ++_LIBRARIES} + operatorJITLib + boost_fiber + boost_system + tbb snappy + pthread dl aio uuid stdc++fs) +target_compile_options(yahoo_benchmark_flink PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(yahoo_benchmark_flink PROPERTIES COMPILE_FLAGS "-DHAVE_NUMA -DPREFETCH") + + +# Kafka Benchmarks +add_executable(kafka_benchmarks + mainKafka.cpp + ${CPP_FILES} + ) +target_link_options(kafka_benchmarks PRIVATE -Wl,--unresolved-symbols=ignore-all) +if (Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + target_link_libraries(kafka_benchmarks ${Boost_LIBRARIES}) +endif () +target_include_directories(kafka_benchmarks PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(kafka_benchmarks + ${LIBPMEMOBJ++_LIBRARIES} + operatorJITLib + boost_fiber + boost_system + tbb snappy + pthread dl aio uuid stdc++fs) +target_compile_options(kafka_benchmarks PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG) +set_target_properties(kafka_benchmarks PROPERTIES COMPILE_FLAGS 
"-DHAVE_NUMA -DPREFETCH") \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/Flink.h b/test/benchmarks/kafka-flink/Flink.h new file mode 100644 index 0000000..e1a164c --- /dev/null +++ b/test/benchmarks/kafka-flink/Flink.h @@ -0,0 +1,814 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "BenchmarkUtils.h" +#include "benchmarks/kafka-flink/queues/readerwritercircularbuffer.h" +#include "benchmarks/kafka-flink/queues/readerwriterqueue.h" +#include "snappy.h" +#include "utils/Utils.h" + +/* + * \brief A prototype implementation of Flink in C++. + * + * */ + +std::atomic startFlink = false; +// for checkpointing +const bool batchResults = true; +const bool copyInput = false; +thread_local std::unordered_set blockedQueues; + +std::atomic flinkBarrier; +size_t m_flinkDuration = 60; +thread_local bool m_flinkMeasureLatency = true; +std::mutex m_flinkMeasurementsMutex; +std::vector m_flinkTotalMeasurements; +int m_flinkBatchSize = 8; + +template +struct FlinkProcessor { + int m_opId, m_pid, m_allocateId; + std::shared_ptr m_pool, m_kafkaPool = nullptr; + BoundedQueuePtr m_inputQueue; + std::vector> *m_inputShuffleQueues; + BoundedQueuePtr m_outputQueue; + std::shared_ptr m_tempBuffer, m_tempResBuffer; + + long m_watermark = 0; + long limitOffset = 100; + long limit = 100; + + bool m_hasShuffle = false; + + // upstream queues + int m_upstreamQueues = 0; + + // downstream queues + std::vector> *m_nextOperatorQueues; + std::vector>> *m_nextOperatorShuffleQueues; + std::vector> m_partitionBuffers; + int m_itemsToSend = -1, m_itemsToSend_ = 0; + + // file properties + std::vector> m_diskBuffer; + size_t m_offset = 0; + size_t m_fileSize = 64 * 1024 * 1024; + const std::string m_fileName; + int m_fd; + + // used for latency measurements + long m_timestampReference = 0; + LatQueuePtr m_latQueue; + int m_repeat = 0; + std::vector m_measurements; + long m_count = 0; + double m_min = DBL_MAX, m_max = DBL_MIN, m_avg = 0; + double m_latency = 0; + + FlinkProcessor(int opId, int pid, bool hasShuffle, BoundedQueuePtr &inputQueue, BoundedQueuePtr &outputQueue, + std::shared_ptr &pool, std::shared_ptr kafkaPool = nullptr, + std::vector> *nextOperatorQueues = nullptr, + std::vector>> *nextOperatorShuffleQueues = nullptr) + : m_opId(opId), + m_pid(pid), + m_hasShuffle(hasShuffle), + m_pool(pool), + m_kafkaPool(kafkaPool), + m_inputQueue(inputQueue), + m_outputQueue(outputQueue), + m_nextOperatorQueues(nextOperatorQueues), + m_nextOperatorShuffleQueues(nextOperatorShuffleQueues), + m_diskBuffer(m_flinkBatchSize), + m_fileName(SystemConf::FILE_ROOT_PATH + "/kafka/flink_file_" + std::to_string(opId) + "_" + std::to_string(m_pid)) { + m_allocateId = (m_opId == 0) ? 
m_pid : m_opId * SystemConf::getInstance().WORKER_THREADS + m_pid - 1; + m_tempBuffer = m_pool->newInstance(m_pid); + + if (m_nextOperatorQueues) { + m_partitionBuffers.resize(m_nextOperatorQueues->size()); + for (auto &p: m_partitionBuffers) { + //p = m_pool->newInstance(m_pid); + } + } + + // create file + std::remove(m_fileName.c_str()); + m_fd = ::open(m_fileName.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" + void operator()() { + while (!startFlink) + ; + + std::shared_ptr buffer; + while (true) { + try { + long latencyMark = -1; + buffer = getNextData(); + + if (SystemConf::LATENCY_ON && m_latQueue) { + while (!m_latQueue->try_pop(latencyMark)) + ; + } + + //__builtin_prefetch(buffer->getBuffer().data(), 1, 3); + + // decompress data + if (buffer->m_compressed) { + snappy::RawUncompress(buffer->getBuffer().data(), buffer->getPosition(), m_tempBuffer->getBuffer().data()); + m_tempBuffer->setPosition(buffer->m_originalPosition); + freeInputBuffer(buffer); + buffer = m_tempBuffer; + } else if (m_opId == 0) { + if (copyInput) { + std::memcpy(m_tempBuffer->getBuffer().data(), buffer->getBuffer().data(), buffer->getPosition()); + m_tempBuffer->setPosition(buffer->getPosition()); + ////freeInputBuffer(buffer); + buffer = m_tempBuffer; + } + } else if (m_opId > 0 && buffer->m_latencyMark > 0) { + latencyMark = buffer->m_latencyMark; + } + + auto resultBuffer = m_pool->newInstance(m_pid); + resultBuffer->m_latencyMark = latencyMark; + // process data + Op::process(m_pid, buffer->m_watermark, buffer->getBuffer().data(), buffer->getPosition(), resultBuffer); + + // add watermark + if (m_watermark >= limit) { + resultBuffer->m_watermark = m_watermark; + limit += limitOffset; + } + // Put result in the output queue + if (/*resultBuffer->m_hasBarrier || */resultBuffer->getPosition() > 0 || resultBuffer->m_watermark > 0) { + // batch results to save up space + if (batchResults) { + if (!m_tempResBuffer) { + m_tempResBuffer = resultBuffer; + } else { + if (m_tempResBuffer->tryToMerge(resultBuffer)) { + m_pool->free(resultBuffer->m_id, resultBuffer); + } else { + addToOutputQueue(m_tempResBuffer); + m_tempResBuffer = resultBuffer; + } + } + } else { + addToOutputQueue(resultBuffer); + } + } else { + m_pool->free(resultBuffer->m_id, resultBuffer); + } + resultBuffer.reset(); + + // send downstream + if (m_hasShuffle) { + if (useCheckpoints) { + // todo: this won't work if there are no items to send + if (m_itemsToSend >= 0) { + // Op::sendDownstream(m_pid, m_outputQueue, nullptr,m_partitionBuffers, m_pool); + while (m_outputQueue->try_pop(buffer)) { + if (buffer) { + if (SystemConf::LATENCY_ON && m_flinkMeasureLatency) { + measureLatency(buffer->m_latencyMark); + } + m_pool->freeUnsafe(buffer->m_id, buffer); + } + } + releaseBarrier.fetch_add(-1); + m_itemsToSend = -1; + if (debug) { + std::cout << "Worker " + std::to_string(m_pid) << " decreased the release barrier" << std::endl; + } + } + } else { + //Op::sendDownstream(m_pid, m_outputQueue, nullptr, m_partitionBuffers, m_pool); + while (m_outputQueue->try_pop(buffer)) { + if (buffer) { + if (SystemConf::LATENCY_ON && m_flinkMeasureLatency) { + measureLatency(buffer->m_latencyMark); + } + m_pool->freeUnsafe(buffer->m_id, buffer); + } + } + } + reportLatency(); + } else { + Op::sendDownstream(m_pid, m_outputQueue, m_nextOperatorShuffleQueues, m_partitionBuffers, m_pool); + } + + // increase watermark + m_watermark += YSBQuery::m_timestampOffset; + + } catch 
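// End of the per-batch cycle above: pop input, snappy-decompress it if it
// arrived compressed from Kafka, run Op::process over it, and merge small
// result buffers (batchResults) to cut queue traffic. The first operator
// forwards via Op::sendDownstream; the shuffled (final) operator instead
// drains its output queue, recording latency, and advances the watermark.
// Failures fall through to the handler that follows.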
(std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + } + } + + inline void freeInputBuffer(std::shared_ptr &buffer) { + if (m_kafkaPool) { + m_kafkaPool->free(buffer->m_id, buffer); + } else { + m_pool->freeUnsafe(buffer->m_id, buffer); + } + buffer.reset(); + } + + inline std::shared_ptr getNextData() { + std::shared_ptr buffer; + if (!useCheckpoints) { + if (m_hasShuffle) { + if (!m_inputShuffleQueues) { + throw std::runtime_error( + "error: m_inputShuffleQueues are not initialized"); + } + while (true) { + for (auto &sq : *m_inputShuffleQueues) { + sq->try_pop(buffer); + if (buffer) { + return buffer; + } + } + // std::cout << "warning: worker " + std::to_string(m_pid) << " is waiting for work" << std::endl; + } + } else { + while (!m_inputQueue->try_pop(buffer) || !buffer) { + // std::cout << "warning: worker " + std::to_string(m_pid) << " is waiting for work" << std::endl; + std::this_thread::sleep_for(std::chrono::nanoseconds(1)); + } + } + } else { + if (m_hasShuffle) { + if (!m_inputShuffleQueues) { + throw std::runtime_error( + "error: m_inputShuffleQueues are not initialized"); + } + while (true) { + int cnt = 0; + for (auto &sq : *m_inputShuffleQueues) { + if (blockedQueues.find(sq) == blockedQueues.end()) { + sq->try_pop(buffer); + } + + if (buffer) { + if (buffer->m_hasBarrier) { + if (debug) { + std::cout << "Worker " + std::to_string(m_pid) << " received a barrier from " + std::to_string(cnt) << std::endl; + } + // block queue + blockedQueues.insert(sq); + m_pool->free(buffer->m_id, buffer); + buffer = nullptr; + } else { + return buffer; + } + + // if all queues blocked + if (blockedQueues.size() == m_upstreamQueues) { + if (m_tempResBuffer && m_tempResBuffer->getPosition() > 0) { + addToOutputQueue(m_tempResBuffer); + m_tempResBuffer.reset(); + m_itemsToSend_++; + } + m_itemsToSend = m_itemsToSend_; + m_itemsToSend_ = 0; + //auto &queues = (*m_nextOperatorShuffleQueues)[m_pid]; + // send out markers + /*for (auto par = 0; par < queues.size(); par++) { + auto barrierBuf = m_pool->newInstance(par); + barrierBuf->m_hasBarrier = true; + while (!queues[par]->try_push(barrierBuf)) + ; + }*/ + // take a snapshot + Op::checkpoint(m_pid, m_fd, m_tempBuffer); + // update checkpoint counter + checkpointCounter.fetch_add(1); + // clear the blocked queues + blockedQueues.clear(); + } + } + cnt++; + } + } + } else { + while (true) { + while (!m_inputQueue->try_pop(buffer) || !buffer) { + // std::cout << "warning: worker " + std::to_string(m_pid) << " is waiting for work" << std::endl; + std::this_thread::sleep_for(std::chrono::nanoseconds(1)); + } + if (buffer->m_hasBarrier) { + // block queue + blockedQueues.insert(m_inputQueue); + m_pool->free(buffer->m_id, buffer); + buffer = nullptr; + // if all queues blocked + if (blockedQueues.size() == 1) { + auto &queues = (*m_nextOperatorShuffleQueues)[m_pid]; + // send out markers + if (debug) { + std::cout << "Worker op0_" + std::to_string(m_pid) << " starts sending barriers" << std::endl; + } + if (batchResults) { + addToOutputQueue(m_tempResBuffer); + m_tempResBuffer.reset(); + Op::sendDownstream(m_pid, m_outputQueue, m_nextOperatorShuffleQueues, m_partitionBuffers, m_pool); + } + for (auto par = 0; par < queues.size(); par++) { + auto barrierBuf = m_pool->newInstance(par); + barrierBuf->m_hasBarrier = true; + int cnt = 0; + while (!queues[par]->try_push(barrierBuf)) { + cnt++; + if ((cnt % 1000000) == 0) { + //std::cout << "warning: waiting to send the barrier downstream" << std::endl; + } + } + if (debug) { + 
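// Source-side barrier handling: once this worker's input queue is blocked, it
// flushes any batched results, pushes one barrier into every downstream
// shuffle partition, snapshots its state with Op::checkpoint, bumps
// checkpointCounter, and unblocks; this is the aligned-barrier protocol that
// Flink-style checkpoints use.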
std::cout << "Worker op0_" + std::to_string(m_pid) << " sent a barrier to " + std::to_string(par) << std::endl; + } + } + // take a snapshot + Op::checkpoint(m_pid, m_fd, m_tempBuffer); + // update checkpoint counter + checkpointCounter.fetch_add(1); + // clear the blocked queues + blockedQueues.clear(); + //std::cout << "Worker " + std::to_string(m_pid) << " unblocked its queues" << std::endl; + } + } else { + break; + } + } + } + } + return buffer; + } + + inline void addToOutputQueue(std::shared_ptr &resultBuffer) { + /*if (resultBuffer->m_watermark > 0) { + std::cout << "sending watermark " + std::to_string(resultBuffer->m_watermark) << std::endl; + }*/ + bool flag = m_outputQueue->try_push(resultBuffer); + if (!flag && m_outputQueue->capacity() == m_outputQueue->size()) { + //if (m_pid == 0) { + std::cout << "warning: increasing the output queue size in pipeline " + std::to_string(m_hasShuffle) << std::endl; + //} + m_outputQueue->set_capacity(m_outputQueue->capacity() * 2); + m_outputQueue->push(resultBuffer); + } + m_itemsToSend_++; + } + + inline void measureLatency(long latencyMark) { + double dt = 0; + long t1 = latencyMark; + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = std::chrono::duration_cast( + currentTime.time_since_epoch()) + .count(); + long t2 = (currentTimeNano - m_timestampReference) / 1000L; + dt = ((double)(t2 - t1)) / 1000.; /* In milliseconds */ + + m_measurements.push_back(dt); + + m_latency += dt; + m_count += 1; + m_min = std::min(dt, m_min); + m_max = std::max(dt, m_max); + m_avg = m_latency / ((double)m_count); + + if ((t2 / 1000) >= (m_flinkDuration * 1000)) { + stop(false); + } + } + + void stop(bool print = true) { + int length = m_measurements.size(); + if (length < 1) + return; + + if (print) { + std::sort(m_measurements.begin(), m_measurements.end()); + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << "[MON] [LatencyMonitor] " << std::to_string(length) << " measurements\n"; + streamObj << "[MON] [LatencyMonitor] " << m_pid << " 5th " << std::to_string(evaluateSorted(5)); + streamObj << " 25th " << std::to_string(evaluateSorted(25)); + streamObj << " 50th " << std::to_string(evaluateSorted(50)); + streamObj << " 75th " << std::to_string(evaluateSorted(75)); + streamObj << " 99th " << std::to_string(evaluateSorted(99)); + std::cout << streamObj.str() << std::endl; + } else { + std::lock_guard guard(m_flinkMeasurementsMutex); + //std::cout << m_pid << " adding its latency results " << std::endl; + //m_flinkTotalMeasurements.resize(m_flinkTotalMeasurements.size() + m_measurements.size()); + m_flinkTotalMeasurements.insert(m_flinkTotalMeasurements.end(), m_measurements.begin(), m_measurements.end()); + } + flinkBarrier.fetch_add(-1); + m_flinkMeasureLatency = false; + } + + double evaluateSorted(double p) { + double n = m_measurements.size(); + double pos = p * (n + 1) / 100; + double fpos = floor(pos); + int intPos = (int) fpos; + double dif = pos - fpos; + + if (pos < 1) { + return m_measurements[0]; + } + if (pos >= n) { + return m_measurements[m_measurements.size() - 1]; + } + + double lower = m_measurements[intPos - 1]; + double upper = m_measurements[intPos]; + return lower + dif * (upper - lower); + } + + inline void reportLatency() { + if (SystemConf::LATENCY_ON && m_count > 0) { + m_repeat++; + if (m_pid == 0 && m_repeat == 1024) { + std::cout << "Latency metrics [avg " + std::to_string(m_avg) + + "] " + "[min " + std::to_string(m_min) + "] " + + 
"[max " + std::to_string(m_max) + "] " + << std::endl; + m_repeat = 0; + } + } + } +}; + + +// todo: this template doesn't work... +template +struct Operator { + typedef boost::alignment::aligned_allocator aligned_allocator; + using IntBuffer = std::vector; + + const int m_queueSize = 32; + + int m_opId = 0; + int m_numberOfThreads = 0; + int m_partitions = 0; + IntBuffer m_partitionOffsets; + std::vector m_threads; + std::vector>> m_processors; + std::vector> m_readerOffsets; + std::vector> m_consumerQueues; + std::vector> m_producerQueues; + std::vector> m_latQueues; + std::vector> m_partitionBuffers; + std::shared_ptr m_pool, m_kafkaPool = nullptr; + + // for grouping operators + bool m_hasShuffle = false; + std::vector>> m_shuffleQueues; + int m_upstreamQueues = 0; + + long m_timestampReference = 0; + + /* Information used for pining worker threads to cores in-order based on the + * socket topology */ + std::vector m_orderedCores; + + // next operator -- assume only one downstream operator + std::shared_ptr> m_nextOperator; + std::vector> *m_nextOperatorQueues = nullptr; + std::vector>> *m_nextOperatorShuffleQueues = nullptr; + + explicit Operator(int opId, std::shared_ptr &pool, bool hasShuffle, int workers = SystemConf::getInstance().WORKER_THREADS, + long timestampReference = 0) + : m_opId(opId), + m_numberOfThreads(workers), + m_partitions(m_numberOfThreads), + m_partitionOffsets(m_numberOfThreads, 0), + m_processors(m_numberOfThreads), + m_readerOffsets(m_numberOfThreads, IntBuffer(1, 0)), + m_consumerQueues(m_numberOfThreads,std::make_shared()), + m_producerQueues(m_numberOfThreads,std::make_shared()), + m_latQueues(m_numberOfThreads, std::make_shared()), + m_partitionBuffers(m_numberOfThreads), + m_pool(pool), m_hasShuffle(hasShuffle), + m_timestampReference(timestampReference) { + + if (m_hasShuffle) { + m_shuffleQueues.resize(m_partitions); + for (auto &sq: m_shuffleQueues) { + sq.resize(m_partitions); + for (int i = 0; i < m_numberOfThreads; ++i) { + sq[i] = std::make_shared(); + sq[i]->set_capacity(m_queueSize); + } + } + } + } + + + void connectWith(std::shared_ptr> &next) { + m_nextOperatorQueues = new std::vector>(); + for (auto &q: next->m_consumerQueues) { + m_nextOperatorQueues->push_back(q); + } + if (next->m_hasShuffle) { + m_nextOperatorShuffleQueues = &next->m_shuffleQueues; + } + m_nextOperator = next; + + next->m_upstreamQueues = m_partitions; + } + + void setupOperator(bool autoConsume = true) { + Utils::getOrderedCores(m_orderedCores); + for (int i = 0; i < m_numberOfThreads; ++i) { + m_consumerQueues[i] = std::make_shared(); + m_consumerQueues[i]->set_capacity(m_queueSize); + m_producerQueues[i] = std::make_shared(); + m_producerQueues[i]->set_capacity(m_queueSize); + m_latQueues[i] = std::make_shared(); + m_processors[i] = std::make_unique>(m_opId, i, m_hasShuffle, m_consumerQueues[i], m_producerQueues[i], m_pool, m_kafkaPool, m_nextOperatorQueues, m_nextOperatorShuffleQueues); + m_processors[i]->m_timestampReference = m_timestampReference; + if (m_hasShuffle) { + m_processors[i]->m_inputShuffleQueues = &m_shuffleQueues[i]; + m_processors[i]->m_upstreamQueues = m_upstreamQueues; + } else { + m_processors[i]->m_latQueue = m_latQueues[i]; + } + } + + if (autoConsume) { + startThreads(); + } + } + + void startThreads() { + for (int i = 0; i < m_numberOfThreads; ++i) { + m_threads.emplace_back(std::thread(*m_processors[i])); + Utils::bindProcess(m_threads[i], m_orderedCores[i + 1]); + } + } +}; + +struct JobManager { + std::shared_ptr> m_op1; + std::shared_ptr 
m_pool; + long m_checkpointId = 0; + long m_checkpoinDur = 0; + long m_checkpoinCnt = 0; + + [[maybe_unused]] JobManager(std::shared_ptr>&op1, + std::shared_ptr &pool) + : m_op1(op1), m_pool(pool) {} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" + [[noreturn]] void operator()() { + while (!startFlink) + ; + + std::cout << "[CP] starting the checkpoint coordinator" << std::endl; + auto t1 = std::chrono::high_resolution_clock::now(); + auto t2 = t1; + auto time_span = + std::chrono::duration_cast>(t2 - t1); + + while(true) { + auto duration = + std::max((int)(SystemConf::getInstance().CHECKPOINT_INTERVAL - + (size_t) (time_span.count() * 1000)), 0); + if (duration) { + //std::cout << "[CP] sleeping for " + std::to_string(duration) << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(duration)); + } + + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "[CP] starting checkpoint " + std::to_string(m_checkpointId) << std::endl; + // reset counter and barrier + checkpointCounter.store(0); + releaseBarrier.store(SystemConf::getInstance().WORKER_THREADS); + + // insert marker to the first operator + pushBarriers.store(true); + /*auto &queues = m_op1->m_consumerQueues; + for (auto &q: queues) { + auto barrier = m_pool->newInstance(0); + barrier->m_hasBarrier = true; + while (!q->try_push(barrier)) + ; + }*/ + + while (checkpointCounter.load() != SystemConf::getInstance().WORKER_THREADS * 2) { + //std::cout << "[CP] waiting for the checkpointCounter: " + std::to_string(checkpointCounter) << std::endl; + //std::this_thread::sleep_for(std::chrono::nanoseconds (1)); + _mm_pause(); + } + + if (debug) { + std::cout << "[CP] waiting for the releaseBarrier: " + std::to_string(releaseBarrier) << std::endl; + } + + while (releaseBarrier.load() != 0){ + // std::cout << "[CP] waiting for the releaseBarrier: " + std::to_string(releaseBarrier) << std::endl; + // //std::this_thread::sleep_for(std::chrono::nanoseconds (1)); + _mm_pause(); + } + + m_checkpointId++; + typedef std::chrono::milliseconds ms; + t2 = std::chrono::high_resolution_clock::now(); + time_span = std::chrono::duration_cast>(t2 - t1); + auto ms_time = std::chrono::duration_cast(time_span); + m_checkpoinDur += ms_time.count(); + m_checkpoinCnt++; + std::cout << "[CP] checkpoint duration " + std::to_string(ms_time.count()) + " ms " + "[AVG:" + std::to_string(m_checkpoinDur/m_checkpoinCnt) + "]" << std::endl; + } + } +}; + +struct FlinkYSB { + int m_numberOfThreads = 0; + int m_partitions = 0; + std::vector> m_partitionBuffers; + std::shared_ptr m_pool; + + long m_timestampReference = 0; + + bool m_first = true; + + std::shared_ptr> m_op1; + std::shared_ptr> m_op2; + + std::vector m_threads; + std::unique_ptr m_jobManager; + + // kafka variables + Kafka *m_kakfa; + std::shared_ptr m_kafkaPool; + + explicit FlinkYSB(int workers = SystemConf::getInstance().WORKER_THREADS, + long timestampReference = 0, bool autoConsume = true) + : m_numberOfThreads(workers), + m_partitions(m_numberOfThreads), + m_partitionBuffers(m_numberOfThreads), + m_pool(std::make_shared(m_numberOfThreads * 2)), + m_timestampReference(timestampReference) { + + m_op1 = std::make_shared>(0, m_pool, false, m_partitions, timestampReference); + m_op2 = std::make_shared>(1, m_pool, true, m_partitions, timestampReference); + m_op1->connectWith(m_op2); + m_op1->setupOperator(autoConsume); + m_op2->setupOperator(autoConsume); + + if (useCheckpoints) { + m_jobManager = std::make_unique(m_op1, m_pool); + 
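// The JobManager thread is the checkpoint coordinator: every
// CHECKPOINT_INTERVAL ms it raises pushBarriers, waits until
// checkpointCounter reaches 2 * WORKER_THREADS (both operators snapshotted),
// then waits for releaseBarrier to drain before logging the round's duration
// and the running average.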
m_threads.emplace_back(std::thread(*m_jobManager)); + Utils::bindProcess(m_threads[0], 0); + } + } + + static void startWorkers() { + std::cout << "Starting flink workers" << std::endl; + startFlink.store(true); + } + + void connect(Kafka *kafka) { + if (!kafka) { + throw std::runtime_error("error: kafka is not set"); + } + m_kakfa = kafka; + m_kafkaPool = kafka->m_pool; + int idx = 0; + for (auto &q: m_op1->m_consumerQueues) { + m_op1->m_processors[idx]->m_kafkaPool = m_kafkaPool; + kafka->m_producerQueues[idx] = q; + kafka->m_processors[idx]->m_outputQueue = q; + if (SystemConf::LATENCY_ON) { + kafka->m_latQueues[idx] = m_op1->m_latQueues[idx]; + m_op1->m_processors[idx]->m_latQueue = m_op1->m_latQueues[idx]; + } + idx++; + } + + // start now the threads + kafka->startThreads(); + m_op1->startThreads(); + m_op2->startThreads(); + } + + void processPartitionedData(std::vector &values, long latencyMark) { + // pay the partitioning tax once and keep sending over the same data + if (m_first) { + auto data = (YSBQuery::InputSchema *)values.data(); + size_t length = values.size() / sizeof(YSBQuery::InputSchema); + + // partition by key + for (size_t idx = 0; idx < length; idx++) { + auto partition = YSBQuery::hash::partition( data[idx], m_partitions); // m_hash(data[idx].ad_id) % m_partitions; + auto &buffer = m_partitionBuffers[partition]; + if (!buffer) { + buffer = m_pool->newInstance(partition); + } + buffer->putBytes((char *)&data[idx], sizeof(YSBQuery::InputSchema)); + } + m_first = false; + } + + bool hasSentBarrier = false; + bool beforeLoopBarrier = pushBarriers.load(); + for (auto par = 0; par < m_partitions; par++) { + if (useCheckpoints && beforeLoopBarrier) { + auto barrier = m_pool->newInstance(0); + barrier->m_hasBarrier = true; + if (debug) { + std::cout << "Start pushing checkpoint barriers downstream to " + std::to_string(par) << std::endl; + } + while (!m_op1->m_consumerQueues[par]->try_push(barrier)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + _mm_pause(); + } + //std::cout << "Finished pushing checkpoint barriers downstream" << std::endl; + hasSentBarrier = true; + } else { + auto &buffer = m_partitionBuffers[par]; + if (buffer) { + auto tempBuffer = buffer; + if (!tempBuffer) { + throw std::runtime_error("error: adding invalid buffer to the queue"); + } + while (!m_op1->m_consumerQueues[par]->try_push(tempBuffer)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + _mm_pause(); + } + if (SystemConf::LATENCY_ON) { + m_op1->m_latQueues[par]->push(latencyMark); + } + } + } + } + + if (useCheckpoints && beforeLoopBarrier && hasSentBarrier) { + //std::cout << "Unsetting pushBarriers flag" << std::endl; + pushBarriers.store(false); + } + } + + void measureLatency() { + //while (kafkaBarrier.load() != 0) { + // ; + //} + if (flinkBarrier.load() != 0) { + std::this_thread::sleep_for(std::chrono::seconds (2)); + } + std::sort(m_flinkTotalMeasurements.begin(), m_flinkTotalMeasurements.end()); + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << "[MON] [LatencyMonitor] 5th " << std::to_string(evaluateSorted(5, m_flinkTotalMeasurements)); + streamObj << " 25th " << std::to_string(evaluateSorted(25, m_flinkTotalMeasurements)); + streamObj << " 50th " << std::to_string(evaluateSorted(50, m_flinkTotalMeasurements)); + streamObj << " 75th " << std::to_string(evaluateSorted(75, m_flinkTotalMeasurements)); + streamObj << " 99th " << 
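// evaluateSorted() below implements the (n+1)-based percentile estimator:
// pos = p * (n + 1) / 100, linearly interpolating between the two adjacent
// order statistics and clamping at both ends of the sorted sample.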
std::to_string(evaluateSorted(99, m_flinkTotalMeasurements)); + std::cout << streamObj.str() << std::endl; + } + + double evaluateSorted(double p, std::vector &totalMeasurements) { + if (totalMeasurements.empty()) { + return -1; + } + double n = totalMeasurements.size(); + double pos = p * (n + 1) / 100; + double fpos = floor(pos); + int intPos = (int) fpos; + double dif = pos - fpos; + + if (pos < 1) { + return totalMeasurements[0]; + } + if (pos >= n) { + return totalMeasurements[totalMeasurements.size() - 1]; + } + + double lower = totalMeasurements[intPos - 1]; + double upper = totalMeasurements[intPos]; + return lower + dif * (upper - lower); + } +}; \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/Kafka.h b/test/benchmarks/kafka-flink/Kafka.h new file mode 100644 index 0000000..d1d1467 --- /dev/null +++ b/test/benchmarks/kafka-flink/Kafka.h @@ -0,0 +1,551 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "BenchmarkUtils.h" +#include "benchmarks/kafka-flink/queues/readerwritercircularbuffer.h" +#include "benchmarks/kafka-flink/queues/readerwriterqueue.h" +#include "utils/Utils.h" + +/* + * \brief A prototype implementation of Kafka in C++ using fsync after + * storing a number of data batches on disk. + * + * */ + +std::atomic startKafka = false; +std::atomic kafkaBarrier; +size_t m_kafkaDuration = 60; +thread_local bool m_measureLatency = true; +std::mutex m_measurementsMutex; +std::vector m_totalMeasurements; +int m_batchSize = 8; + +//using BoundedQueue = moodycamel::BlockingReaderWriterCircularBuffer>; +//using BoundedQueuePtr = std::shared_ptr; +// using Queue = moodycamel::ReaderWriterQueue>; +//using QueuePtr = std::shared_ptr; + +struct KafkaProcessor { + std::shared_ptr m_pool; + BoundedQueuePtr m_inputQueue; + BoundedQueuePtr m_outputQueue; + LatQueuePtr m_latQueue; + int m_pid; + std::shared_ptr m_tempBuffer; + + // file properties + std::vector> m_diskBuffer; + size_t m_offset = 0; + size_t m_fileSize = 64 * 1024 * 1024; + const std::string m_fileName; + int m_fd; + + // used for latency measurements + int m_repeat = 0; + std::vector m_measurements; + long m_count = 0; + double m_min = DBL_MAX, m_max = DBL_MIN, m_avg = 0; + long m_timestampReference = 0; + double m_latency = 0; + + KafkaProcessor(int pid, BoundedQueuePtr &inputQueue, BoundedQueuePtr &outputQueue, + std::shared_ptr &pool, + LatQueuePtr &latQueue, long timestampReference) + : m_pool(pool), + m_inputQueue(inputQueue), + m_outputQueue(outputQueue), + m_latQueue(latQueue), + m_pid(pid), + m_diskBuffer(m_batchSize), + m_fileName(SystemConf::FILE_ROOT_PATH + "/kafka/file_" + std::to_string(m_pid)), + m_timestampReference(timestampReference) { + m_tempBuffer = m_pool->newInstance(m_pid); + + // create file + std::remove(m_fileName.c_str()); + m_fd = ::open(m_fileName.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" + void operator()() { + while (!startKafka) + ; + + std::shared_ptr buffer; + long latencyMark; + int writes = 0; + while (true) { + try { + while (!m_inputQueue->try_pop(buffer)) { + //while (!m_inputQueue->try_dequeue(buffer) || !buffer) { + // std::cout << "warning: worker " + std::to_string(m_pid) << " is waiting for work" << std::endl; + std::this_thread::sleep_for(std::chrono::nanoseconds(1)); + } + if (SystemConf::LATENCY_ON && autoConsume) { + while 
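// One KafkaProcessor serves one partition: it spins on its input queue, and
// when latency tracking is on it pairs each popped batch with the timestamp
// mark its producer pushed onto the side latency queue.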
(!m_latQueue->try_pop(latencyMark)) + ; + } + + if (buffer->m_hasBarrier) { + m_outputQueue->push(buffer); + } else { + // todo: build indexes and persist the metadata + // todo: assign an offset to each record + + auto tempBuffer = m_pool->newInstance(m_pid); + tempBuffer->m_originalPosition = buffer->getPosition(); + tempBuffer->m_latencyMark = latencyMark; + // compress data + if (compress) { + size_t output_length; + snappy::RawCompress(buffer->getBuffer().data(), + buffer->getPosition(), + tempBuffer->getBuffer().data(), &output_length); + tempBuffer->setPosition(output_length); + tempBuffer->m_compressed = true; + buffer = tempBuffer; + } else { + std::memcpy(tempBuffer->getBuffer().data(), + buffer->getBuffer().data(), buffer->getPosition()); + tempBuffer->setPosition(buffer->getPosition()); + buffer = tempBuffer; + } + + /*if (!m_diskBuffer[0]) { + m_diskBuffer[0] = buffer; + } else { + if (!m_diskBuffer[writes]->tryToMerge(buffer)) { + ::pwrite(m_fd, m_diskBuffer[writes]->getBuffer().data(), + m_diskBuffer[writes]->getPosition(), m_offset); m_offset += + m_diskBuffer[writes]->getPosition(); if (m_offset >= m_fileSize) { + m_offset = 0; + } + writes++; + if (writes == m_batchSize) { + fsync(m_fd); + // fdatasync(m_fd); + writes = 0; + for (auto &b : m_diskBuffer) { + // append data to the output queue + if (!b) { + throw std::runtime_error("error: invalid buffer ptr"); + } + if (SystemConf::LATENCY_ON && autoConsume && m_measureLatency) + { measureLatency(b->m_latencyMark); + } + m_outputQueue->push(b); + // while (m_outputQueue->try_enqueue(b)) + // ; + b = nullptr; + } + + if (SystemConf::LATENCY_ON && autoConsume) { + m_repeat++; + if (m_pid == 0 && m_repeat == 128) { + std::cout << "Latency metrics [avg " + std::to_string(m_avg) + + + "] " + "[min " + std::to_string(m_min) + + "] " + "[max " + std::to_string(m_max) + "] + " + << std::endl; + m_repeat = 0; + } + } + } + m_diskBuffer[writes] = buffer; + } else { + m_pool->free(buffer->getBufferId(), buffer); + } + }*/ + + // write data to disk and fsync + ::pwrite(m_fd, buffer->getBuffer().data(), buffer->getPosition(), + m_offset); + if (writes == m_batchSize) { + fsync(m_fd); + // fdatasync(m_fd); + writes = 0; + for (auto &b : m_diskBuffer) { + // append data to the output queue + if (!b) { + throw std::runtime_error("error: invalid buffer ptr"); + } + if (SystemConf::LATENCY_ON && autoConsume && m_measureLatency) { + measureLatency(b->m_latencyMark); + } + m_outputQueue->push(b); + // while (m_outputQueue->try_enqueue(b)) + // ; + b = nullptr; + } + + if (SystemConf::LATENCY_ON && autoConsume) { + m_repeat++; + if (m_pid == 0 && m_repeat == 128) { + std::cout << "Latency metrics [avg " + std::to_string(m_avg) + + "] " + "[min " + std::to_string(m_min) + "] " + + "[max " + std::to_string(m_max) + "] " + << std::endl; + m_repeat = 0; + } + } + } + if (!buffer) { + throw std::runtime_error("error: invalid buffer ptr"); + } + m_diskBuffer[writes] = buffer; + writes++; + + m_offset += buffer->getPosition(); + if (m_offset >= m_fileSize) { + m_offset = 0; + } + + if (autoConsume) { + // return buffer + while (m_outputQueue->try_pop(buffer)) { + // while (m_outputQueue->try_dequeue(buffer)) { + m_pool->free(buffer->getBufferId(), buffer); + } + } + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + } + } + + void measureLatency(long latencyMark) { + double dt = 0; + long t1 = latencyMark; + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = 
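// The persistence path above is Kafka-style group commit: each batch is
// written with pwrite() into a 64 MB log file whose offset wraps at the end,
// fsync() runs only once every m_batchSize writes, and buffers are recycled
// through the output queue only after the sync that made them durable.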
std::chrono::duration_cast( + currentTime.time_since_epoch()) + .count(); + long t2 = (currentTimeNano - m_timestampReference) / 1000L; + dt = ((double)(t2 - t1)) / 1000.; /* In milliseconds */ + + m_measurements.push_back(dt); + + m_latency += dt; + m_count += 1; + m_min = std::min(dt, m_min); + m_max = std::max(dt, m_max); + m_avg = m_latency / ((double)m_count); + + if ((t2 / 1000) >= (m_kafkaDuration * 1000)) { + stop(false); + } + } + + void stop(bool print = true) { + int length = m_measurements.size(); + if (length < 1) + return; + + if (print) { + std::sort(m_measurements.begin(), m_measurements.end()); + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << "[MON] [LatencyMonitor] " << std::to_string(length) << " measurements\n"; + streamObj << "[MON] [LatencyMonitor] " << m_pid << " 5th " << std::to_string(evaluateSorted(5)); + streamObj << " 25th " << std::to_string(evaluateSorted(25)); + streamObj << " 50th " << std::to_string(evaluateSorted(50)); + streamObj << " 75th " << std::to_string(evaluateSorted(75)); + streamObj << " 99th " << std::to_string(evaluateSorted(99)); + std::cout << streamObj.str() << std::endl; + } else { + std::lock_guard guard(m_measurementsMutex); + //std::cout << m_pid << " adding its latency results " << std::endl; + //m_totalMeasurements.resize(m_totalMeasurements.size() + m_measurements.size()); + m_totalMeasurements.insert(m_totalMeasurements.end(), m_measurements.begin(), m_measurements.end()); + } + kafkaBarrier.fetch_add(-1); + m_measureLatency = false; + } + + double evaluateSorted(double p) { + double n = m_measurements.size(); + double pos = p * (n + 1) / 100; + double fpos = floor(pos); + int intPos = (int) fpos; + double dif = pos - fpos; + + if (pos < 1) { + return m_measurements[0]; + } + if (pos >= n) { + return m_measurements[m_measurements.size() - 1]; + } + + double lower = m_measurements[intPos - 1]; + double upper = m_measurements[intPos]; + return lower + dif * (upper - lower); + } +}; + +template +struct Kafka { + typedef boost::alignment::aligned_allocator aligned_allocator; + using IntBuffer = std::vector; + + const int m_queueSize = 64; + + int m_numberOfThreads = 0; + int m_partitions = 0; + IntBuffer m_partitionOffsets; + std::vector m_threads; + std::vector> m_processors; + std::vector> m_writerOffsets; + std::vector> m_readerOffsets; + std::vector> m_consumerQueues; + std::vector> m_producerQueues; + std::vector> m_latQueues; + std::vector> m_partitionBuffers; + std::shared_ptr m_pool; + + long m_timestampReference = 0; + + bool m_first = true; + + /* Information used for pining worker threads to cores in-order based on the + * socket topology */ + std::vector m_orderedCores; + + explicit Kafka(int workers = SystemConf::getInstance().WORKER_THREADS, + long timestampReference = 0, bool autoConsume = true) + : m_numberOfThreads(workers), + m_partitions(m_numberOfThreads), + m_partitionOffsets(m_numberOfThreads, 0), + m_processors(m_numberOfThreads), + m_writerOffsets(m_numberOfThreads, IntBuffer(1, 0)), + m_readerOffsets(m_numberOfThreads, IntBuffer(1, 0)), + m_consumerQueues(m_numberOfThreads,nullptr), + m_producerQueues(m_numberOfThreads, nullptr), + m_latQueues(m_numberOfThreads, std::make_shared()), + m_partitionBuffers(m_numberOfThreads), + m_pool(std::make_shared(m_numberOfThreads)), + m_timestampReference(timestampReference) { + Utils::getOrderedCores(m_orderedCores); + for (int i = 0; i < m_numberOfThreads; ++i) { + // 
m_consumerQueues[i]->set_capacity(m_queueSize); + m_consumerQueues[i] = std::make_shared();//m_queueSize); + m_producerQueues[i] = std::make_shared();//m_queueSize); + m_latQueues[i] = std::make_shared(); + m_consumerQueues[i]->set_capacity(m_queueSize); + m_producerQueues[i]->set_capacity(m_queueSize); + m_processors[i] = std::make_unique(i, m_consumerQueues[i], m_producerQueues[i], m_pool, m_latQueues[i],timestampReference); + } + + if (autoConsume) { + startThreads(); + } + } + + void startThreads() { + for (int i = 0; i < m_numberOfThreads; ++i) { + m_threads.emplace_back(std::thread(*m_processors[i])); + Utils::bindProcess(m_threads[i], m_orderedCores[i + 1]); + } + } + + static void startWorkers() { + std::cout << "Starting kafka workers" << std::endl; + kafkaBarrier.store(SystemConf::getInstance().WORKER_THREADS); + startKafka.store(true); + } + + void processData(std::vector &values) { + auto data = (Input *)values.data(); + size_t length = values.size() / sizeof(Input); + + // partition by key + for (size_t idx = 0; idx < length; idx++) { + auto partition = hash::partition(data[idx], m_partitions); // m_hash(data[idx].ad_id) % m_partitions; + auto &buffer = m_partitionBuffers[partition]; + if (!buffer) { + buffer = m_pool->newInstance(0); + } + buffer->putBytes((char *)&data[idx], sizeof(Input)); + } + + for (auto par = 0; par < m_partitions; par++) { + auto &buffer = m_partitionBuffers[par]; + auto bytes = buffer->getPosition(); + if (buffer) { + // while (!m_consumerQueues[par].try_push(buffer)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + //} + m_writerOffsets[par][0] += bytes; + buffer.reset(); + } + } + } + + void processPartitionedData(std::vector &values, long latencyMark) { + // pay the partitioning tax once and keep sending over the same data + if (m_first) { + auto data = (Input *)values.data(); + size_t length = values.size() / sizeof(Input); + + std::unordered_set set; + // partition by key + for (size_t idx = 0; idx < length; idx++) { + auto partition = hash::partition( data[idx], m_partitions); // m_hash(data[idx].ad_id) % m_partitions; + if (set.find(partition) == set.end()) { + set.insert(partition); + //std::cout << "Found " << set.size() << " partitions: " << partition << std::endl; + } else { + //std::cout << "Found " << set.size() << " partitions until " << idx << std::endl; + } + auto &buffer = m_partitionBuffers[partition]; + if (!buffer) { + buffer = m_pool->newInstance(partition); + } + buffer->putBytes((char *)&data[idx], sizeof(Input)); + } + m_first = false; + } + + for (auto par = 0; par < m_partitions; par++) { + auto &buffer = m_partitionBuffers[par]; + if (buffer) { + auto bytes = buffer->getPosition(); + auto tempBuffer = buffer; + // auto tempBuffer = m_pool->newInstance(par); + // std::memcpy(tempBuffer->getBuffer().data(), + // buffer->getBuffer().data(), bytes); tempBuffer->setPosition(bytes); + if (!tempBuffer) { + throw std::runtime_error("error: adding invalid buffer to the queue"); + } + while (!m_consumerQueues[par]->try_push(tempBuffer)) { + //while (!m_consumerQueues[par]->try_enqueue(tempBuffer)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + //if(!startKafka) { + // return; + //} + } + if (SystemConf::LATENCY_ON) { + m_latQueues[par]->push(latencyMark); + } + m_writerOffsets[par][0] += bytes; + } + } + } + + void processPartitionedDataWithCheckpoints(std::vector &values, long latencyMark) { + // pay the partitioning tax once and keep sending 
over the same data + if (m_first) { + auto data = (Input *)values.data(); + size_t length = values.size() / sizeof(Input); + + std::unordered_set set; + // partition by key + for (size_t idx = 0; idx < length; idx++) { + auto partition = hash::partition( data[idx], m_partitions); // m_hash(data[idx].ad_id) % m_partitions; + auto &buffer = m_partitionBuffers[partition]; + if (!buffer) { + buffer = m_pool->newInstance(partition); + } + buffer->putBytes((char *)&data[idx], sizeof(Input)); + } + m_first = false; + } + + bool hasSentBarrier = false; + bool beforeLoopBarrier = pushBarriers.load(); + for (auto par = 0; par < m_partitions; par++) { + if (useCheckpoints && beforeLoopBarrier) { + auto barrier = m_pool->newInstance(0); + barrier->m_hasBarrier = true; + if (debug) { + std::cout << "Start pushing checkpoint barriers downstream to " + std::to_string(par) << std::endl; + } + while (!m_consumerQueues[par]->try_push(barrier)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + _mm_pause(); + } + //std::cout << "Finished pushing checkpoint barriers downstream" << std::endl; + hasSentBarrier = true; + } else { + auto &buffer = m_partitionBuffers[par]; + if (buffer) { + auto tempBuffer = buffer; + if (!tempBuffer) { + throw std::runtime_error("error: adding invalid buffer to the queue"); + } + while (!m_consumerQueues[par]->try_push(tempBuffer)) { + // std::cout << "warning: partition " + std::to_string(par) << " is full" << std::endl; + _mm_pause(); + } + if (SystemConf::LATENCY_ON) { + m_latQueues[par]->push(latencyMark); + } + } + } + } + + if (useCheckpoints && beforeLoopBarrier && hasSentBarrier) { + //std::cout << "Unsetting pushBarriers flag" << std::endl; + pushBarriers.store(false); + } + } + + void measureLatency() { + //while (kafkaBarrier.load() != 0) { + // ; + //} + if (kafkaBarrier.load() != 0) { + std::this_thread::sleep_for(std::chrono::seconds (2)); + } + std::sort(m_totalMeasurements.begin(), m_totalMeasurements.end()); + std::ostringstream streamObj; + streamObj << std::fixed; + streamObj << std::setprecision(3); + streamObj << "[MON] [LatencyMonitor] 5th " << std::to_string(evaluateSorted(5, m_totalMeasurements)); + streamObj << " 25th " << std::to_string(evaluateSorted(25, m_totalMeasurements)); + streamObj << " 50th " << std::to_string(evaluateSorted(50, m_totalMeasurements)); + streamObj << " 75th " << std::to_string(evaluateSorted(75, m_totalMeasurements)); + streamObj << " 99th " << std::to_string(evaluateSorted(99, m_totalMeasurements)); + std::cout << streamObj.str() << std::endl; + } + + double evaluateSorted(double p, std::vector &totalMeasurements) { + if (totalMeasurements.empty()) { + return -1; + } + double n = totalMeasurements.size(); + double pos = p * (n + 1) / 100; + double fpos = floor(pos); + int intPos = (int) fpos; + double dif = pos - fpos; + + if (pos < 1) { + return totalMeasurements[0]; + } + if (pos >= n) { + return totalMeasurements[totalMeasurements.size() - 1]; + } + + double lower = totalMeasurements[intPos - 1]; + double upper = totalMeasurements[intPos]; + return lower + dif * (upper - lower); + } +}; \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/main.cpp b/test/benchmarks/kafka-flink/main.cpp new file mode 100644 index 0000000..e360ef7 --- /dev/null +++ b/test/benchmarks/kafka-flink/main.cpp @@ -0,0 +1,151 @@ +#include +#include + +#include "Kafka.h" +#include "Flink.h" +#include "benchmarks/applications/BenchmarkQuery.h" +#include 
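// processPartitionedDataWithCheckpoints() above is the source-side half of
// the checkpoint protocol: when the coordinator raises pushBarriers, the
// producer substitutes one barrier buffer per partition for that round of
// data, then lowers the flag once every partition has received its barrier.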
"benchmarks/applications/YahooBenchmark/YSB.cpp" +#include "utils/SystemConf.h" +#include "utils/TupleSchema.h" + +const bool terminate = true; +size_t m_bytes = 0; +size_t m_totalCnt = 0; +size_t m_totalBytes = 0; +size_t m_duration = 1 * 60; +long m_timestampReference = 0; + +// kafka +// --bundle-size 62914560 --batch-size 62914560 --disk-block-size 4194304 --threads 16 + +static void parseCommandLineArguments(int argc, const char **argv) { + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + throw std::runtime_error("error: wrong number of arguments"); + } + if (strcmp(argv[i], "--threads") == 0) { + SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]); + if (SystemConf::getInstance().WORKER_THREADS > 1) { + SystemConf::getInstance().BATCH_SIZE = (SystemConf::getInstance().WORKER_THREADS - 1) * SystemConf::getInstance().BLOCK_SIZE; + SystemConf::getInstance().BUNDLE_SIZE = SystemConf::getInstance().BATCH_SIZE; + } + std::cout << "BATCH_SIZE: " << SystemConf::getInstance().BATCH_SIZE << " / BUNDLE_SIZE: " << SystemConf::getInstance().BUNDLE_SIZE << std::endl; + } else if (strcmp(argv[i], "--batch-size") == 0) { + SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--bundle-size") == 0) { + SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--disk-block-size") == 0) { + SystemConf::getInstance().BLOCK_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--input-size") == 0) { + SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--query") == 0) { + SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--latency") == 0) { + SystemConf::getInstance().LATENCY_ON = + (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--use-flink") == 0) { + SystemConf::getInstance().USE_FLINK = + (strcasecmp(argv[j], "false") == 0 || std::atoi(argv[j]) == 0); + } else if (strcmp(argv[i], "--use-kafka") == 0) { + SystemConf::getInstance().USE_KAFKA = + (strcasecmp(argv[j], "false") == 0 || std::atoi(argv[j]) == 0); + } else if (strcmp(argv[i], "--disk-buffer") == 0) { + SystemConf::getInstance().DISK_BUFFER = std::stoi(argv[j]); + m_flinkBatchSize = SystemConf::getInstance().DISK_BUFFER; + } else if (strcmp(argv[i], "--use-checkpoints") == 0) { + useCheckpoints = (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) == 1); + } else { + std::string argument(argv[i]); + throw std::runtime_error("error: unknown argument " + argument); + } + i = j + 1; + } +} + +int main(int argc, const char **argv) { + parseCommandLineArguments(argc, argv); + + auto benchmark = std::make_unique(true, false); + auto buffer = benchmark->getInMemoryData(); + YSBQuery::m_staticData[0] = benchmark->getStaticData(); + YSBQuery::m_timestampOffset = benchmark->getEndTimestamp(); + for (int w = 1; w < SystemConf::WORKER_THREADS; w++) { + YSBQuery::m_staticData[w] = new std::vector(YSBQuery::m_staticData[0]->size()); + std::memcpy(YSBQuery::m_staticData[w]->data(), YSBQuery::m_staticData[0]->data(), YSBQuery::m_staticData[0]->size()); + } + std::vector input (buffer->size()); + std::memcpy(input.data(), buffer->data(), buffer->size()); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + long latencyMark = -1; + + autoConsume = !SystemConf::getInstance().USE_KAFKA; + // initialize Kafka + auto kafka = 
std::make_unique>(SystemConf::getInstance().WORKER_THREADS, m_timestampReference, false); + m_kafkaDuration = m_duration - 1; + + // initialize Flink + std::unique_ptr flink = std::make_unique(SystemConf::getInstance().WORKER_THREADS, m_timestampReference, autoConsume); + m_flinkDuration = m_duration - 1; + + if (SystemConf::getInstance().USE_KAFKA) { + flink->connect(kafka.get()); + kafka->startWorkers(); + } + flink->startWorkers(); + + // run benchmark + auto t1 = std::chrono::high_resolution_clock::now(); + auto _t1 = std::chrono::high_resolution_clock::now(); + + std::cout << "Start running Yahoo Benchmark..." << std::endl; + try { + while (true) { + if (terminate) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = std::chrono::duration_cast>(t2 - _t1); + if (time_span.count() >= 1) { + auto thr = (m_bytes / (1024 * 1024)) / time_span.count(); + m_totalBytes += thr; + m_totalCnt++; + m_bytes = 0; + _t1 = t2; + std::cout << "[DBG] " + std::to_string(thr) + " MB/s" + + " [AVG: " + std::to_string(m_totalBytes/m_totalCnt) + " MB/s " + + std::to_string((m_totalBytes/sizeof(YSBQuery::InputSchema) * 1024 * 1024)/m_totalCnt) + " tuples/s]" << std::endl; + } + time_span = std::chrono::duration_cast>( + t2 - t1); + if (time_span.count() >= (double)m_duration + 2) { + flink->measureLatency(); + std::cout << "Stop running the kafka-flink benchmark..." << std::endl; + return 0; + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = + std::chrono::duration_cast(currentTime.time_since_epoch()).count(); + latencyMark = (long)((currentTimeNano - m_timestampReference) / 1000L); + } + + // send data + if (SystemConf::getInstance().USE_KAFKA) { + if (useCheckpoints) { + kafka->processPartitionedDataWithCheckpoints(input, latencyMark); + } else { + kafka->processPartitionedData(input, latencyMark); + } + } else { + flink->processPartitionedData(input, latencyMark); + } + m_bytes += input.size(); + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } +} \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/mainKafka.cpp b/test/benchmarks/kafka-flink/mainKafka.cpp new file mode 100644 index 0000000..b253892 --- /dev/null +++ b/test/benchmarks/kafka-flink/mainKafka.cpp @@ -0,0 +1,177 @@ +#include +#include + +#include "Kafka.h" +#include "benchmarks/applications/ClusterMonitoring/CM1.cpp" +#include "benchmarks/applications/ClusterMonitoring/CM2.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB1.cpp" +#include "benchmarks/applications/LinearRoadBenchmark/LRB2.cpp" +#include "benchmarks/applications/ManufacturingEquipment/ME1.cpp" +#include "benchmarks/applications/Nexmark/NBQ5.cpp" +#include "benchmarks/applications/SmartGrid/SG1.cpp" +#include "benchmarks/applications/SmartGrid/SG2.cpp" +#include "benchmarks/applications/YahooBenchmark/YSB.cpp" +#include "utils/SystemConf.h" +#include "utils/TupleSchema.h" + +const bool terminate = true; +size_t m_bytes = 0; +size_t m_totalCnt = 0; +size_t m_totalBytes = 0; +size_t m_duration = 1 * 60; +long m_timestampReference = 0; + +// kafka +// --bundle-size 62914560 --batch-size 62914560 --disk-block-size 4194304 --threads 16 + +static void parseCommandLineArguments(int argc, const char **argv) { + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + throw std::runtime_error("error: wrong number of arguments"); + } + if (strcmp(argv[i], "--threads") == 0) { + 
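// With more than one worker, BATCH_SIZE is derived as
// (WORKER_THREADS - 1) * BLOCK_SIZE and BUNDLE_SIZE mirrors it. Note also
// that --use-flink and --use-kafka invert their argument: passing "false" or
// 0 is what turns the feature on.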
SystemConf::getInstance().WORKER_THREADS = std::stoi(argv[j]); + if (SystemConf::getInstance().WORKER_THREADS > 1) { + SystemConf::getInstance().BATCH_SIZE = (SystemConf::getInstance().WORKER_THREADS - 1) * SystemConf::getInstance().BLOCK_SIZE; + SystemConf::getInstance().BUNDLE_SIZE = SystemConf::getInstance().BATCH_SIZE; + } + std::cout << "BATCH_SIZE: " << SystemConf::getInstance().BATCH_SIZE << " / BUNDLE_SIZE: " << SystemConf::getInstance().BUNDLE_SIZE << std::endl; + } else if (strcmp(argv[i], "--batch-size") == 0) { + SystemConf::getInstance().BATCH_SIZE = std::stoi(argv[j]); + SystemConf::getInstance().BUNDLE_SIZE = SystemConf::getInstance().BATCH_SIZE; + } else if (strcmp(argv[i], "--bundle-size") == 0) { + SystemConf::getInstance().BUNDLE_SIZE = std::stoi(argv[j]); + SystemConf::getInstance().BATCH_SIZE = SystemConf::getInstance().BUNDLE_SIZE; + } else if (strcmp(argv[i], "--disk-block-size") == 0) { + SystemConf::getInstance().BLOCK_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--input-size") == 0) { + SystemConf::getInstance().INPUT_SIZE = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--query") == 0) { + SystemConf::getInstance().QUERY_NUM = std::stoi(argv[j]); + } else if (strcmp(argv[i], "--latency") == 0) { + SystemConf::getInstance().LATENCY_ON = + (strcasecmp(argv[j], "true") == 0 || std::atoi(argv[j]) != 0); + } else if (strcmp(argv[i], "--use-flink") == 0) { + SystemConf::getInstance().USE_FLINK = + (strcasecmp(argv[j], "false") == 0 || std::atoi(argv[j]) == 0); + } else if (strcmp(argv[i], "--use-kafka") == 0) { + SystemConf::getInstance().USE_KAFKA = + (strcasecmp(argv[j], "false") == 0 || std::atoi(argv[j]) == 0); + } else if (strcmp(argv[i], "--disk-buffer") == 0) { + SystemConf::getInstance().DISK_BUFFER = std::stoi(argv[j]); + m_batchSize = SystemConf::getInstance().DISK_BUFFER; + } else { + std::string argument(argv[i]); + throw std::runtime_error("error: unknown argument " + argument); + } + i = j + 1; + } +} + +template +void run() { + auto benchmark = std::make_unique(true, false); + auto buffer = benchmark->getInMemoryData(); + + std::vector input (buffer->size()); + std::memcpy(input.data(), buffer->data(), buffer->size()); + + // this is used for latency measurements + m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + long latencyMark = -1; + + autoConsume = true; + // initialize Kafka + auto kafka = std::make_unique>(SystemConf::getInstance().WORKER_THREADS, m_timestampReference, autoConsume); + m_kafkaDuration = m_duration - 1; + + kafka->startWorkers(); + + // run benchmark + auto t1 = std::chrono::high_resolution_clock::now(); + auto _t1 = std::chrono::high_resolution_clock::now(); + + std::cout << "Start running benchmark..." << std::endl; + try { + while (true) { + if (terminate) { + auto t2 = std::chrono::high_resolution_clock::now(); + auto time_span = std::chrono::duration_cast>(t2 - _t1); + if (time_span.count() >= 1) { + auto thr = (m_bytes / (1024 * 1024)) / time_span.count(); + m_totalBytes += thr; + m_totalCnt++; + m_bytes = 0; + _t1 = t2; + std::cout << "[DBG] " + std::to_string(thr) + " MB/s" + + " [AVG: " + std::to_string(m_totalBytes/m_totalCnt) + " MB/s " + + std::to_string((m_totalBytes/sizeof(InputSchema) * 1024 * 1024)/m_totalCnt) + " tuples/s]" << std::endl; + } + time_span = std::chrono::duration_cast>( + t2 - t1); + if (time_span.count() >= (double)m_duration + 2) { + kafka->measureLatency(); + std::cout << "Stop running the kafka benchmark..." 
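// run<>() is stamped out once per workload by the switch in main(): its
// template parameters supply the data generator, the record layout, and the
// partitioning hash, so the same Kafka producer loop serves the CM, LRB, ME,
// SG, YSB, and Nexmark benchmarks unchanged.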
<< std::endl; + return; + } + } + + if (SystemConf::getInstance().LATENCY_ON) { + auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTimeNano = + std::chrono::duration_cast(currentTime.time_since_epoch()).count(); + latencyMark = (long)((currentTimeNano - m_timestampReference) / 1000L); + } + + // send data + kafka->processPartitionedData(input, latencyMark); + + m_bytes += input.size(); + } + } catch (std::exception &e) { + std::cout << e.what() << std::endl; + exit(1); + } + +} + +int main(int argc, const char **argv) { + SystemConf::getInstance().QUERY_NUM = 0; + parseCommandLineArguments(argc, argv); + + switch (SystemConf::getInstance().QUERY_NUM) { + case 0: + run (); + return 0; + case 1: + run (); + return 0; + case 2: + run (); + return 0; + case 3: + case 4: + run (); + return 0; + case 5: + run (); + return 0; + case 6: + case 7: + run (); + return 0; + case 8: + run (); + return 0; + case 9: + run (); + return 0; + case 10: + run (); + return 0; + default: + throw std::runtime_error("error: wrong query number"); + } + +} \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/queues/atomicops.h b/test/benchmarks/kafka-flink/queues/atomicops.h new file mode 100644 index 0000000..f094b4f --- /dev/null +++ b/test/benchmarks/kafka-flink/queues/atomicops.h @@ -0,0 +1,679 @@ +// ©2013-2016 Cameron Desrochers. +// Distributed under the simplified BSD license (see the license file that +// should have come with this header). +// Uses Jeff Preshing's semaphore implementation (under the terms of its +// separate zlib license, embedded below). + +#pragma once + +// Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant) implementation +// of low-level memory barriers, plus a few semi-portable utility macros (for inlining and alignment). +// Also has a basic atomic type (limited to hardware-supported atomics with no memory ordering guarantees). +// Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols. + +#include +#include +#include +#include +#include +#include + +// Platform detection +#if defined(__INTEL_COMPILER) +#define AE_ICC +#elif defined(_MSC_VER) +#define AE_VCPP +#elif defined(__GNUC__) +#define AE_GCC +#endif + +#if defined(_M_IA64) || defined(__ia64__) +#define AE_ARCH_IA64 +#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__) +#define AE_ARCH_X64 +#elif defined(_M_IX86) || defined(__i386__) +#define AE_ARCH_X86 +#elif defined(_M_PPC) || defined(__powerpc__) +#define AE_ARCH_PPC +#else +#define AE_ARCH_UNKNOWN +#endif + + +// AE_UNUSED +#define AE_UNUSED(x) ((void)x) + +// AE_NO_TSAN +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#define AE_NO_TSAN __attribute__((no_sanitize("thread"))) +#else +#define AE_NO_TSAN +#endif +#else +#define AE_NO_TSAN +#endif + + +// AE_FORCEINLINE +#if defined(AE_VCPP) || defined(AE_ICC) +#define AE_FORCEINLINE __forceinline +#elif defined(AE_GCC) +//#define AE_FORCEINLINE __attribute__((always_inline)) +#define AE_FORCEINLINE inline +#else +#define AE_FORCEINLINE inline +#endif + + +// AE_ALIGN +#if defined(AE_VCPP) || defined(AE_ICC) +#define AE_ALIGN(x) __declspec(align(x)) +#elif defined(AE_GCC) +#define AE_ALIGN(x) __attribute__((aligned(x))) +#else +// Assume GCC compliant syntax... 
+#define AE_ALIGN(x) __attribute__((aligned(x))) +#endif + + +// Portable atomic fences implemented below: + +namespace moodycamel { + +enum memory_order { + memory_order_relaxed, + memory_order_acquire, + memory_order_release, + memory_order_acq_rel, + memory_order_seq_cst, + + // memory_order_sync: Forces a full sync: + // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad + memory_order_sync = memory_order_seq_cst +}; + +} // end namespace moodycamel + +#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || (defined(AE_ICC) && __INTEL_COMPILER < 1600) +// VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences + +#include + +#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86) +#define AeFullSync _mm_mfence +#define AeLiteSync _mm_mfence +#elif defined(AE_ARCH_IA64) +#define AeFullSync __mf +#define AeLiteSync __mf +#elif defined(AE_ARCH_PPC) +#include +#define AeFullSync __sync +#define AeLiteSync __lwsync +#endif + + +#ifdef AE_VCPP +#pragma warning(push) +#pragma warning(disable: 4365) // Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' error when using `assert` +#ifdef __cplusplus_cli +#pragma managed(push, off) +#endif +#endif + +namespace moodycamel { + +AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN +{ + switch (order) { + case memory_order_relaxed: break; + case memory_order_acquire: _ReadBarrier(); break; + case memory_order_release: _WriteBarrier(); break; + case memory_order_acq_rel: _ReadWriteBarrier(); break; + case memory_order_seq_cst: _ReadWriteBarrier(); break; + default: assert(false); + } +} + +// x86/x64 have a strong memory model -- all loads and stores have +// acquire and release semantics automatically (so only need compiler +// barriers for those). 
+#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64) +AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN +{ + switch (order) { + case memory_order_relaxed: break; + case memory_order_acquire: _ReadBarrier(); break; + case memory_order_release: _WriteBarrier(); break; + case memory_order_acq_rel: _ReadWriteBarrier(); break; + case memory_order_seq_cst: + _ReadWriteBarrier(); + AeFullSync(); + _ReadWriteBarrier(); + break; + default: assert(false); + } +} +#else +AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN +{ + // Non-specialized arch, use heavier memory barriers everywhere just in case :-( + switch (order) { + case memory_order_relaxed: + break; + case memory_order_acquire: + _ReadBarrier(); + AeLiteSync(); + _ReadBarrier(); + break; + case memory_order_release: + _WriteBarrier(); + AeLiteSync(); + _WriteBarrier(); + break; + case memory_order_acq_rel: + _ReadWriteBarrier(); + AeLiteSync(); + _ReadWriteBarrier(); + break; + case memory_order_seq_cst: + _ReadWriteBarrier(); + AeFullSync(); + _ReadWriteBarrier(); + break; + default: assert(false); + } +} +#endif +} // end namespace moodycamel +#else +// Use standard library of atomics +#include + +namespace moodycamel { + +AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN +{ + switch (order) { + case memory_order_relaxed: break; + case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break; + case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break; + case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break; + case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break; + default: assert(false); + } +} + +AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN +{ + switch (order) { + case memory_order_relaxed: break; + case memory_order_acquire: std::atomic_thread_fence(std::memory_order_acquire); break; + case memory_order_release: std::atomic_thread_fence(std::memory_order_release); break; + case memory_order_acq_rel: std::atomic_thread_fence(std::memory_order_acq_rel); break; + case memory_order_seq_cst: std::atomic_thread_fence(std::memory_order_seq_cst); break; + default: assert(false); + } +} + +} // end namespace moodycamel + +#endif + + +#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli)) +#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC +#endif + +#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC +#include +#endif +#include + +// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY: +// Provides basic support for atomic variables -- no memory ordering guarantees are provided. +// The guarantee of atomicity is only made for types that already have atomic load and store guarantees +// at the hardware level -- on most platforms this generally means aligned pointers and integers (only). 
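+// Illustrative sketch (not part of the original header): the same
+// "atomic but unordered" contract expressed with std::atomic; the names
+// below are hypothetical.
+//
+//   #include <atomic>
+//   #include <cstdint>
+//
+//   inline std::uint64_t relaxed_increment(std::atomic<std::uint64_t>& c) {
+//     // Atomic load and store, but memory_order_relaxed imposes no ordering
+//     // on surrounding operations -- mirroring weak_atomic's guarantee.
+//     std::uint64_t old = c.load(std::memory_order_relaxed);
+//     c.store(old + 1, std::memory_order_relaxed);
+//     return old;
+//   }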
+namespace moodycamel { +template +class weak_atomic +{ + public: + AE_NO_TSAN weak_atomic() : value() { } +#ifdef AE_VCPP + #pragma warning(push) +#pragma warning(disable: 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning +#endif + template AE_NO_TSAN weak_atomic(U&& x) : value(std::forward(x)) { } +#ifdef __cplusplus_cli + // Work around bug with universal reference/nullptr combination that only appears when /clr is on + AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) { } +#endif + AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) { } + AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) { } +#ifdef AE_VCPP +#pragma warning(pop) +#endif + + AE_FORCEINLINE operator T() const AE_NO_TSAN { return load(); } + + +#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC + template AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN { value = std::forward(x); return *this; } + AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN { value = other.value; return *this; } + + AE_FORCEINLINE T load() const AE_NO_TSAN { return value; } + + AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN + { +#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86) + if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment); +#if defined(_M_AMD64) + else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment); +#endif +#else +#error Unsupported platform +#endif + assert(false && "T must be either a 32 or 64 bit type"); + return value; + } + + AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN + { +#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86) + if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment); +#if defined(_M_AMD64) + else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment); +#endif +#else +#error Unsupported platform +#endif + assert(false && "T must be either a 32 or 64 bit type"); + return value; + } +#else + template + AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN + { + value.store(std::forward(x), std::memory_order_relaxed); + return *this; + } + + AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN + { + value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; + } + + AE_FORCEINLINE T load() const AE_NO_TSAN { return value.load(std::memory_order_relaxed); } + + AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN + { + return value.fetch_add(increment, std::memory_order_acquire); + } + + AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN + { + return value.fetch_add(increment, std::memory_order_release); + } +#endif + + + private: +#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC + // No std::atomic support, but still need to circumvent compiler optimizations. + // `volatile` will make memory access slow, but is guaranteed to be reliable. + volatile T value; +#else + std::atomic value; +#endif +}; + +} // end namespace moodycamel + + + +// Portable single-producer, single-consumer semaphore below: + +#if defined(_WIN32) +// Avoid including windows.h in a header; we only need a handful of +// items, so we'll redeclare them here (this is relatively safe since +// the API generally has to remain stable between Windows versions). 
+// I know this is an ugly hack but it still beats polluting the global +// namespace with thousands of generic names or adding a .cpp for nothing. +extern "C" { + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); +} +#elif defined(__MACH__) +#include +#elif defined(__unix__) +#include +#endif + +namespace moodycamel +{ +// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's +// portable + lightweight semaphore implementations, originally from +// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h +// LICENSE: +// Copyright (c) 2015 Jeff Preshing +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgement in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
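+// For orientation (an added sketch, not part of the original header): the
+// platform wrappers below all implement the classic counting-semaphore
+// contract, which could be written portably -- though far more slowly --
+// with the standard library alone (assumes <mutex> and <condition_variable>):
+//
+//   class NaiveSemaphore {
+//     std::mutex m;
+//     std::condition_variable cv;
+//     int count;
+//    public:
+//     explicit NaiveSemaphore(int initial = 0) : count(initial) {}
+//     void wait() {
+//       std::unique_lock<std::mutex> lock(m);
+//       cv.wait(lock, [&] { return count > 0; });
+//       --count;
+//     }
+//     void signal() {
+//       { std::lock_guard<std::mutex> lock(m); ++count; }
+//       cv.notify_one();
+//     }
+//   };
+//
+// The kernel-backed versions below avoid taking a mutex on every operation.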
+namespace spsc_sema +{ +#if defined(_WIN32) +class Semaphore + { + private: + void* m_hSema; + + Semaphore(const Semaphore& other); + Semaphore& operator=(const Semaphore& other); + + public: + AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema() + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + AE_NO_TSAN ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() AE_NO_TSAN + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() AE_NO_TSAN + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) AE_NO_TSAN + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) AE_NO_TSAN + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } + }; +#elif defined(__MACH__) +//--------------------------------------------------------- + // Semaphore (Apple iOS and OSX) + // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html + //--------------------------------------------------------- + class Semaphore + { + private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other); + Semaphore& operator=(const Semaphore& other); + + public: + AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + AE_UNUSED(rc); + } + + AE_NO_TSAN ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() AE_NO_TSAN + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() AE_NO_TSAN + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() AE_NO_TSAN + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) AE_NO_TSAN + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } + }; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ + private: + sem_t m_sema; + + Semaphore(const Semaphore& other); + Semaphore& operator=(const Semaphore& other); + + public: + AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + AE_UNUSED(rc); + } + + AE_NO_TSAN ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() AE_NO_TSAN + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do + { + rc = sem_wait(&m_sema); + } + while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() AE_NO_TSAN + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) AE_NO_TSAN + { + struct timespec ts; + const int 
usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += static_cast(usecs / usecs_in_1_sec); + ts.tv_nsec += static_cast(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { + rc = sem_timedwait(&m_sema, &ts); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() AE_NO_TSAN + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) AE_NO_TSAN + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ + public: + typedef std::make_signed::type ssize_t; + + private: + weak_atomic m_count; + Semaphore m_sema; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) AE_NO_TSAN + { + ssize_t oldCount; + // Is there a better way to set the initial spin count? + // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, + // as threads start hitting the kernel semaphore. + int spin = 1024; + while (--spin >= 0) + { + if (m_count.load() > 0) + { + m_count.fetch_add_acquire(-1); + return true; + } + compiler_fence(memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_add_acquire(-1); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait(static_cast(timeout_usecs))) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.fetch_add_release(1); + if (oldCount < 0) + return false; // successfully restored things to the way they were + // Oh, the producer thread just signaled the semaphore after all. Try again: + oldCount = m_count.fetch_add_acquire(-1); + if (oldCount > 0 && m_sema.try_wait()) + return true; + } + } + + public: + AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() + { + assert(initialCount >= 0); + } + + bool tryWait() AE_NO_TSAN + { + if (m_count.load() > 0) + { + m_count.fetch_add_acquire(-1); + return true; + } + return false; + } + + bool wait() AE_NO_TSAN + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) AE_NO_TSAN + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + void signal(ssize_t count = 1) AE_NO_TSAN + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add_release(count); + assert(oldCount >= -1); + if (oldCount < 0) + { + m_sema.signal(1); + } + } + + std::size_t availableApprox() const AE_NO_TSAN + { + ssize_t count = m_count.load(); + return count > 0 ? 
static_cast(count) : 0; + } +}; +} // end namespace spsc_sema +} // end namespace moodycamel + +#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli)) +#pragma warning(pop) +#ifdef __cplusplus_cli +#pragma managed(pop) +#endif +#endif \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/queues/readerwritercircularbuffer.h b/test/benchmarks/kafka-flink/queues/readerwritercircularbuffer.h new file mode 100644 index 0000000..3230517 --- /dev/null +++ b/test/benchmarks/kafka-flink/queues/readerwritercircularbuffer.h @@ -0,0 +1,288 @@ +// ©2020 Cameron Desrochers. +// Distributed under the simplified BSD license (see the license file that +// should have come with this header). + +// Provides a C++11 implementation of a single-producer, single-consumer wait-free concurrent +// circular buffer (fixed-size queue). + +#pragma once + +#include +#include +#include +#include +#include +#include + +// Note that this implementation is fully modern C++11 (not compatible with old MSVC versions) +// but we still include atomicops.h for its LightweightSemaphore implementation. +#include "atomicops.h" + +#ifndef MOODYCAMEL_CACHE_LINE_SIZE +#define MOODYCAMEL_CACHE_LINE_SIZE 64 +#endif + +namespace moodycamel { + +template +class BlockingReaderWriterCircularBuffer +{ + public: + typedef T value_type; + + public: + explicit BlockingReaderWriterCircularBuffer(std::size_t capacity) + : maxcap(capacity), mask(), rawData(), data(), + slots(new spsc_sema::LightweightSemaphore(static_cast(capacity))), + items(new spsc_sema::LightweightSemaphore(0)), + nextSlot(0), nextItem(0) + { + // Round capacity up to power of two to compute modulo mask. + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --capacity; + capacity |= capacity >> 1; + capacity |= capacity >> 2; + capacity |= capacity >> 4; + for (std::size_t i = 1; i < sizeof(std::size_t); i <<= 1) + capacity |= capacity >> (i << 3); + mask = capacity++; + rawData = static_cast(std::malloc(capacity * sizeof(T) + std::alignment_of::value - 1)); + data = align_for(rawData); + } + + BlockingReaderWriterCircularBuffer(BlockingReaderWriterCircularBuffer&& other) + : maxcap(0), mask(0), rawData(nullptr), data(nullptr), + slots(new spsc_sema::LightweightSemaphore(0)), + items(new spsc_sema::LightweightSemaphore(0)), + nextSlot(), nextItem() + { + swap(other); + } + + BlockingReaderWriterCircularBuffer(BlockingReaderWriterCircularBuffer const&) = delete; + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + ~BlockingReaderWriterCircularBuffer() + { + for (std::size_t i = 0, n = items->availableApprox(); i != n; ++i) + reinterpret_cast(data)[(nextItem + i) & mask].~T(); + std::free(rawData); + } + + BlockingReaderWriterCircularBuffer& operator=(BlockingReaderWriterCircularBuffer&& other) noexcept + { + swap(other); + return *this; + } + + BlockingReaderWriterCircularBuffer& operator=(BlockingReaderWriterCircularBuffer const&) = delete; + + // Swaps the contents of this buffer with the contents of another. + // Not thread-safe. + void swap(BlockingReaderWriterCircularBuffer& other) noexcept + { + std::swap(maxcap, other.maxcap); + std::swap(mask, other.mask); + std::swap(rawData, other.rawData); + std::swap(data, other.data); + std::swap(slots, other.slots); + std::swap(items, other.items); + std::swap(nextSlot, other.nextSlot); + std::swap(nextItem, other.nextItem); + } + + // Enqueues a single item (by copying it). 
+ // Fails if not enough room to enqueue. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + bool try_enqueue(T const& item) + { + if (!slots->tryWait()) + return false; + inner_enqueue(item); + return true; + } + + // Enqueues a single item (by moving it, if possible). + // Fails if not enough room to enqueue. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + bool try_enqueue(T&& item) + { + if (!slots->tryWait()) + return false; + inner_enqueue(std::move(item)); + return true; + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // then enqueues it (via copy). + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + void wait_enqueue(T const& item) + { + while (!slots->wait()); + inner_enqueue(item); + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // then enqueues it (via move, if possible). + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + void wait_enqueue(T&& item) + { + while (!slots->wait()); + inner_enqueue(std::move(item)); + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // or the timeout expires. Returns false without enqueueing the item if the timeout + // expires, otherwise enqueues the item (via copy) and returns true. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + bool wait_enqueue_timed(T const& item, std::int64_t timeout_usecs) + { + if (!slots->wait(timeout_usecs)) + return false; + inner_enqueue(item); + return true; + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // or the timeout expires. Returns false without enqueueing the item if the timeout + // expires, otherwise enqueues the item (via move, if possible) and returns true. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + bool wait_enqueue_timed(T&& item, std::int64_t timeout_usecs) + { + if (!slots->wait(timeout_usecs)) + return false; + inner_enqueue(std::move(item)); + return true; + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // or the timeout expires. Returns false without enqueueing the item if the timeout + // expires, otherwise enqueues the item (via copy) and returns true. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. + template + inline bool wait_enqueue_timed(T const& item, std::chrono::duration const& timeout) + { + return wait_enqueue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's enough space to enqueue the given item, + // or the timeout expires. Returns false without enqueueing the item if the timeout + // expires, otherwise enqueues the item (via move, if possible) and returns true. + // Thread-safe when called by producer thread. + // No exception guarantee (state will be corrupted) if constructor of T throws. 
+ template + inline bool wait_enqueue_timed(T&& item, std::chrono::duration const& timeout) + { + return wait_enqueue_timed(std::move(item), std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue a single item. + // Returns false if the buffer is empty. + // Thread-safe when called by consumer thread. + // No exception guarantee (state will be corrupted) if assignment operator of U throws. + template + bool try_dequeue(U& item) + { + if (!items->tryWait()) + return false; + inner_dequeue(item); + return true; + } + + // Blocks the current thread until there's something to dequeue, then dequeues it. + // Thread-safe when called by consumer thread. + // No exception guarantee (state will be corrupted) if assignment operator of U throws. + template + void wait_dequeue(U& item) + { + while (!items->wait()); + inner_dequeue(item); + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Thread-safe when called by consumer thread. + // No exception guarantee (state will be corrupted) if assignment operator of U throws. + template + bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!items->wait(timeout_usecs)) + return false; + inner_dequeue(item); + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Thread-safe when called by consumer thread. + // No exception guarantee (state will be corrupted) if assignment operator of U throws. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Returns a (possibly outdated) snapshot of the total number of elements currently in the buffer. + // Thread-safe. + inline std::size_t size_approx() const + { + return items->availableApprox(); + } + + // Returns the maximum number of elements that this circular buffer can hold at once. + // Thread-safe. 
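+  // Note: this is the capacity requested at construction; internally the
+  // buffer rounds capacity up to a power of two for cheap index masking and
+  // relies on the `slots` semaphore to cap usage at the requested value.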
+ inline std::size_t max_capacity() const + { + return maxcap; + } + + private: + template + void inner_enqueue(U&& item) + { + std::size_t i = nextSlot++; + new (reinterpret_cast(data) + (i & mask)) T(std::forward(item)); + items->signal(); + } + + template + void inner_dequeue(U& item) + { + std::size_t i = nextItem++; + T& element = reinterpret_cast(data)[i & mask]; + item = std::move(element); + element.~T(); + slots->signal(); + } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + private: + std::size_t maxcap; // actual (non-power-of-two) capacity + std::size_t mask; // circular buffer capacity mask (for cheap modulo) + char* rawData; // raw circular buffer memory + char* data; // circular buffer memory aligned to element alignment + std::unique_ptr slots; // number of slots currently free + std::unique_ptr items; // number of elements currently enqueued + char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(char*) * 2 - sizeof(std::size_t) * 2 - sizeof(std::unique_ptr) * 2]; + std::size_t nextSlot; // index of next free slot to enqueue into + char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(std::size_t)]; + std::size_t nextItem; // index of next element to dequeue from +}; + +} \ No newline at end of file diff --git a/test/benchmarks/kafka-flink/queues/readerwriterqueue.h b/test/benchmarks/kafka-flink/queues/readerwriterqueue.h new file mode 100644 index 0000000..e9318d5 --- /dev/null +++ b/test/benchmarks/kafka-flink/queues/readerwriterqueue.h @@ -0,0 +1,979 @@ +// ©2013-2020 Cameron Desrochers. +// Distributed under the simplified BSD license (see the license file that +// should have come with this header). + +#pragma once + +#include "atomicops.h" +#include +#include +#include +#include +#include +#include +#include +#include // For malloc/free/abort & size_t +#include +#if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012 +#include +#endif + + +// A lock-free queue for a single-consumer, single-producer architecture. +// The queue is also wait-free in the common path (except if more memory +// needs to be allocated, in which case malloc is called). +// Allocates memory sparingly, and only once if the original maximum size +// estimate is never exceeded. +// Tested on x86/x64 processors, but semantics should be correct for all +// architectures (given the right implementations in atomicops.h), provided +// that aligned integer and pointer accesses are naturally atomic. +// Note that there should only be one consumer thread and producer thread; +// Switching roles of the threads, or using multiple consecutive threads for +// one role, is not safe unless properly synchronized. +// Using the queue exclusively from one thread is fine, though a bit silly. 
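+// Minimal usage sketch (illustrative only; `q` and `item` are hypothetical):
+//
+//   moodycamel::ReaderWriterQueue<int> q(100);
+//
+//   // Producer thread only:
+//   q.enqueue(17);       // may allocate a new block if the queue is full
+//   q.try_enqueue(18);   // never allocates; returns false when full
+//
+//   // Consumer thread only:
+//   int item;
+//   while (!q.try_dequeue(item)) { /* spin, yield, or do other work */ }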
+ +#ifndef MOODYCAMEL_CACHE_LINE_SIZE +#define MOODYCAMEL_CACHE_LINE_SIZE 64 +#endif + +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif + +#ifndef MOODYCAMEL_HAS_EMPLACE +#if !defined(_MSC_VER) || _MSC_VER >= 1800 // variadic templates: either a non-MS compiler or VS >= 2013 +#define MOODYCAMEL_HAS_EMPLACE 1 +#endif +#endif + +#ifndef MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE +#if defined (__APPLE__) && defined (__MACH__) && __cplusplus >= 201703L +// This is required to find out what deployment target we are using +#include +#if !defined(MAC_OS_X_VERSION_MIN_REQUIRED) || MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_14 +// C++17 new(size_t, align_val_t) is not backwards-compatible with older versions of macOS, so we can't support over-alignment in this case +#define MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE +#endif +#endif +#endif + +#ifndef MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE +#define MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE AE_ALIGN(MOODYCAMEL_CACHE_LINE_SIZE) +#endif + +#ifdef AE_VCPP +#pragma warning(push) +#pragma warning(disable: 4324) // structure was padded due to __declspec(align()) +#pragma warning(disable: 4820) // padding was added +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace moodycamel { + +template +class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue +{ + // Design: Based on a queue-of-queues. The low-level queues are just + // circular buffers with front and tail indices indicating where the + // next element to dequeue is and where the next element can be enqueued, + // respectively. Each low-level queue is called a "block". Each block + // wastes exactly one element's worth of space to keep the design simple + // (if front == tail then the queue is empty, and can't be full). + // The high-level queue is a circular linked list of blocks; again there + // is a front and tail, but this time they are pointers to the blocks. + // The front block is where the next element to be dequeued is, provided + // the block is not empty. The back block is where elements are to be + // enqueued, provided the block is not full. + // The producer thread owns all the tail indices/pointers. The consumer + // thread owns all the front indices/pointers. Both threads read each + // other's variables, but only the owning thread updates them. E.g. After + // the consumer reads the producer's tail, the tail may change before the + // consumer is done dequeuing an object, but the consumer knows the tail + // will never go backwards, only forwards. + // If there is no room to enqueue an object, an additional block (of + // equal size to the last block) is added. Blocks are never removed. + + public: + typedef T value_type; + + // Constructs a queue that can hold at least `size` elements without further + // allocations. If more than MAX_BLOCK_SIZE elements are requested, + // then several blocks of MAX_BLOCK_SIZE each are reserved (including + // at least one extra buffer block). 
+ AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15) +#ifndef NDEBUG + : enqueuing(false) + ,dequeuing(false) +#endif + { + assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2"); + assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2"); + + Block* firstBlock = nullptr; + + largestBlockSize = ceilToPow2(size + 1); // We need a spare slot to fit size elements in the block + if (largestBlockSize > MAX_BLOCK_SIZE * 2) { + // We need a spare block in case the producer is writing to a different block the consumer is reading from, and + // wants to enqueue the maximum number of elements. We also need a spare element in each block to avoid the ambiguity + // between front == tail meaning "empty" and "full". + // So the effective number of slots that are guaranteed to be usable at any time is the block size - 1 times the + // number of blocks - 1. Solving for size and applying a ceiling to the division gives us (after simplifying): + size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1); + largestBlockSize = MAX_BLOCK_SIZE; + Block* lastBlock = nullptr; + for (size_t i = 0; i != initialBlockCount; ++i) { + auto block = make_block(largestBlockSize); + if (block == nullptr) { +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED + throw std::bad_alloc(); +#else + abort(); +#endif + } + if (firstBlock == nullptr) { + firstBlock = block; + } + else { + lastBlock->next = block; + } + lastBlock = block; + block->next = firstBlock; + } + } + else { + firstBlock = make_block(largestBlockSize); + if (firstBlock == nullptr) { +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED + throw std::bad_alloc(); +#else + abort(); +#endif + } + firstBlock->next = firstBlock; + } + frontBlock = firstBlock; + tailBlock = firstBlock; + + // Make sure the reader/writer threads will have the initialized memory setup above: + fence(memory_order_sync); + } + + // Note: The queue should not be accessed concurrently while it's + // being moved. It's up to the user to synchronize this. + AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue&& other) + : frontBlock(other.frontBlock.load()), + tailBlock(other.tailBlock.load()), + largestBlockSize(other.largestBlockSize) +#ifndef NDEBUG + ,enqueuing(false) + ,dequeuing(false) +#endif + { + other.largestBlockSize = 32; + Block* b = other.make_block(other.largestBlockSize); + if (b == nullptr) { +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED + throw std::bad_alloc(); +#else + abort(); +#endif + } + b->next = b; + other.frontBlock = b; + other.tailBlock = b; + } + + // Note: The queue should not be accessed concurrently while it's + // being moved. It's up to the user to synchronize this. + ReaderWriterQueue& operator=(ReaderWriterQueue&& other) AE_NO_TSAN + { + Block* b = frontBlock.load(); + frontBlock = other.frontBlock.load(); + other.frontBlock = b; + b = tailBlock.load(); + tailBlock = other.tailBlock.load(); + other.tailBlock = b; + std::swap(largestBlockSize, other.largestBlockSize); + return *this; + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. 
+ AE_NO_TSAN ~ReaderWriterQueue() + { + // Make sure we get the latest version of all variables from other CPUs: + fence(memory_order_sync); + + // Destroy any remaining objects in queue and free memory + Block* frontBlock_ = frontBlock; + Block* block = frontBlock_; + do { + Block* nextBlock = block->next; + size_t blockFront = block->front; + size_t blockTail = block->tail; + + for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) { + auto element = reinterpret_cast(block->data + i * sizeof(T)); + element->~T(); + (void)element; + } + + auto rawBlock = block->rawThis; + block->~Block(); + std::free(rawBlock); + block = nextBlock; + } while (block != frontBlock_); + } + + + // Enqueues a copy of element if there is room in the queue. + // Returns true if the element was enqueued, false otherwise. + // Does not allocate memory. + AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN + { + return inner_enqueue(element); + } + + // Enqueues a moved copy of element if there is room in the queue. + // Returns true if the element was enqueued, false otherwise. + // Does not allocate memory. + AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN + { + return inner_enqueue(std::forward(element)); + } + +#if MOODYCAMEL_HAS_EMPLACE + // Like try_enqueue() but with emplace semantics (i.e. construct-in-place). + template + AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN + { + return inner_enqueue(std::forward(args)...); + } +#endif + + // Enqueues a copy of element on the queue. + // Allocates an additional block of memory if needed. + // Only fails (returns false) if memory allocation fails. + AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN + { + return inner_enqueue(element); + } + + // Enqueues a moved copy of element on the queue. + // Allocates an additional block of memory if needed. + // Only fails (returns false) if memory allocation fails. + AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN + { + return inner_enqueue(std::forward(element)); + } + +#if MOODYCAMEL_HAS_EMPLACE + // Like enqueue() but with emplace semantics (i.e. construct-in-place). + template + AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN + { + return inner_enqueue(std::forward(args)...); + } +#endif + + // Attempts to dequeue an element; if the queue is empty, + // returns false instead. If the queue has at least one element, + // moves front to result using operator=, then returns true. + template + bool try_dequeue(U& result) AE_NO_TSAN + { +#ifndef NDEBUG + ReentrantGuard guard(this->dequeuing); +#endif + + // High-level pseudocode: + // Remember where the tail block is + // If the front block has an element in it, dequeue it + // Else + // If front block was the tail block when we entered the function, return false + // Else advance to next block and dequeue the item there + + // Note that we have to use the value of the tail block from before we check if the front + // block is full or not, in case the front block is empty and then, before we check if the + // tail block is at the front block or not, the producer fills up the front block *and + // moves on*, which would make us skip a filled block. Seems unlikely, but was consistently + // reproducible in practice. + // In order to avoid overhead in the common case, though, we do a double-checked pattern + // where we have the fast path if the front block is not empty, then read the tail block, + // then re-read the front block and check if it's not empty again, then check if the tail + // block has advanced. 
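+    // Distilled: (1) fast path -- if the cached localTail already shows the
+    // front block non-empty, dequeue directly; (2) otherwise refresh
+    // localTail from the producer's tail and re-check; (3) only if the block
+    // is still empty, fall back to the tailBlock snapshot to decide whether
+    // another block can safely be advanced to.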
+ + Block* frontBlock_ = frontBlock.load(); + size_t blockTail = frontBlock_->localTail; + size_t blockFront = frontBlock_->front.load(); + + if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) { + fence(memory_order_acquire); + + non_empty_front_block: + // Front block not empty, dequeue from here + auto element = reinterpret_cast(frontBlock_->data + blockFront * sizeof(T)); + result = std::move(*element); + element->~T(); + + blockFront = (blockFront + 1) & frontBlock_->sizeMask; + + fence(memory_order_release); + frontBlock_->front = blockFront; + } + else if (frontBlock_ != tailBlock.load()) { + fence(memory_order_acquire); + + frontBlock_ = frontBlock.load(); + blockTail = frontBlock_->localTail = frontBlock_->tail.load(); + blockFront = frontBlock_->front.load(); + fence(memory_order_acquire); + + if (blockFront != blockTail) { + // Oh look, the front block isn't empty after all + goto non_empty_front_block; + } + + // Front block is empty but there's another block ahead, advance to it + Block* nextBlock = frontBlock_->next; + // Don't need an acquire fence here since next can only ever be set on the tailBlock, + // and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which + // ensures next is up-to-date on this CPU in case we recently were at tailBlock. + + size_t nextBlockFront = nextBlock->front.load(); + size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load(); + fence(memory_order_acquire); + + // Since the tailBlock is only ever advanced after being written to, + // we know there's for sure an element to dequeue on it + assert(nextBlockFront != nextBlockTail); + AE_UNUSED(nextBlockTail); + + // We're done with this block, let the producer use it if it needs + fence(memory_order_release); // Expose possibly pending changes to frontBlock->front from last dequeue + frontBlock = frontBlock_ = nextBlock; + + compiler_fence(memory_order_release); // Not strictly needed + + auto element = reinterpret_cast(frontBlock_->data + nextBlockFront * sizeof(T)); + + result = std::move(*element); + element->~T(); + + nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask; + + fence(memory_order_release); + frontBlock_->front = nextBlockFront; + } + else { + // No elements in current block and no other block to advance to + return false; + } + + return true; + } + + + // Returns a pointer to the front element in the queue (the one that + // would be removed next by a call to `try_dequeue` or `pop`). If the + // queue appears empty at the time the method is called, nullptr is + // returned instead. + // Must be called only from the consumer thread. 
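+  // Note: the returned pointer is only valid until the consumer thread
+  // itself removes that element with try_dequeue() or pop().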
+ T* peek() const AE_NO_TSAN + { +#ifndef NDEBUG + ReentrantGuard guard(this->dequeuing); +#endif + // See try_dequeue() for reasoning + + Block* frontBlock_ = frontBlock.load(); + size_t blockTail = frontBlock_->localTail; + size_t blockFront = frontBlock_->front.load(); + + if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) { + fence(memory_order_acquire); + non_empty_front_block: + return reinterpret_cast(frontBlock_->data + blockFront * sizeof(T)); + } + else if (frontBlock_ != tailBlock.load()) { + fence(memory_order_acquire); + frontBlock_ = frontBlock.load(); + blockTail = frontBlock_->localTail = frontBlock_->tail.load(); + blockFront = frontBlock_->front.load(); + fence(memory_order_acquire); + + if (blockFront != blockTail) { + goto non_empty_front_block; + } + + Block* nextBlock = frontBlock_->next; + + size_t nextBlockFront = nextBlock->front.load(); + fence(memory_order_acquire); + + assert(nextBlockFront != nextBlock->tail.load()); + return reinterpret_cast(nextBlock->data + nextBlockFront * sizeof(T)); + } + + return nullptr; + } + + // Removes the front element from the queue, if any, without returning it. + // Returns true on success, or false if the queue appeared empty at the time + // `pop` was called. + bool pop() AE_NO_TSAN + { +#ifndef NDEBUG + ReentrantGuard guard(this->dequeuing); +#endif + // See try_dequeue() for reasoning + + Block* frontBlock_ = frontBlock.load(); + size_t blockTail = frontBlock_->localTail; + size_t blockFront = frontBlock_->front.load(); + + if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) { + fence(memory_order_acquire); + + non_empty_front_block: + auto element = reinterpret_cast(frontBlock_->data + blockFront * sizeof(T)); + element->~T(); + + blockFront = (blockFront + 1) & frontBlock_->sizeMask; + + fence(memory_order_release); + frontBlock_->front = blockFront; + } + else if (frontBlock_ != tailBlock.load()) { + fence(memory_order_acquire); + frontBlock_ = frontBlock.load(); + blockTail = frontBlock_->localTail = frontBlock_->tail.load(); + blockFront = frontBlock_->front.load(); + fence(memory_order_acquire); + + if (blockFront != blockTail) { + goto non_empty_front_block; + } + + // Front block is empty but there's another block ahead, advance to it + Block* nextBlock = frontBlock_->next; + + size_t nextBlockFront = nextBlock->front.load(); + size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load(); + fence(memory_order_acquire); + + assert(nextBlockFront != nextBlockTail); + AE_UNUSED(nextBlockTail); + + fence(memory_order_release); + frontBlock = frontBlock_ = nextBlock; + + compiler_fence(memory_order_release); + + auto element = reinterpret_cast(frontBlock_->data + nextBlockFront * sizeof(T)); + element->~T(); + + nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask; + + fence(memory_order_release); + frontBlock_->front = nextBlockFront; + } + else { + // No elements in current block and no other block to advance to + return false; + } + + return true; + } + + // Returns the approximate number of items currently in the queue. + // Safe to call from both the producer and consumer threads. 
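+  // The per-block front/tail counters are sampled while both threads may be
+  // running, so the sum is only a point-in-time estimate.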
+ inline size_t size_approx() const AE_NO_TSAN + { + size_t result = 0; + Block* frontBlock_ = frontBlock.load(); + Block* block = frontBlock_; + do { + fence(memory_order_acquire); + size_t blockFront = block->front.load(); + size_t blockTail = block->tail.load(); + result += (blockTail - blockFront) & block->sizeMask; + block = block->next.load(); + } while (block != frontBlock_); + return result; + } + + // Returns the total number of items that could be enqueued without incurring + // an allocation when this queue is empty. + // Safe to call from both the producer and consumer threads. + // + // NOTE: The actual capacity during usage may be different depending on the consumer. + // If the consumer is removing elements concurrently, the producer cannot add to + // the block the consumer is removing from until it's completely empty, except in + // the case where the producer was writing to the same block the consumer was + // reading from the whole time. + inline size_t max_capacity() const { + size_t result = 0; + Block* frontBlock_ = frontBlock.load(); + Block* block = frontBlock_; + do { + fence(memory_order_acquire); + result += block->sizeMask; + block = block->next.load(); + } while (block != frontBlock_); + return result; + } + + + private: + enum AllocationMode { CanAlloc, CannotAlloc }; + +#if MOODYCAMEL_HAS_EMPLACE + template + bool inner_enqueue(Args&&... args) AE_NO_TSAN +#else + template + bool inner_enqueue(U&& element) AE_NO_TSAN +#endif + { +#ifndef NDEBUG + ReentrantGuard guard(this->enqueuing); +#endif + + // High-level pseudocode (assuming we're allowed to alloc a new block): + // If room in tail block, add to tail + // Else check next block + // If next block is not the head block, enqueue on next block + // Else create a new block and enqueue there + // Advance tail to the block we just enqueued to + + Block* tailBlock_ = tailBlock.load(); + size_t blockFront = tailBlock_->localFront; + size_t blockTail = tailBlock_->tail.load(); + + size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask; + if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) { + fence(memory_order_acquire); + // This block has room for at least one more element + char* location = tailBlock_->data + blockTail * sizeof(T); +#if MOODYCAMEL_HAS_EMPLACE + new (location) T(std::forward(args)...); +#else + new (location) T(std::forward(element)); +#endif + + fence(memory_order_release); + tailBlock_->tail = nextBlockTail; + } + else { + fence(memory_order_acquire); + if (tailBlock_->next.load() != frontBlock) { + // Note that the reason we can't advance to the frontBlock and start adding new entries there + // is because if we did, then dequeue would stay in that block, eventually reading the new values, + // instead of advancing to the next full block (whose values were enqueued first and so should be + // consumed first). 
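+        // Put differently: blocks must be drained in the order they were
+        // filled, or FIFO order would break across block boundaries.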
+ + fence(memory_order_acquire); // Ensure we get latest writes if we got the latest frontBlock + + // tailBlock is full, but there's a free block ahead, use it + Block* tailBlockNext = tailBlock_->next.load(); + size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load(); + nextBlockTail = tailBlockNext->tail.load(); + fence(memory_order_acquire); + + // This block must be empty since it's not the head block and we + // go through the blocks in a circle + assert(nextBlockFront == nextBlockTail); + tailBlockNext->localFront = nextBlockFront; + + char* location = tailBlockNext->data + nextBlockTail * sizeof(T); +#if MOODYCAMEL_HAS_EMPLACE + new (location) T(std::forward(args)...); +#else + new (location) T(std::forward(element)); +#endif + + tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask; + + fence(memory_order_release); + tailBlock = tailBlockNext; + } + else if (canAlloc == CanAlloc) { + // tailBlock is full and there's no free block ahead; create a new block + auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2; + auto newBlock = make_block(newBlockSize); + if (newBlock == nullptr) { + // Could not allocate a block! + return false; + } + largestBlockSize = newBlockSize; + +#if MOODYCAMEL_HAS_EMPLACE + new (newBlock->data) T(std::forward(args)...); +#else + new (newBlock->data) T(std::forward(element)); +#endif + assert(newBlock->front == 0); + newBlock->tail = newBlock->localTail = 1; + + newBlock->next = tailBlock_->next.load(); + tailBlock_->next = newBlock; + + // Might be possible for the dequeue thread to see the new tailBlock->next + // *without* seeing the new tailBlock value, but this is OK since it can't + // advance to the next block until tailBlock is set anyway (because the only + // case where it could try to read the next is if it's already at the tailBlock, + // and it won't advance past tailBlock in any circumstance). 
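+        // The release fence below publishes both the new element's
+        // construction and the tailBlock_->next update before the new
+        // tailBlock value itself becomes visible to the consumer.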
+ + fence(memory_order_release); + tailBlock = newBlock; + } + else if (canAlloc == CannotAlloc) { + // Would have had to allocate a new block to enqueue, but not allowed + return false; + } + else { + assert(false && "Should be unreachable code"); + return false; + } + } + + return true; + } + + + // Disable copying + ReaderWriterQueue(ReaderWriterQueue const&) { } + + // Disable assignment + ReaderWriterQueue& operator=(ReaderWriterQueue const&) { } + + + AE_FORCEINLINE static size_t ceilToPow2(size_t x) + { + // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (size_t i = 1; i < sizeof(size_t); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static AE_FORCEINLINE char* align_for(char* ptr) AE_NO_TSAN + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + private: +#ifndef NDEBUG + struct ReentrantGuard + { + AE_NO_TSAN ReentrantGuard(weak_atomic& _inSection) + : inSection(_inSection) + { + assert(!inSection && "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)"); + inSection = true; + } + + AE_NO_TSAN ~ReentrantGuard() { inSection = false; } + + private: + ReentrantGuard& operator=(ReentrantGuard const&); + + private: + weak_atomic& inSection; + }; +#endif + + struct Block + { + // Avoid false-sharing by putting highly contended variables on their own cache lines + weak_atomic front; // (Atomic) Elements are read from here + size_t localTail; // An uncontended shadow copy of tail, owned by the consumer + + char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic) - sizeof(size_t)]; + weak_atomic tail; // (Atomic) Elements are enqueued here + size_t localFront; + + char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic) - sizeof(size_t)]; // next isn't very contended, but we don't want it on the same cache line as tail (which is) + weak_atomic next; // (Atomic) + + char* data; // Contents (on heap) are aligned to T's alignment + + const size_t sizeMask; + + + // size must be a power of two (and greater than 0) + AE_NO_TSAN Block(size_t const& _size, char* _rawThis, char* _data) + : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis) + { + } + + private: + // C4512 - Assignment operator could not be generated + Block& operator=(Block const&); + + public: + char* rawThis; + }; + + + static Block* make_block(size_t capacity) AE_NO_TSAN + { + // Allocate enough memory for the block itself, as well as all the elements it will contain + auto size = sizeof(Block) + std::alignment_of::value - 1; + size += sizeof(T) * capacity + std::alignment_of::value - 1; + auto newBlockRaw = static_cast(std::malloc(size)); + if (newBlockRaw == nullptr) { + return nullptr; + } + + auto newBlockAligned = align_for(newBlockRaw); + auto newBlockData = align_for(newBlockAligned + sizeof(Block)); + return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData); + } + + private: + weak_atomic frontBlock; // (Atomic) Elements are dequeued from this block + + char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic)]; + weak_atomic tailBlock; // (Atomic) Elements are enqueued to this block + + size_t largestBlockSize; + +#ifndef NDEBUG + weak_atomic enqueuing; + mutable weak_atomic dequeuing; +#endif +}; + +// Like 
ReaderWriterQueue, but also provides blocking operations
+template<typename T, size_t MAX_BLOCK_SIZE = 512>
+class BlockingReaderWriterQueue
+{
+ private:
+  typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
+
+ public:
+  explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
+      : inner(size), sema(new spsc_sema::LightweightSemaphore())
+  { }
+
+  BlockingReaderWriterQueue(BlockingReaderWriterQueue&& other) AE_NO_TSAN
+      : inner(std::move(other.inner)), sema(std::move(other.sema))
+  { }
+
+  BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue&& other) AE_NO_TSAN
+  {
+    std::swap(sema, other.sema);
+    std::swap(inner, other.inner);
+    return *this;
+  }
+
+
+  // Enqueues a copy of element if there is room in the queue.
+  // Returns true if the element was enqueued, false otherwise.
+  // Does not allocate memory.
+  AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
+  {
+    if (inner.try_enqueue(element)) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+
+  // Enqueues a moved copy of element if there is room in the queue.
+  // Returns true if the element was enqueued, false otherwise.
+  // Does not allocate memory.
+  AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
+  {
+    if (inner.try_enqueue(std::forward<T>(element))) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+
+#if MOODYCAMEL_HAS_EMPLACE
+  // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
+  template<typename... Args>
+  AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
+  {
+    if (inner.try_emplace(std::forward<Args>(args)...)) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+#endif
+
+
+  // Enqueues a copy of element on the queue.
+  // Allocates an additional block of memory if needed.
+  // Only fails (returns false) if memory allocation fails.
+  AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
+  {
+    if (inner.enqueue(element)) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+
+  // Enqueues a moved copy of element on the queue.
+  // Allocates an additional block of memory if needed.
+  // Only fails (returns false) if memory allocation fails.
+  AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
+  {
+    if (inner.enqueue(std::forward<T>(element))) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+
+#if MOODYCAMEL_HAS_EMPLACE
+  // Like enqueue() but with emplace semantics (i.e. construct-in-place).
+  template<typename... Args>
+  AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
+  {
+    if (inner.emplace(std::forward<Args>(args)...)) {
+      sema->signal();
+      return true;
+    }
+    return false;
+  }
+#endif
+
+
+  // Attempts to dequeue an element; if the queue is empty,
+  // returns false instead. If the queue has at least one element,
+  // moves front to result using operator=, then returns true.
+  template<typename U>
+  bool try_dequeue(U& result) AE_NO_TSAN
+  {
+    if (sema->tryWait()) {
+      bool success = inner.try_dequeue(result);
+      assert(success);
+      AE_UNUSED(success);
+      return true;
+    }
+    return false;
+  }
+
+
+  // Attempts to dequeue an element; if the queue is empty,
+  // waits until an element is available, then dequeues it.
+  template<typename U>
+  void wait_dequeue(U& result) AE_NO_TSAN
+  {
+    while (!sema->wait());
+    bool success = inner.try_dequeue(result);
+    AE_UNUSED(result);
+    assert(success);
+    AE_UNUSED(success);
+  }
+
+
+  // Attempts to dequeue an element; if the queue is empty,
+  // waits until an element is available up to the specified timeout,
+  // then dequeues it and returns true, or returns false if the timeout
+  // expires before an element can be dequeued.
+ // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + template + bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs) AE_NO_TSAN + { + if (!sema->wait(timeout_usecs)) { + return false; + } + bool success = inner.try_dequeue(result); + AE_UNUSED(result); + assert(success); + AE_UNUSED(success); + return true; + } + + +#if __cplusplus > 199711L || _MSC_VER >= 1700 + // Attempts to dequeue an element; if the queue is empty, + // waits until an element is available up to the specified timeout, + // then dequeues it and returns true, or returns false if the timeout + // expires before an element can be dequeued. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + template + inline bool wait_dequeue_timed(U& result, std::chrono::duration const& timeout) AE_NO_TSAN + { + return wait_dequeue_timed(result, std::chrono::duration_cast(timeout).count()); + } +#endif + + + // Returns a pointer to the front element in the queue (the one that + // would be removed next by a call to `try_dequeue` or `pop`). If the + // queue appears empty at the time the method is called, nullptr is + // returned instead. + // Must be called only from the consumer thread. + AE_FORCEINLINE T* peek() const AE_NO_TSAN + { + return inner.peek(); + } + + // Removes the front element from the queue, if any, without returning it. + // Returns true on success, or false if the queue appeared empty at the time + // `pop` was called. + AE_FORCEINLINE bool pop() AE_NO_TSAN + { + if (sema->tryWait()) { + bool result = inner.pop(); + assert(result); + AE_UNUSED(result); + return true; + } + return false; + } + + // Returns the approximate number of items currently in the queue. + // Safe to call from both the producer and consumer threads. + AE_FORCEINLINE size_t size_approx() const AE_NO_TSAN + { + return sema->availableApprox(); + } + + // Returns the total number of items that could be enqueued without incurring + // an allocation when this queue is empty. + // Safe to call from both the producer and consumer threads. + // + // NOTE: The actual capacity during usage may be different depending on the consumer. + // If the consumer is removing elements concurrently, the producer cannot add to + // the block the consumer is removing from until it's completely empty, except in + // the case where the producer was writing to the same block the consumer was + // reading from the whole time. 
+
+    // Returns the total number of items that could be enqueued without incurring
+    // an allocation when this queue is empty.
+    // Safe to call from both the producer and consumer threads.
+    //
+    // NOTE: The actual capacity during usage may be different depending on the consumer.
+    // If the consumer is removing elements concurrently, the producer cannot add to
+    // the block the consumer is removing from until it's completely empty, except in
+    // the case where the producer was writing to the same block the consumer was
+    // reading from the whole time.
+    AE_FORCEINLINE size_t max_capacity() const {
+        return inner.max_capacity();
+    }
+
+ private:
+    // Disable copying & assignment
+    BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) { }
+    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) { }
+
+ private:
+    ReaderWriterQueue inner;
+    std::unique_ptr<spsc_sema::LightweightSemaphore> sema;
+};
+
+} // end namespace moodycamel
+
+#ifdef AE_VCPP
+#pragma warning(pop)
+#endif
\ No newline at end of file
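For orientation, this is how the blocking queue above is typically driven from a producer and a consumer thread. A minimal sketch, not part of the patch; the include path of the vendored header is assumed:

    #include <cstdio>
    #include <thread>

    #include "readerwriterqueue.h"  // assumed location of the header added above

    int main() {
      moodycamel::BlockingReaderWriterQueue<int> q(100);

      // Single producer: enqueue() signals the semaphore after each insert
      // and may allocate a new block when the current one is full.
      std::thread producer([&] {
        for (int i = 0; i < 10; ++i) q.enqueue(i);
      });

      // Single consumer: wait_dequeue() blocks on the semaphore until an
      // element is available, so no busy spinning is needed.
      int item = 0;
      for (int i = 0; i < 10; ++i) {
        q.wait_dequeue(item);
        std::printf("got %d\n", item);
      }

      producer.join();
      return 0;
    }

Note that the single-producer/single-consumer restriction is inherited from the underlying ReaderWriterQueue; with more than one thread on either side, the queue's guarantees no longer hold.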
diff --git a/test/benchmarks/microbenchmarks/CMakeLists.txt b/test/benchmarks/microbenchmarks/CMakeLists.txt
index ed5679f..249bf3c 100644
--- a/test/benchmarks/microbenchmarks/CMakeLists.txt
+++ b/test/benchmarks/microbenchmarks/CMakeLists.txt
@@ -13,7 +13,17 @@ if (CCACHE_PROGRAM)
 endif ()
 
 SET(CPP_FILES
+        ../../../src/filesystem/File.cpp
+        ../../../src/checkpoint/FileBackedCheckpointCoordinator.cpp
+        ../../../src/checkpoint/BlockManager.cpp
+        ../../../src/checkpoint/LineageGraph.cpp
+        ../../../src/cql/expressions/Expression.cpp
+        ../../../src/dispatcher/ITaskDispatcher.cpp
+        ../../../src/dispatcher/JoinTaskDispatcher.cpp
         ../../../src/dispatcher/TaskDispatcher.cpp
+        ../../../src/compression/CompressionCodeGenUtils.cpp
+        ../../../src/compression/CompressionStatistics.cpp
+        ../../../src/monitors/CompressionMonitor.cpp
         ../../../src/monitors/PerformanceMonitor.cpp
         ../../../src/monitors/Measurement.cpp
         ../../../src/monitors/LatencyMonitor.cpp
@@ -28,6 +38,18 @@ SET(CPP_FILES
         ../../../src/utils/Utils.cpp
         ../../../src/utils/SystemConf.cpp
         )
+SET(RDMA_CPP_FILES
+        ../../../src/RDMA/infinity/core/Context.cpp
+        ../../../src/RDMA/infinity/memory/Atomic.cpp
+        ../../../src/RDMA/infinity/memory/Buffer.cpp
+        ../../../src/RDMA/infinity/memory/Region.cpp
+        ../../../src/RDMA/infinity/memory/RegionToken.cpp
+        ../../../src/RDMA/infinity/memory/RegisteredMemory.cpp
+        ../../../src/RDMA/infinity/queues/QueuePair.cpp
+        ../../../src/RDMA/infinity/queues/QueuePairFactory.cpp
+        ../../../src/RDMA/infinity/requests/RequestToken.cpp
+        ../../../src/RDMA/infinity/utils/Address.cpp
+        )
 
 SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra -DHAVE_NUM")
@@ -37,10 +59,25 @@
 include_directories(${benchmark_INCLUDE_DIRS})
 
 FIND_LIBRARY(tbb NAMES libtbb.so)
 
+if(NOT WIN32)
+    find_package(PkgConfig QUIET) # PATHS /usr/local/lib64/pkgconfig /usr/local/lib/pkgconfig)
+endif()
+
+SET(ENV{PKG_CONFIG_PATH} "/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}")
+message(STATUS "PKG_CONFIG_PATH: $ENV{PKG_CONFIG_PATH}")
+
+if(PKG_CONFIG_FOUND)
+    pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++)
+else()
+    find_package(LIBPMEMOBJ++ REQUIRED)
+endif()
+
+link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS})
+
 # Operator Microbenchmarks
 add_executable(TestProjection
         TestProjection.cpp
-        ${CPP_FILES}
+        ${CPP_FILES} ${RDMA_CPP_FILES}
         )
 target_link_options(TestProjection PRIVATE -Wl,--unresolved-symbols=ignore-all)
 if (Boost_FOUND)
@@ -52,13 +89,13 @@ target_link_libraries(TestProjection
         boost_iostreams
         boost_fiber
         operatorJITLib
-        tbb
-        pthread dl)
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
 target_compile_options(TestProjection PRIVATE -Wall -Wextra -O3 -march=native)
 
 add_executable(TestSelection
         TestSelection.cpp
-        ${CPP_FILES}
+        ${CPP_FILES} ${RDMA_CPP_FILES}
         )
 target_link_options(TestSelection PRIVATE -Wl,--unresolved-symbols=ignore-all)
 if (Boost_FOUND)
@@ -66,17 +103,18 @@ if (Boost_FOUND)
     target_link_libraries(TestSelection ${Boost_LIBRARIES})
 endif ()
 target_link_libraries(TestSelection
+        ${LIBPMEMOBJ++_LIBRARIES}
         z
         boost_iostreams
         boost_fiber
         operatorJITLib
-        tbb
-        pthread dl)
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
 target_compile_options(TestSelection PRIVATE -Wall -Wextra -O3 -march=native)
 
 add_executable(TestAggregation
         TestAggregation.cpp
-        ${CPP_FILES}
+        ${CPP_FILES} ${RDMA_CPP_FILES}
         )
 target_link_options(TestAggregation PRIVATE -Wl,--unresolved-symbols=ignore-all)
 if (Boost_FOUND)
@@ -88,14 +126,14 @@ target_link_libraries(TestAggregation
         boost_iostreams
         boost_fiber
         operatorJITLib
-        tbb
-        pthread dl)
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
 target_compile_options(TestAggregation PRIVATE -Wall -Wextra -O3 -march=native)
 
 # GAG standalone Microbenchmarks
 add_executable(TestGAG
         TestGAG.cpp
-        ${CPP_FILES}
+        ${CPP_FILES} ${RDMA_CPP_FILES}
         )
 target_link_options(TestGAG PRIVATE -Wl,--unresolved-symbols=ignore-all)
 if (Boost_FOUND)
@@ -107,6 +145,66 @@ target_link_libraries(TestGAG
         boost_iostreams
         boost_fiber
         operatorJITLib
-        tbb
-        pthread dl)
-target_compile_options(TestGAG PRIVATE -Wall -Wextra -O3 -march=native)
\ No newline at end of file
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
+target_compile_options(TestGAG PRIVATE -Wall -Wextra -O3 -march=native)
+
+add_executable(TestJoin
+        TestJoin.cpp
+        ${CPP_FILES} ${RDMA_CPP_FILES}
+        )
+target_link_options(TestJoin PRIVATE -Wl,--unresolved-symbols=ignore-all)
+target_include_directories(TestJoin PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS})
+if (Boost_FOUND)
+    include_directories(${Boost_INCLUDE_DIRS})
+    target_link_libraries(TestJoin ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(TestJoin
+        ${LIBPMEMOBJ++_LIBRARIES}
+        z
+        boost_iostreams
+        boost_fiber
+        operatorJITLib
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
+target_compile_options(TestJoin PRIVATE -Wall -Wextra -O3 -march=native)
+
+add_executable(TestPersistentProjection
+        TestPersistentProjection.cpp
+        ${CPP_FILES} ${RDMA_CPP_FILES}
+        )
+target_link_options(TestPersistentProjection PRIVATE -Wl,--unresolved-symbols=ignore-all)
+target_include_directories(TestPersistentProjection PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS})
+if (Boost_FOUND)
+    include_directories(${Boost_INCLUDE_DIRS})
+    target_link_libraries(TestPersistentProjection ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(TestPersistentProjection
+        ${LIBPMEMOBJ++_LIBRARIES}
+        z snappy
+        boost_iostreams
+        boost_fiber
+        operatorJITLib
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
+target_compile_options(TestPersistentProjection PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG)
+
+add_executable(TestPersistentAggregation
+        TestPersistentAggregation.cpp
+        ${CPP_FILES} ${RDMA_CPP_FILES}
+        )
+target_link_options(TestPersistentAggregation PRIVATE -Wl,--unresolved-symbols=ignore-all)
+target_include_directories(TestPersistentAggregation PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS})
+if (Boost_FOUND)
+    include_directories(${Boost_INCLUDE_DIRS})
+    target_link_libraries(TestPersistentAggregation ${Boost_LIBRARIES})
+endif ()
+target_link_libraries(TestPersistentAggregation
+        ${LIBPMEMOBJ++_LIBRARIES}
+        z snappy
+        boost_iostreams
+        boost_fiber
+        operatorJITLib
+        tbb ibverbs
+        pthread dl aio uuid stdc++fs)
+target_compile_options(TestPersistentAggregation PRIVATE -Wall -Wextra -O3 -march=native -UNDEBUG)
\ No newline at end of file
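The pkg-config/find_package block above makes libpmemobj++ a dependency of every microbenchmark target. For readers new to that library, a minimal sketch of the programming model it provides; this is not code from the repository, and the pool path, layout name, and root layout are placeholders:

    #include <libpmemobj++/p.hpp>
    #include <libpmemobj++/pool.hpp>
    #include <libpmemobj++/transaction.hpp>

    struct root {
      pmem::obj::p<long> counter;  // p<> wraps a field so writes are transactional
    };

    int main() {
      // Create a pool file on a (DAX-mounted) persistent-memory device.
      auto pop = pmem::obj::pool<root>::create(
          "/mnt/pmem/example.pool", "example", PMEMOBJ_MIN_POOL);

      // Updates inside a transaction are atomic with respect to crashes:
      // either the whole block applies, or none of it does after recovery.
      pmem::obj::transaction::run(pop, [&] {
        pop.root()->counter = pop.root()->counter + 1;
      });

      pop.close();
      return 0;
    }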
diff --git a/test/benchmarks/microbenchmarks/RandomDataGenerator.h b/test/benchmarks/microbenchmarks/RandomDataGenerator.h
index e59f0ad..3745ef8 100644
--- a/test/benchmarks/microbenchmarks/RandomDataGenerator.h
+++ b/test/benchmarks/microbenchmarks/RandomDataGenerator.h
@@ -18,7 +18,8 @@ class RandomDataGenerator : public BenchmarkQuery {
  public:
   TupleSchema *m_schema = nullptr;
   QueryApplication *m_application = nullptr;
-  std::vector<char> *m_data = nullptr;
+  std::vector<char> *m_data_1 = nullptr;
+  std::vector<char> *m_data_2 = nullptr;
   bool m_debug = false;
 
   QueryApplication *getApplication() override {
@@ -29,8 +30,8 @@ class RandomDataGenerator : public BenchmarkQuery {
   void loadInMemoryData() {
     size_t len = SystemConf::getInstance().BUNDLE_SIZE;
-    m_data = new std::vector<char>(len);
-    auto buf = (InputSchema *) m_data->data();
+    m_data_1 = new std::vector<char>(len);
+    auto buf = (InputSchema *) m_data_1->data();
 
     const int range_from = 1;
     const int range_to = 1000;
@@ -48,7 +49,7 @@ class RandomDataGenerator : public BenchmarkQuery {
     if (m_debug) {
       std::cout << "timestamp jobId machineId eventType userId category priority cpu ram disk constraints" << std::endl;
-      for (unsigned long i = 0; i < m_data->size() / sizeof(InputSchema); ++i) {
+      for (unsigned long i = 0; i < m_data_1->size() / sizeof(InputSchema); ++i) {
         printf("[DBG] %09d: %7d %8d %8d \n", i, buf[i].timestamp, buf[i].attr1, buf[i].attr2);
       }
@@ -56,7 +57,36 @@
   };
 
   std::vector<char> *getInMemoryData() override {
-    return m_data;
+    return m_data_1;
+  }
+
+  std::vector<char> *getSecondInMemoryData() override {
+    if (m_data_2 == nullptr) {
+      size_t len = SystemConf::getInstance().BUNDLE_SIZE;
+      m_data_2 = new std::vector<char>(len);
+
+      auto buf1 = (InputSchema *) m_data_1->data();
+      auto buf2 = (InputSchema *) m_data_2->data();
+
+      const int range_from = 1;
+      const int range_to = len;
+      std::random_device rand_dev;
+      std::mt19937 generator(rand_dev());
+      std::uniform_int_distribution<int> distr(range_from, range_to);
+
+      unsigned long idx = 0;
+      while (idx < len / sizeof(InputSchema)) {
+        buf1[idx].timestamp = 1; //idx;
+        buf1[idx].attr1 = 2; //distr(generator);
+        buf1[idx].attr2 = distr(generator);
+
+        buf2[idx].timestamp = 1; //idx;
+        buf2[idx].attr1 = 3; //distr(generator);
+        buf2[idx].attr2 = distr(generator);
+        idx++;
+      }
+    }
+    return m_data_2;
   }
 
   std::vector<char> *getStaticData() override {
diff --git a/test/benchmarks/microbenchmarks/TestAggregation.cpp b/test/benchmarks/microbenchmarks/TestAggregation.cpp
index 6503ef2..907a2e2 100644
--- a/test/benchmarks/microbenchmarks/TestAggregation.cpp
+++ b/test/benchmarks/microbenchmarks/TestAggregation.cpp
@@ -15,7 +15,7 @@ class TestAggregation : public RandomDataGenerator {
     SystemConf::getInstance().SLOTS = 256;
     SystemConf::getInstance().PARTIAL_WINDOWS = 64; // change this depending on the batch size
 
-    // Configure non-grouped aggregation. Check the application benchmarks for grouped aggregations.
+    // Configure non-grouped aggregation. Check the application benchmarks for grouped aggregations.
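+    // (Illustrative reading, assuming the ROW_BASED 1000/1000 window set up
+    //  below: the operator computes one ungrouped sum per tumbling window of
+    //  1000 rows, roughly "SELECT SUM(attr) FROM Stream WINDOW 1000 ROWS".)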
     std::vector<AggregationType> aggregationTypes(1);
     aggregationTypes[0] = AggregationTypes::fromString("sum");
@@ -24,7 +24,7 @@ class TestAggregation : public RandomDataGenerator {
     std::vector<Expression *> groupByAttributes(0);
 
-    auto window = new WindowDefinition(RANGE_BASED, 50000, 10000);
+    auto window = new WindowDefinition(ROW_BASED, 1000, 1000);
     Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes);
 
     // Set up code-generated operator
@@ -44,15 +44,8 @@ class TestAggregation : public RandomDataGenerator {
     long timestampReference = std::chrono::system_clock::now().time_since_epoch().count();
 
-    bool replayTimestamps = false;
-    bool copyDataOnInsert = true;
-    if (window->isRangeBased()) {
-      replayTimestamps = true;
-      copyDataOnInsert = false;
-    }
-
-    std::vector<std::shared_ptr<Query>> queries(1);
-    queries[0] = std::make_shared<Query>(0, operators, *window, m_schema, timestampReference, true, replayTimestamps, copyDataOnInsert);
+    std::vector<std::shared_ptr<Query>> queries(1);
+    queries[0] = std::make_shared<Query>(0, operators, *window, m_schema, timestampReference, true, false, true);
 
     m_application = new QueryApplication(queries);
     m_application->setup();
@@ -69,11 +62,11 @@ class TestAggregation : public RandomDataGenerator {
 };
 
 int main(int argc, const char **argv) {
-  BenchmarkQuery *benchmarkQuery = nullptr;
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
 
   BenchmarkQuery::parseCommandLineArguments(argc, argv);
 
-  benchmarkQuery = new TestAggregation();
+  benchmarkQuery = std::make_unique<TestAggregation>();
 
   return benchmarkQuery->runBenchmark();
 }
\ No newline at end of file
diff --git a/test/benchmarks/microbenchmarks/TestGAG.cpp b/test/benchmarks/microbenchmarks/TestGAG.cpp
index 6482976..24a4696 100644
--- a/test/benchmarks/microbenchmarks/TestGAG.cpp
+++ b/test/benchmarks/microbenchmarks/TestGAG.cpp
@@ -52,7 +52,7 @@ long normalisedTimestamp = -1;
 struct alignas(64) InputSchema {
   long timestamp;
   long messageIndex;
-  int value;  //Electrical Power Main Phase 1
+  int value;   //Electrical Power Main Phase 1
   int mf02;    //Electrical Power Main Phase 2
   int mf03;    //Electrical Power Main Phase 3
   int pc13;    //Anode Current Drop Detection Cell 1
@@ -108,7 +108,7 @@ void loadData() {
   boost::posix_time::ptime myEpoch;
   is >> myEpoch;
-  std::string filePath = Utils::GetHomeDir() + "/LightSaber/resources/datasets/manufacturing_equipment/";
+  std::string filePath = Utils::getHomeDir() + "/LightSaber/resources/datasets/manufacturing_equipment/";
   std::ifstream file(filePath + "DEBS2012-small.txt");
   std::string line;
   unsigned long idx = 0;
@@ -148,7 +148,7 @@ void generateFunctionsSingle(int argc, const char **argv, std::vector
diff --git a/test/benchmarks/microbenchmarks/TestJoin.cpp b/test/benchmarks/microbenchmarks/TestJoin.cpp
new file mode 100644
--- /dev/null
+++ b/test/benchmarks/microbenchmarks/TestJoin.cpp
+#include
+
+#include "cql/expressions/ColumnReference.h"
+#include "cql/operators/ThetaJoin.h"
+#include "cql/predicates/ComparisonPredicate.h"
+#include "microbenchmarks/RandomDataGenerator.h"
+#include "utils/Query.h"
+#include "utils/QueryOperator.h"
+#include "utils/WindowDefinition.h"
+
+class TestJoin : public RandomDataGenerator {
+ private:
+  void createApplication() override {
+    SystemConf::getInstance().BATCH_SIZE = 256 * SystemConf::getInstance()._KB;
+    SystemConf::getInstance().BUNDLE_SIZE = 4 * SystemConf::getInstance()._KB;
+    SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = 2 * SystemConf::getInstance()._MB;
+    //SystemConf::getInstance().SLOTS = 16 * 1024;
+    SystemConf::getInstance().CIRCULAR_BUFFER_SIZE = 256 * 1048576;
+
+    long windowSize = 1024;
+    long windowSlide = 1024;
+    bool copyDataOnInsert = false;
+
+    int numberOfAttributes = 2;
+    auto longAttr = AttributeType(BasicType::Long);
+    auto intAttr = AttributeType(BasicType::Integer);
+
+    auto window1 = new WindowDefinition(ROW_BASED, windowSize, windowSlide);
+    auto schema1 = new TupleSchema(numberOfAttributes + 1, "Stream1");
+    for (int i = 1; i < numberOfAttributes + 1; i++) {
+      schema1->setAttributeType(i, intAttr);
+    }
+    schema1->setAttributeType(0, longAttr);
+
+    auto window2 = new WindowDefinition(ROW_BASED, windowSize, windowSlide);
+    auto schema2 = new TupleSchema(numberOfAttributes + 1, "Stream2");
+    for (int i = 1; i < numberOfAttributes + 1; i++) {
+      schema2->setAttributeType(i, intAttr);
+    }
+    schema2->setAttributeType(0, longAttr);
+
+    auto predicate = new ComparisonPredicate(EQUAL_OP, new ColumnReference(1), new ColumnReference(1));
+    auto join = new ThetaJoin(*schema1, *schema2, predicate);
+    join->setQueryId(0);
+    join->setup(window1, window2);
+
+    // set up application
+    auto queryOperator = new QueryOperator(*join);
+    std::vector<QueryOperator *> operators;
+    operators.push_back(queryOperator);
+    m_timestampReference = std::chrono::system_clock::now().time_since_epoch().count();
+    std::vector<std::shared_ptr<Query>> queries(1);
+    queries[0] = std::make_shared<Query>(0,
+                                         operators,
+                                         *window1,
+                                         schema1,
+                                         *window2,
+                                         schema2,
+                                         m_timestampReference,
+                                         true,
+                                         false,
+                                         copyDataOnInsert,
+                                         false);
+
+    m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON);
+    m_application->setup();
+  }
+
+ public:
+  TestJoin(bool inMemory = true) {
+    m_name = "TestJoin";
+    createSchema();
+    createApplication();
+    if (inMemory)
+      loadInMemoryData();
+  }
+};
+
+int main(int argc, const char **argv) {
+  std::unique_ptr<BenchmarkQuery> benchmarkQuery {};
+
+  BenchmarkQuery::parseCommandLineArguments(argc, argv);
+
+  benchmarkQuery = std::make_unique<TestJoin>();
+
+  return benchmarkQuery->runTwoStreamsBenchmark();
+}
\ No newline at end of file
diff --git a/test/benchmarks/microbenchmarks/TestPersistentAggregation.cpp b/test/benchmarks/microbenchmarks/TestPersistentAggregation.cpp
new file mode 100644
index 0000000..da8def5
--- /dev/null
+++ b/test/benchmarks/microbenchmarks/TestPersistentAggregation.cpp
@@ -0,0 +1,218 @@
+#include
+
+#include "checkpoint/FileBackedCheckpointCoordinator.h"
+#include "compression/Compressor.h"
+#include "cql/expressions/ColumnReference.h"
+#include "cql/operators/Aggregation.h"
+#include "cql/operators/AggregationType.h"
+#include "cql/operators/codeGeneration/OperatorKernel.h"
+#include "microbenchmarks/RandomDataGenerator.h"
+#include "snappy.h"
+#include "utils/Query.h"
+#include "utils/QueryOperator.h"
+#include "utils/WindowDefinition.h"
+
+// Template for implementing hard-coded compression schemes for Scabbard
+namespace TPACompression {
+struct input_tuple_t {
+  long timestamp;
+  int attr1;
+  int attr2;
+};
+
+std::vector<std::string> *metadata;
+
+void compressInput(int pid, char *input, int start, int end, char *output,
+                   int &writePos, int length, bool &clear, long latency = -1) {
+  if (start == 0 && end == -1) {
+    return;
+  }
+
+  auto data = (input_tuple_t *)input;
+  std::vector<size_t> idxs (2);
+  BaseDeltaCompressor<long, uint16_t> bcomp(data[0].timestamp);
+  DummyFloatCompressor fcomp(1000);
+  struct t_1 {
+    uint16_t timestamp : 10;
+    uint8_t counter : 6;
+  };
+  struct t_2 {
+    uint16_t attr1 : 10;
+  };
+
+  for (auto &i : idxs) {
+    i = 0;
+  }
+
+  t_1 *buf1 = (t_1 *)(output);
+  t_2 *buf2 = (t_2 *)(output + (int) (length*0.5));
+  size_t n = (end - start) / sizeof(input_tuple_t);
+  uint8_t count_1 = 1;
+  for (size_t idx = 0; idx < n; idx++) {
+    auto fVal_1 = bcomp.compress(data[idx].timestamp);
+    auto sVal_1 = fVal_1;
+    if (idx < n - 1 && count_1 < 1023 &&
+        fVal_1 == (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) {
+      count_1++;
+    } else {
+      buf1[idxs[0]++] = {fVal_1, count_1};
+      fVal_1 = sVal_1;
+      count_1 = 1;
+    }
+
+    buf2[idxs[1]++] = {(uint16_t) data[idx].attr1};
+  }
+
+  writePos += idxs[0] * sizeof(t_1);
+  (*metadata)[pid] = "c0 RLE BD " + std::to_string(data[0].timestamp) + " {uint16_t:10,uint8_t:6} " + std::to_string(writePos);
+  std::memcpy((void *)(output + writePos), (void *)buf2,
+              idxs[1] * sizeof(t_2));
+  writePos += idxs[1] * sizeof(t_2);
+  (*metadata)[pid] += " c1 NS {uint16_t:16} " + std::to_string(writePos);
+
+  // write metadata required for decompression
+  if ((*metadata)[pid].size() > 128) {
+    throw std::runtime_error("error: increase the metadata size");
+  }
+  std::memcpy((void *)(output - 128), (*metadata)[pid].data(),
+              (*metadata)[pid].size());
+  (*metadata)[pid].clear();
+}
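+
+// Illustrative walk-through of compressInput() with made-up values: input
+// timestamps {100, 100, 100, 105} give base-delta values {0, 0, 0, 5} against
+// the base 100, which run-length encode to t_1{timestamp=0, counter=3} and
+// t_1{timestamp=5, counter=1}; independently, every attr1 value is truncated
+// to 10 bits and appended to the t_2 array in the second half of the output,
+// with the split points recorded in the per-worker metadata string.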
+
+void decompressInput(int pid, char *input, int start, int end, char *output,
+                     int &writePos, int length, bool &copy, long latency = -1) {
+  // parse metadata for decompression
+  throw std::runtime_error("error: not implemented");
+}
+
+void compress(int pid, char *input, int start, int end, char *output, int &writePos,
+              bool isComplete, bool &clear) {
+  if (start == 0 && end == -1) {
+    return;
+  }
+
+  size_t output_length;
+  auto buf1 = (uint64_t *)input;
+  snappy::RawCompress((const char *)(buf1), end, (char *)(output),
+                      &output_length);
+  writePos += output_length;
+  // write metadata required for decompression
+  /*(*metadata)[pid] = "snappy " + std::to_string(output_length);
+  if ((*metadata)[pid].size() > 128) {
+    throw std::runtime_error("error: increase the metadata size");
+  }
+  std::memcpy((void *)(output - 128), (*metadata)[pid].data(),
+              (*metadata)[pid].size());
+  (*metadata)[pid].clear();
+  }*/
+}
+
+}; // namespace TPACompression
+
+class TestPersistentAggregation : public RandomDataGenerator {
+ private:
+  void createApplication() override {
+    SystemConf::getInstance().WORKER_THREADS = 1;
+    // Setup input queue and batch size
+    SystemConf::getInstance().CIRCULAR_BUFFER_SIZE = 33554432;
+    SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = 524288;
+    SystemConf::getInstance().BATCH_SIZE = 524288;
+    SystemConf::getInstance().BUNDLE_SIZE = 524288;
+
+    SystemConf::getInstance().SLOTS = 256;
+    SystemConf::getInstance().PARTIAL_WINDOWS = 64; // change this depending on the batch size
+
+    // Configure non-grouped aggregation. Check the application benchmarks for grouped aggregations.
+ std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("sum"); + + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Float); + + std::vector groupByAttributes(0); + + auto window = new WindowDefinition(ROW_BASED, 1000, 1000); + Aggregation *aggregation = new Aggregation(*window, aggregationTypes, aggregationAttributes, groupByAttributes); + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true); + genCode->setInputSchema(getSchema()); + genCode->setAggregation(aggregation); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Setup checkpointing + SystemConf::getInstance().CHECKPOINT_ON = true; + // Set checkpoint interval to 1-sec + SystemConf::getInstance().CHECKPOINT_INTERVAL = 1000L; + // Enable compression when checkpointing state/output results + SystemConf::getInstance().CHECKPOINT_COMPRESSION = true; + // Set input as persistent + SystemConf::getInstance().PERSIST_INPUT = true; + // Set disk block size + SystemConf::getInstance().BLOCK_SIZE = 512*1024; + // Set if we are recovering from previous data + SystemConf::getInstance().RECOVER = false; + // Enable dependency tracking + SystemConf::getInstance().LINEAGE_ON = true; + + // Define the operator as ft-operator + bool isFaultTolerant = true; + auto queryOperator = new QueryOperator(*cpuCode, isFaultTolerant); + std::vector operators; + operators.push_back(queryOperator); + + long timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + // Define the input as persistent + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + bool clearPreviousFiles = + !SystemConf::getInstance().RECOVER; // set false if you need to recover + // from already persisted files + queries[0] = std::make_shared( + 0, operators, *window, m_schema, timestampReference, true, false, true, + false, 0, persistInput, nullptr, clearPreviousFiles); + + // define hard-coded compression schemes to start compressing + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(TPACompression::compressInput); + queries[0]->getBuffer()->setDecompressionFP(TPACompression::decompressInput); + } + + m_application = + new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, + !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && + (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + TPACompression::metadata = new std::vector>( + SystemConf::getInstance().WORKER_THREADS, ""); + // Here set hard-coded compression schemes for state/output results compression + //m_application->getCheckpointCoordinator()->setCompressionFP(0, TPACompression::compress); + } + } + + public: + TestPersistentAggregation(bool inMemory = true) { + m_name = "TestPersistentAggregation"; + createSchema(); + createApplication(); + if (inMemory) + loadInMemoryData(); + } +}; + +int main(int argc, const char **argv) { + std::unique_ptr benchmarkQuery {}; + + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + benchmarkQuery = std::make_unique(); + + return benchmarkQuery->runBenchmark(); +} \ No newline at end of file diff --git a/test/benchmarks/microbenchmarks/TestPersistentProjection.cpp b/test/benchmarks/microbenchmarks/TestPersistentProjection.cpp new file mode 100644 index 
0000000..b011538 --- /dev/null +++ b/test/benchmarks/microbenchmarks/TestPersistentProjection.cpp @@ -0,0 +1,213 @@ +#include + +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "compression/Compressor.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/codeGeneration/OperatorKernel.h" +#include "microbenchmarks/RandomDataGenerator.h" +#include "snappy.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/WindowDefinition.h" + +// Template for implementing hard-coded compression schemes for Scabbard +namespace TPPCompression { +struct input_tuple_t { + long timestamp; + int attr1; + int attr2; +}; + +std::vector> *metadata; + +void compressInput(int pid, char *input, int start, int end, char *output, + int &writePos, int length, bool &clear, long latency = -1) { + if (start == 0 && end == -1) { + return; + } + + auto data = (input_tuple_t *)input; + std::vector idxs (2); + BaseDeltaCompressor bcomp(data[0].timestamp); + DummyFloatCompressor fcomp(1000); + struct t_1 { + uint16_t timestamp : 10; + uint8_t counter : 6; + }; + struct t_2 { + uint16_t attr1 : 10; + }; + + for (auto &i : idxs) { + i = 0; + } + + t_1 *buf1 = (t_1 *)(output); + t_2 *buf2 = (t_2 *)(output + (int) (length*0.5)); + size_t n = (end - start) / sizeof(input_tuple_t); + uint8_t count_1 = 1; + for (size_t idx = 0; idx < n; idx++) { + auto fVal_1 = bcomp.compress(data[idx].timestamp); + auto sVal_1 = fVal_1; + if (idx < n - 1 && count_1 < 1023 && fVal_1 == + (sVal_1 = bcomp.compress(data[idx + 1].timestamp))) { + count_1++; + } else { + buf1[idxs[0]++] = {fVal_1, count_1}; + fVal_1 = sVal_1; + count_1 = 1; + } + + buf2[idxs[1]++] = {(uint16_t) data[idx].attr1}; + } + + writePos += idxs[0] * sizeof(t_1); + (*metadata)[pid] = "c0 RLE BD " + std::to_string(data[0].timestamp) + " {uint16_t:10,uint8_t:6} " + std::to_string(writePos); + std::memcpy((void *)(output + writePos), (void *)buf2, + idxs[1] * sizeof(t_2)); + writePos += idxs[1] * sizeof(t_2); + (*metadata)[pid] += " c1 NS {uint16_t:16} " + std::to_string(writePos); + + // write metadata required for decompression + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), + (*metadata)[pid].size()); + (*metadata)[pid].clear(); +} + +void decompressInput(int pid, char *input, int start, int end, char *output, + int &writePos, int length, bool ©, long latency = -1) { + // parse metadata for decompression + throw std::runtime_error("error: not implemented"); +} + +void compress(int pid, char *input, int start, int end, char *output, int &writePos, + bool isComplete, bool &clear) { + if (start == 0 && end == -1) { + return; + } + + size_t output_length; + auto buf1 = (uint64_t *)input; + snappy::RawCompress((const char *)(buf1), end, (char *)(output), + &output_length); + writePos += output_length; + // write metadata required for decompression + /*(*metadata)[pid] = "snappy " + std::to_string(output_length); + if ((*metadata)[pid].size() > 128) { + throw std::runtime_error("error: increase the metadata size"); + } + std::memcpy((void *)(output - 128), (*metadata)[pid].data(), + (*metadata)[pid].size()); + (*metadata)[pid].clear(); + }*/ +} + +}; // namespace TPPCompression + +class TestPersistentProjection : public RandomDataGenerator { + private: + void createApplication() override { + SystemConf::getInstance().WORKER_THREADS = 1; + // Setup input queue and batch size + 
SystemConf::getInstance().CIRCULAR_BUFFER_SIZE = 33554432; + SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = 524288; + SystemConf::getInstance().BATCH_SIZE = 524288; + SystemConf::getInstance().BUNDLE_SIZE = 524288; + + // Configure projection + std::vector expressions(2); + // Always project the timestamp + expressions[0] = new ColumnReference(0); + expressions[1] = new ColumnReference(1); + Projection *projection = new Projection(expressions); + + auto window = new WindowDefinition(ROW_BASED, 60, 60); + + bool replayTimestamps = window->isRangeBased(); + + // Set up code-generated operator + OperatorKernel *genCode = new OperatorKernel(true); + genCode->setInputSchema(getSchema()); + genCode->setProjection(projection); + genCode->setQueryId(0); + genCode->setup(); + OperatorCode *cpuCode = genCode; + + // Print operator + std::cout << cpuCode->toSExpr() << std::endl; + + // Setup checkpointing - no state to checkpoint, only output tuples + SystemConf::getInstance().CHECKPOINT_ON = true; + // Set checkpoint interval to 1-sec + SystemConf::getInstance().CHECKPOINT_INTERVAL = 1000L; + // Enable compression when checkpointing state/output results + SystemConf::getInstance().CHECKPOINT_COMPRESSION = true; + // Set input as persistent + SystemConf::getInstance().PERSIST_INPUT = true; + // Set disk block size + SystemConf::getInstance().BLOCK_SIZE = 1024*1024; + // Set if we are recovering from previous data + SystemConf::getInstance().RECOVER = false; + // Enable dependency tracking + SystemConf::getInstance().LINEAGE_ON = true; + + // Define the operator as ft-operator + bool isFaultTolerant = true; + auto queryOperator = new QueryOperator(*cpuCode, isFaultTolerant); + std::vector operators; + operators.push_back(queryOperator); + + long timestampReference = + std::chrono::system_clock::now().time_since_epoch().count(); + + std::vector> queries(1); + // Define the input as persistent + bool persistInput = SystemConf::getInstance().PERSIST_INPUT; + bool clearPreviousFiles = + !SystemConf::getInstance().RECOVER; // set false if you need to recover + // from already persisted files + queries[0] = std::make_shared( + 0, operators, *window, m_schema, timestampReference, false, false, true, + false, 0, persistInput, nullptr, clearPreviousFiles); + + + // define hard-coded compression schemes to start compressing + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION) { + queries[0]->getBuffer()->setCompressionFP(TPPCompression::compressInput); + queries[0]->getBuffer()->setDecompressionFP(TPPCompression::decompressInput); + } + + m_application = + new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON, + !SystemConf::getInstance().RECOVER); + m_application->setup(); + if (SystemConf::getInstance().CHECKPOINT_COMPRESSION && + (SystemConf::getInstance().CHECKPOINT_ON || persistInput)) { + TPPCompression::metadata = new std::vector>( + SystemConf::getInstance().WORKER_THREADS, ""); + // Here set hard-coded compression schemes for state/output results compression + //m_application->getCheckpointCoordinator()->setCompressionFP(0, TPPCompression::compress); + } + } + + public: + TestPersistentProjection(bool inMemory = true) { + m_name = "TestPersistentProjection"; + createSchema(); + createApplication(); + if (inMemory) loadInMemoryData(); + } +}; + +int main(int argc, const char **argv) { + std::unique_ptr benchmarkQuery{}; + + BenchmarkQuery::parseCommandLineArguments(argc, argv); + + benchmarkQuery = std::make_unique(); + + return benchmarkQuery->runBenchmark(); +} \ No newline at 
end of file diff --git a/test/benchmarks/microbenchmarks/TestProjection.cpp b/test/benchmarks/microbenchmarks/TestProjection.cpp index 14befc4..cb52b24 100644 --- a/test/benchmarks/microbenchmarks/TestProjection.cpp +++ b/test/benchmarks/microbenchmarks/TestProjection.cpp @@ -57,11 +57,11 @@ class TestProjection : public RandomDataGenerator { }; int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; BenchmarkQuery::parseCommandLineArguments(argc, argv); - benchmarkQuery = new TestProjection(); + benchmarkQuery = std::make_unique(); return benchmarkQuery->runBenchmark(); } \ No newline at end of file diff --git a/test/benchmarks/microbenchmarks/TestSelection.cpp b/test/benchmarks/microbenchmarks/TestSelection.cpp index 2e9df9d..ff43e59 100644 --- a/test/benchmarks/microbenchmarks/TestSelection.cpp +++ b/test/benchmarks/microbenchmarks/TestSelection.cpp @@ -38,7 +38,7 @@ class TestSelection : public RandomDataGenerator { std::vector> queries(1); queries[0] = std::make_shared(0, operators, *window, m_schema, timestampReference, false, false, true); - m_application = new QueryApplication(queries); + m_application = new QueryApplication(queries, SystemConf::getInstance().CHECKPOINT_ON); m_application->setup(); } @@ -53,11 +53,11 @@ class TestSelection : public RandomDataGenerator { }; int main(int argc, const char **argv) { - BenchmarkQuery *benchmarkQuery = nullptr; + std::unique_ptr benchmarkQuery {}; BenchmarkQuery::parseCommandLineArguments(argc, argv); - benchmarkQuery = new TestSelection(); + benchmarkQuery = std::make_unique(); return benchmarkQuery->runBenchmark(); } \ No newline at end of file diff --git a/test/unit_tests/CMakeLists.txt b/test/unit_tests/CMakeLists.txt index c432d3e..a68237b 100644 --- a/test/unit_tests/CMakeLists.txt +++ b/test/unit_tests/CMakeLists.txt @@ -1,6 +1,9 @@ find_package(GTest REQUIRED) include_directories(${GTEST_INCLUDE_DIRS}) +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -lnuma -lrt") +SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -g -Wall -Wextra") + #find_package(benchmark REQUIRED) #include_directories(${benchmark_INCLUDE_DIRS}) @@ -15,9 +18,18 @@ if (CCACHE_PROGRAM) set(CMAKE_CXX_COMPILER "/usr/lib/ccache/clang++") endif () -add_executable(unit_tests_run - main.cpp +SET(CPP_FILES + ../../src/filesystem/File.cpp + ../../src/checkpoint/FileBackedCheckpointCoordinator.cpp + ../../src/checkpoint/BlockManager.cpp + ../../src/checkpoint/LineageGraph.cpp + ../../src/cql/expressions/Expression.cpp + ../../src/dispatcher/ITaskDispatcher.cpp + ../../src/dispatcher/JoinTaskDispatcher.cpp ../../src/dispatcher/TaskDispatcher.cpp + ../../src/compression/CompressionCodeGenUtils.cpp + ../../src/compression/CompressionStatistics.cpp + ../../src/monitors/CompressionMonitor.cpp ../../src/monitors/PerformanceMonitor.cpp ../../src/monitors/Measurement.cpp ../../src/monitors/LatencyMonitor.cpp @@ -31,11 +43,57 @@ add_executable(unit_tests_run ../../src/utils/SystemConf.cpp ../../src/utils/Utils.cpp ) +SET(RDMA_CPP_FILES + ../../src/RDMA/infinity/core/Context.cpp + ../../src/RDMA/infinity/memory/Atomic.cpp + ../../src/RDMA/infinity/memory/Buffer.cpp + ../../src/RDMA/infinity/memory/Region.cpp + ../../src/RDMA/infinity/memory/RegionToken.cpp + ../../src/RDMA/infinity/memory/RegisteredMemory.cpp + ../../src/RDMA/infinity/queues/QueuePair.cpp + ../../src/RDMA/infinity/queues/QueuePairFactory.cpp + ../../src/RDMA/infinity/requests/RequestToken.cpp + 
../../src/RDMA/infinity/utils/Address.cpp + ) FIND_LIBRARY(tbb NAMES libtbb.so) -target_link_libraries(unit_tests_run ${GTEST_LIBRARIES} pthread tbb dl boost_fiber) +if(NOT WIN32) + find_package(PkgConfig QUIET) +endif() + +if(PKG_CONFIG_FOUND) + pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) +else() + find_package(LIBPMEMOBJ++ REQUIRED) +endif() + +link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) + +add_executable(operators_unit_tests operators.cpp ${CPP_FILES} ${RDMA_CPP_FILES}) +target_link_options(operators_unit_tests PRIVATE -Wl,--unresolved-symbols=ignore-all) +target_include_directories(operators_unit_tests PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(operators_unit_tests ${GTEST_LIBRARIES} ${LIBPMEMOBJ++_LIBRARIES} + z + boost_iostreams + boost_fiber + operatorJITLib + tbb ibverbs + pthread dl aio uuid stdc++fs) #target_link_libraries(Unit_Tests_run ${benchmark_LIBRARIES} pthread tbb dl) +target_compile_options(operators_unit_tests PRIVATE -Wall -Wextra -O0 -march=native -UNDEBUG) + +add_executable(internals_unit_tests internals.cpp ${CPP_FILES} ${RDMA_CPP_FILES}) +target_include_directories(internals_unit_tests PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(internals_unit_tests ${GTEST_LIBRARIES} ${LIBPMEMOBJ++_LIBRARIES} pthread tbb dl boost_fiber aio uuid stdc++fs ibverbs) +target_compile_options(internals_unit_tests PRIVATE -Wall -Wextra -O0 -march=native -UNDEBUG) -target_compile_options(unit_tests_run PRIVATE -Wall -Wextra -O3 -march=native) +add_executable(ds_unit_tests datastructures.cpp ${CPP_FILES} ${RDMA_CPP_FILES}) +target_include_directories(ds_unit_tests PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +target_link_libraries(ds_unit_tests ${GTEST_LIBRARIES} ${LIBPMEMOBJ++_LIBRARIES} pthread tbb dl boost_fiber aio uuid stdc++fs ibverbs) +target_compile_options(ds_unit_tests PRIVATE -Wall -Wextra -O0 -march=native -UNDEBUG) +#add_executable(checkpoint_unit_tests checkpoint.cpp ${CPP_FILES} ../../src/filesystem/File.cpp ../../src/checkpoint/FileBackedCheckpointCoordinator.cpp ${RDMA_CPP_FILES}) +#target_include_directories(checkpoint_unit_tests PUBLIC ${LIBPMEMOBJ++_INCLUDE_DIRS}) +#target_link_libraries(checkpoint_unit_tests ${GTEST_LIBRARIES} ${LIBPMEMOBJ++_LIBRARIES} pthread tbb dl boost_fiber aio uuid stdc++fs ibverbs) +#target_compile_options(checkpoint_unit_tests PRIVATE ${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -O0 -march=native) \ No newline at end of file diff --git a/test/unit_tests/checkpoint.cpp b/test/unit_tests/checkpoint.cpp new file mode 100644 index 0000000..8f075b7 --- /dev/null +++ b/test/unit_tests/checkpoint.cpp @@ -0,0 +1,150 @@ +#include + +#include +#include + +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/AggregationType.h" +#include "dispatcher/TaskDispatcher.h" +#include "filesystem/File.h" +#include "filesystem/FileSystemDisk.h" +#include "filesystem/NullDisk.h" +#include "gtest/gtest.h" +#include "result/PartialResultSlot.h" +#include "result/ResultHandler.h" +#include "unit_tests/utils/AsyncCircularQueryBuffer.h" +#include "utils/Query.h" +#include "utils/QueryOperator.h" +#include "utils/SystemConf.h" +#include "utils/Utils.h" + +#define DISK +//#undef DISK +#define NUM_OF_WORKERS 1 + +typedef QueueIoHandler f_handler_t; +typedef FileSystemDisk f_disk_t; +typedef NullHandler nd_handler_t; +typedef NullDisk nd_disk_t; + +// helper functions definition +TupleSchema createSchema(); +Aggregation 
*createAggregation(); +std::shared_ptr createQuery(); + +TEST(Checkpoint, CreateAndDeleteCheckpointFiles) { + // Create a single pipeline + std::vector> queries(1); + queries[0] = createQuery(); + +#ifdef DISK + FileBackedCheckpointCoordinator coordinator(0, queries); +#else + CheckpointCoordinator coordinator(0, queries); +#endif + + coordinator.clearPersistentMemory(); +} + +TEST(Checkpoint, EmulateCheckpointWrites) { + SystemConf::getInstance().SLOTS = 256; + SystemConf::getInstance().PARTIAL_WINDOWS = 512; + // SystemConf::getInstance().UNBOUNDED_BUFFER_SIZE = 4 * + // SystemConf::getInstance()._4MB; + SystemConf::getInstance().HASH_TABLE_SIZE = 8; + SystemConf::getInstance().CHECKPOINT_INTERVAL = 1000L; // msec + SystemConf::getInstance().BLOCK_SIZE = SystemConf::_4MB; + SystemConf::getInstance().DURATION = 10; + + // Create a single pipeline + std::vector> queries(1); + queries[0] = createQuery(); + + auto filesystem = std::make_shared(SystemConf::FILE_ROOT_PATH); + //FileBackedCheckpointCoordinator coordinator(0, queries, filesystem, false); + + auto dispatcher = queries[0]->getTaskDispatcher(); + auto resultHandler = queries[0]->getResultHandler(); + + // fill with dummy data the window fragments + auto &slots = resultHandler->getPartials(); + for (auto &s : slots) { + s.m_taskId = dispatcher->getTaskNumber(); + s.m_slot.store(1); // set the slot as ready for checkpoint + s.m_openingWindows = PartialWindowResultsFactory::getInstance().newInstance( + 0, SystemConf::getInstance().HASH_TABLE_SIZE); + s.m_openingWindows->incrementCount(60); + s.m_closingWindows = PartialWindowResultsFactory::getInstance().newInstance( + 0, SystemConf::getInstance().HASH_TABLE_SIZE); + s.m_closingWindows->incrementCount(60); + s.m_completeWindows = + PartialWindowResultsFactory::getInstance().newInstance(0); + s.m_completeWindows->incrementCount(220); + s.m_completeWindows->setPosition( + 220 * SystemConf::getInstance().HASH_TABLE_SIZE * + queries[0]->getOperator()->getCode().getOutputSchema().getTupleSize()); + } + + //std::thread checkpointThread = std::thread(std::ref(coordinator)); + //checkpointThread.detach(); + + const long m_duration = SystemConf::getInstance().DURATION + 2; + auto t1 = std::chrono::high_resolution_clock::now(); + std::chrono::duration time_span{}; + while (true) { + auto t2 = std::chrono::high_resolution_clock::now(); + time_span = + std::chrono::duration_cast>(t2 - t1); + if (time_span.count() >= (double)m_duration) { + std::cout << "Master is stopping..." 
<< std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + break; + } + } + + //coordinator.clearPersistentMemory(); +} + +// helper functions implementation +TupleSchema createSchema() { + TupleSchema schema(3, "Stream"); + for (int i = 1; i < schema.numberOfAttributes() + 1; i++) { + auto attr = AttributeType(BasicType::Integer); + schema.setAttributeType(i, attr); + } + auto attr = AttributeType(BasicType::Long); + schema.setAttributeType(0, attr); + return schema; +} + +Aggregation *createAggregation() { + std::vector aggregationTypes(1); + aggregationTypes[0] = AggregationTypes::fromString("sum"); + std::vector aggregationAttributes(1); + aggregationAttributes[0] = new ColumnReference(1, BasicType::Integer); + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(2, BasicType::Integer); + auto window = new WindowDefinition(RANGE_BASED, 60, 1); + return new Aggregation(*window, aggregationTypes, aggregationAttributes, + groupByAttributes); +} + +std::shared_ptr createQuery() { + auto schema = createSchema(); + auto aggregation = createAggregation(); + OperatorCode *cpuCode = aggregation; + auto queryOperator = new QueryOperator(*cpuCode); + std::vector operators; + operators.push_back(queryOperator); + return std::make_shared(0, operators, + aggregation->getWindowDefinition(), &schema, 0, + true, false, true, false); +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/unit_tests/datastructures.cpp b/test/unit_tests/datastructures.cpp new file mode 100644 index 0000000..b9f8752 --- /dev/null +++ b/test/unit_tests/datastructures.cpp @@ -0,0 +1,81 @@ +#include + +#include "buffers/CircularQueryBuffer.h" +#include "cql/operators/HashTable.h" +#include "gtest/gtest.h" + +TEST(HashTable, TestFunctions) { + std::random_device rd; + std::mt19937_64 eng(rd()); + std::uniform_int_distribution distr(0, 1000000); + std::vector input (512); + for (size_t i = 0; i < input.size(); i++) { + input[i] = distr(eng); + } + + struct Key { + long timestamp; + int id; + }; + struct Value { + int _1; + void combine (Value &v) { + this->_1 += v._1; + } + }; + struct Hasher { + std::size_t operator()(const Key& key) const { + std::hash hasher; + return hasher(key.id); + } + }; + struct Eq { + constexpr bool operator()(const Key& k1, const Key& k2) const { + return k1.timestamp == k2.timestamp && + k1.id == k2.id; + } + }; + HashTable table (1024); + + // check insertions + for (auto &t: input) { + table.insert_or_modify({0, t}, {1}); + } + for (auto &t: input) { + table.insert_or_modify({0, t}, {1}); + } + auto buf = table.buckets(); + for (size_t i = 0; i < table.size(); i++) { + if (buf[i].state) { + EXPECT_EQ(buf[i].value._1, 2); + EXPECT_EQ(buf[i].counter, 2); + } + } + + // check erase + table.erase({0, input[0]}); + Value val; + auto res = table.find({0, input[0]}, val); + EXPECT_EQ(res, false); + EXPECT_LE(table.load_factor(), 0.5); + + // check clearing the hashtable + table.clear(); + for (size_t i = 0; i < table.size(); i++) { + EXPECT_EQ(buf[i].state, false); + } +} + +TEST(CircularBuffer, ProcessBytes) { + CircularQueryBuffer circularBuffer(0, 1024, 32); + std::vector v(128); + circularBuffer.put(v.data(), v.size(), -1); + circularBuffer.free(127); + EXPECT_EQ(circularBuffer.getBytesProcessed(), 128); +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/unit_tests/main.cpp 
b/test/unit_tests/internals.cpp similarity index 55% rename from test/unit_tests/main.cpp rename to test/unit_tests/internals.cpp index 612b657..7519b76 100644 --- a/test/unit_tests/main.cpp +++ b/test/unit_tests/internals.cpp @@ -1,86 +1,21 @@ -#include "gtest/gtest.h" - #include "buffers/CircularQueryBuffer.h" #include "buffers/PartialWindowResultsFactory.h" #include "buffers/UnboundedQueryBufferFactory.h" +#include "checkpoint/FileBackedCheckpointCoordinator.h" +#include "compression/CompressionStatistics.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/NoOp.h" +#include "cql/operators/OperatorCode.h" +#include "dispatcher/TaskDispatcher.h" +#include "gtest/gtest.h" +#include "monitors/LatencyMonitor.h" +#include "monitors/PerformanceMonitor.h" +#include "result/ResultHandler.h" #include "tasks/TaskFactory.h" #include "tasks/WindowBatchFactory.h" -#include "cql/operators/OperatorCode.h" -#include "cql/operators/NoOp.h" -#include "utils/QueryOperator.h" #include "utils/QueryApplication.h" +#include "utils/QueryOperator.h" #include "utils/TupleSchema.h" -#include "dispatcher/TaskDispatcher.h" -#include "result/ResultHandler.h" -#include "cql/expressions/ColumnReference.h" -#include "cql/expressions/LongConstant.h" -#include "cql/expressions/IntConstant.h" -#include "cql/expressions/operations/Addition.h" -#include "cql/expressions/operations/Multiplication.h" -#include "cql/expressions/operations/Division.h" -#include "cql/expressions/operations/Subtraction.h" -#include "cql/predicates/ComparisonPredicate.h" -#include "cql/operators/Selection.h" -#include "cql/operators/Aggregation.h" -#include "cql/operators/Projection.h" -#include "monitors/PerformanceMonitor.h" -#include "monitors/LatencyMonitor.h" - -TEST(Expressions, BasicExpressionAndPredicateCreation) { - ColumnReference ref1(0); - ColumnReference ref2(3); - LongConstant const1(2L); - IntConstant const2(5); - IntConstant const3(3); - Addition add(&const2, &const3); - Multiplication mul(&ref2, &add); - Division div(&ref1, &const1); - Subtraction sub(&div, &mul); - EXPECT_EQ(sub.toSExpr(), "( ( \"0\" / Constant 2 ) - ( \"3\" * ( Constant 5 + Constant 3 ) ) )"); - - ColumnReference ref3(1); - ComparisonPredicate pr1(NONEQUAL_OP, &sub, &ref3); - EXPECT_EQ(pr1.toSExpr(), "( ( \"0\" / Constant 2 ) - ( \"3\" * ( Constant 5 + Constant 3 ) ) ) != \"1\""); -} - -TEST(Selection, OperatorInitialization) { - Selection selection(new ComparisonPredicate(LESS_OP, new ColumnReference(0), new IntConstant(100))); - EXPECT_EQ(selection.toSExpr(), "Selection (\"0\" < Constant 100)"); -} - -TEST(Projection, OperatorInitialization) { - std::vector expressions(3); - // Always project the timestamp - expressions[0] = new ColumnReference(0); - expressions[1] = new ColumnReference(1); - expressions[2] = new Division(new Multiplication(new IntConstant(3), new IntConstant(15)), new IntConstant(2)); - Projection projection(expressions); - EXPECT_EQ(projection.toSExpr(), "Projection (\"0\", \"1\", ( ( Constant 3 * Constant 15 ) / Constant 2 ))"); -} - -TEST(Aggregation, OperatorInitialization) { - WindowDefinition windowDefinition(ROW_BASED, 1024, 32); - std::vector aggregationTypes(2); - aggregationTypes[0] = AVG; - aggregationTypes[1] = MIN; - std::vector aggregationAttributes(2); - aggregationAttributes[0] = new ColumnReference(1); - aggregationAttributes[1] = new ColumnReference(2); - std::vector groupByAttributes(1); - groupByAttributes[0] = new ColumnReference(2); - Aggregation aggregation(windowDefinition, aggregationTypes, 
aggregationAttributes, groupByAttributes); - std::cout << aggregation.toSExpr() << std::endl; - EXPECT_EQ(aggregation.toSExpr(), - "[Partial window u-aggregation] AVG(\"1\") MIN(\"2\") (group-by ? 1) (incremental ? 1)"); -} - -TEST(CircularBuffer, ProcessBytes) { - CircularQueryBuffer circularBuffer(0, 1024, 32); - std::vector v(127); - circularBuffer.put(v.data(), v.size()); - circularBuffer.free(127); - EXPECT_EQ(circularBuffer.getBytesProcessed(), 128); -} TEST(PartialWindowResultsFactory, InitiliazeAndFree) { SystemConf::getInstance().PARTIAL_WINDOWS = 1024; @@ -105,8 +40,7 @@ TEST(WindowBatchFactory, InitiliazeAndFree) { std::vector operators; Query query(0, operators, window, &schema, 0); auto batch = WindowBatchFactory::getInstance().newInstance( - 1024, 0, 1024, &query, &buffer, &window, &schema, 0 - ); + 1024, 0, 1024, -1, &query, &buffer, &window, &schema, 0); EXPECT_EQ(batch->getTaskId(), 0); WindowBatchFactory::getInstance().free(batch); batch.reset(); @@ -115,8 +49,10 @@ TEST(WindowBatchFactory, InitiliazeAndFree) { TEST(TaskDispatcher, CreateTasks) { int batchSize = 64 * 1024; SystemConf::getInstance().BATCH_SIZE = batchSize; - std::unique_ptr window = std::make_unique(ROW_BASED, 1, 1); - std::unique_ptr schema = std::make_unique(2, "Stream"); + std::unique_ptr window = + std::make_unique(ROW_BASED, 1, 1); + std::unique_ptr schema = + std::make_unique(2, "Stream"); int numberOfAttributes = 1; for (int i = 1; i < numberOfAttributes + 1; i++) { auto attr = AttributeType(BasicType::Long); @@ -129,9 +65,11 @@ TEST(TaskDispatcher, CreateTasks) { auto queryOperator = new QueryOperator(*cpuCode); std::vector operators; operators.push_back(queryOperator); - long timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + long timestampReference = + std::chrono::system_clock::now().time_since_epoch().count(); std::vector> queries(1); - queries[0] = std::make_shared(0, operators, *window, schema.get(), timestampReference); + queries[0] = std::make_shared(0, operators, *window, schema.get(), + timestampReference); auto application = new QueryApplication(queries); queries[0]->setParent(application); @@ -149,14 +87,16 @@ TEST(TaskDispatcher, CreateTasks) { EXPECT_EQ(taskSize, 2); delete cpuCode; delete queryOperator; - delete application; + //delete application; // todo: fix this } TEST(ResultHandler, FreeSlots) { int batchSize = 64 * 1024; SystemConf::getInstance().BATCH_SIZE = batchSize; - std::unique_ptr window = std::make_unique(ROW_BASED, 1, 1); - std::unique_ptr schema = std::make_unique(2, "Stream"); + std::unique_ptr window = + std::make_unique(ROW_BASED, 1, 1); + std::unique_ptr schema = + std::make_unique(2, "Stream"); int numberOfAttributes = 1; for (int i = 1; i < numberOfAttributes + 1; i++) { auto attr = AttributeType(BasicType::Long); @@ -169,9 +109,11 @@ TEST(ResultHandler, FreeSlots) { auto queryOperator = new QueryOperator(*cpuCode); std::vector operators; operators.push_back(queryOperator); - long timestampReference = std::chrono::system_clock::now().time_since_epoch().count(); + long timestampReference = + std::chrono::system_clock::now().time_since_epoch().count(); std::vector> queries(1); - queries[0] = std::make_shared(0, operators, *window, schema.get(), timestampReference); + queries[0] = std::make_shared(0, operators, *window, schema.get(), + timestampReference); auto application = new QueryApplication(queries); queries[0]->setParent(application); @@ -195,7 +137,79 @@ TEST(ResultHandler, FreeSlots) { delete cpuCode; delete queryOperator; - 
delete application; + //delete application; // todo: fix this +} + +TEST(CompressionStatistics, addStatistics) { + // 3 cols: (Timestamp, Int, Float) + auto cols = new std::vector; + cols->push_back(new ColumnReference(0, BasicType::Long)); + cols->push_back(new ColumnReference(1, BasicType::Integer)); + cols->push_back(new ColumnReference(2, BasicType::Float)); + CompressionStatistics stats (0, cols); + + // generate data + struct input { + long timestamp; + int _1; + float _2; + }; + std::vector data(16); + for (size_t i = 0; i < 16; ++i) { + data[i].timestamp = (long)i/6; + data[i]._1 = 10*(int)i; + data[i]._2 = ((float) i) * (float) 0.001; + } + + // gather statistics + std::vector m_distinctVals(cols->size()); + std::vector m_consecutiveVals(cols->size(), 1); + std::vector m_min(cols->size(), DBL_MAX), m_max(cols->size(), DBL_MIN), m_maxDiff(cols->size(), DBL_MIN); + auto timestamp = data[0].timestamp; + auto _1 = data[0]._1; + auto _2 = data[0]._2; + m_min[0] = timestamp; m_max[0] = timestamp; + m_min[1] = _1; m_max[1] = _1; + m_min[2] = _2; m_max[2] = _2; + for (size_t i = 1; i < 16; ++i) { + if (timestamp != data[i].timestamp) { + timestamp = data[i].timestamp; + m_consecutiveVals[0]++; + } + m_min[0] = std::min(m_min[0], (double)data[i].timestamp); + m_max[0] = std::max(m_max[0], (double)data[i].timestamp); + m_maxDiff[0] = std::max(m_maxDiff[0], (double)(data[i].timestamp - data[i-1].timestamp)); + + if (_1 != data[i]._1) { + _1 = data[i]._1; + m_consecutiveVals[1]++; + } + m_min[1] = std::min(m_min[1], (double)data[i]._1); + m_max[1] = std::max(m_max[1], (double)data[i]._1); + m_maxDiff[1] = std::max(m_maxDiff[1], (double)(data[i]._1 - data[i-1]._1)); + + if (_2 != data[i]._2) { + _2 = data[i]._2; + m_consecutiveVals[2]++; + } + m_min[2] = std::min(m_min[2], (double)data[i]._2); + m_max[2] = std::max(m_max[2], (double)data[i]._2); + m_maxDiff[2] = std::max(m_maxDiff[2], (double)(data[i]._2 - data[i-1]._2)); + } + m_consecutiveVals[0] = 16 / m_consecutiveVals[0]; + m_consecutiveVals[1] = 16 / m_consecutiveVals[1]; + m_consecutiveVals[2] = 16 / m_consecutiveVals[2]; + + stats.addStatistics(m_distinctVals.data(), m_consecutiveVals.data(), m_min.data(), m_max.data(), m_maxDiff.data()); + EXPECT_EQ(stats.updateCompressionDecision(), true); + + EXPECT_EQ(stats.m_useRLE[0], true); + EXPECT_EQ(stats.m_useRLE[1], false); + EXPECT_EQ(stats.m_useRLE[2], false); + EXPECT_EQ(stats.m_precision[0], 2); + EXPECT_EQ(stats.m_precision[1], 8); + EXPECT_EQ(stats.m_diffPrecision[0], 2); + EXPECT_EQ(stats.m_diffPrecision[1], 8); } int main(int argc, char **argv) { diff --git a/test/unit_tests/operators.cpp b/test/unit_tests/operators.cpp new file mode 100644 index 0000000..bbf74f6 --- /dev/null +++ b/test/unit_tests/operators.cpp @@ -0,0 +1,97 @@ +#include "buffers/CircularQueryBuffer.h" +#include "cql/expressions/ColumnReference.h" +#include "cql/expressions/IntConstant.h" +#include "cql/expressions/LongConstant.h" +#include "cql/expressions/operations/Addition.h" +#include "cql/expressions/operations/Division.h" +#include "cql/expressions/operations/Multiplication.h" +#include "cql/expressions/operations/Subtraction.h" +#include "cql/operators/Aggregation.h" +#include "cql/operators/Projection.h" +#include "cql/operators/Selection.h" +#include "cql/operators/ThetaJoin.h" +#include "cql/predicates/ComparisonPredicate.h" +#include "gtest/gtest.h" +#include "monitors/LatencyMonitor.h" +#include "monitors/PerformanceMonitor.h" +#include "result/ResultHandler.h" +#include "tasks/WindowBatchFactory.h" 
+#include "utils/QueryApplication.h" +#include "utils/QueryOperator.h" + +TEST(Expressions, BasicExpressionAndPredicateCreation) { + ColumnReference ref1(0); + ColumnReference ref2(3); + LongConstant const1(2L); + IntConstant const2(5); + IntConstant const3(3); + Addition add(&const2, &const3); + Multiplication mul(&ref2, &add); + Division div(&ref1, &const1); + Subtraction sub(&div, &mul); + EXPECT_EQ(sub.toSExpr(), "( ( \"0\" / Constant 2 ) - ( \"3\" * ( Constant 5 + Constant 3 ) ) )"); + + ColumnReference ref3(1); + ComparisonPredicate pr1(NONEQUAL_OP, &sub, &ref3); + EXPECT_EQ(pr1.toSExpr(), "( ( \"0\" / Constant 2 ) - ( \"3\" * ( Constant 5 + Constant 3 ) ) ) != \"1\""); +} + +TEST(Selection, OperatorInitialization) { + Selection selection(new ComparisonPredicate(LESS_OP, new ColumnReference(0), new IntConstant(100))); + EXPECT_EQ(selection.toSExpr(), "Selection (\"0\" < Constant 100)"); +} + +TEST(Projection, OperatorInitialization) { + std::vector expressions(3); + // Always project the timestamp + expressions[0] = new ColumnReference(0); + expressions[1] = new ColumnReference(1); + expressions[2] = new Division(new Multiplication(new IntConstant(3), new IntConstant(15)), new IntConstant(2)); + Projection projection(expressions); + EXPECT_EQ(projection.toSExpr(), "Projection (\"0\", \"1\", ( ( Constant 3 * Constant 15 ) / Constant 2 ))"); +} + +TEST(Aggregation, OperatorInitialization) { + WindowDefinition windowDefinition(ROW_BASED, 1024, 32); + std::vector aggregationTypes(2); + aggregationTypes[0] = AVG; + aggregationTypes[1] = MIN; + std::vector aggregationAttributes(2); + aggregationAttributes[0] = new ColumnReference(1); + aggregationAttributes[1] = new ColumnReference(2); + std::vector groupByAttributes(1); + groupByAttributes[0] = new ColumnReference(2); + Aggregation aggregation(windowDefinition, aggregationTypes, aggregationAttributes, groupByAttributes); + std::cout << aggregation.toSExpr() << std::endl; + EXPECT_EQ(aggregation.toSExpr(), + "[Partial window u-aggregation] AVG(\"1\") MIN(\"2\") (group-by ? 1) (incremental ? 1)"); +} + +TEST(ThetaJoin, OperatorInitialization) { + int numberOfAttributes = 1; + std::unique_ptr window1 = std::make_unique(ROW_BASED, 1024, 512); + std::unique_ptr schema1 = std::make_unique(2, "Stream1"); + auto attr = AttributeType(BasicType::Long); + for (int i = 1; i < numberOfAttributes + 1; i++) { + schema1->setAttributeType(i, attr); + } + schema1->setAttributeType(0, attr); + std::unique_ptr window2 = std::make_unique(ROW_BASED, 1024, 512); + std::unique_ptr schema2 = std::make_unique(2, "Stream2"); + for (int i = 1; i < numberOfAttributes + 1; i++) { + schema1->setAttributeType(i, attr); + } + schema1->setAttributeType(0, attr); + std::unique_ptr predicate = std::make_unique(EQUAL_OP, new ColumnReference(1), new ColumnReference(1)); + auto join = new ThetaJoin(*schema1, *schema2, predicate.get()); + // TODO: fix OperatorJit::removeAllModules() so that the test passes without heap allocation + std::cout << join->toSExpr() << std::endl; + EXPECT_EQ(join->toSExpr(), + "ThetaJoin (\"1\" == \"1\")"); +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +}