Skip to content

Commit

Permalink
Added a verbosity-mode that creates extensive printouts from the SW-s…
Browse files Browse the repository at this point in the history
…tack if selected in the build-process.
  • Loading branch information
maximilianheer committed Sep 13, 2024
1 parent 2043b63 commit d7bf35e
Show file tree
Hide file tree
Showing 12 changed files with 625 additions and 11 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ Similar to building the HW, it makes sense to build within the `examples_sw` dir

~~~~
$ mkdir examples_sw/build_sw && cd examples_sw/build_sw
$ cmake ../ -DEXAMPLE=<target_example>
$ cmake ../ -DEXAMPLE=<target_example> -DVERBOSITY=<ON or OFF>
$ make
~~~~

The software stack can be built in verbosity mode, which generates extensive printouts during execution. This is controlled via the `VERBOSITY` option passed to the `cmake` call (`-DVERBOSITY=ON` or `-DVERBOSITY=OFF`). By default, verbosity is turned off.

### Build `Driver`

After the bitstream is loaded, the driver can be inserted once for the initial static image.
Expand Down
15 changes: 15 additions & 0 deletions examples_sw/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CYT_DIR}/cmake)

find_package(CoyoteSW REQUIRED)

#
# Optional verbose build: configure with -DVERBOSITY=ON to define the VERBOSE
# preprocessor macro, which enables the extra printouts guarded by
# `# ifdef VERBOSE` in the sources. VERBOSITY defaults to OFF.
#

set(VERBOSITY OFF CACHE BOOL "Enable VERBOSITY for compilation")

if (VERBOSITY)
    message("You selected VERBOSITY for compilation")
    # target_compile_definitions() requires a target name and a scope keyword,
    # neither of which is available here. Use a directory-scope definition
    # instead, which applies to the targets of this directory.
    add_definitions(-DVERBOSE)
else()
    message("VERBOSITY is not selected for compilation")
endif()


#
# Shell reconfiguration
#
Expand Down
50 changes: 48 additions & 2 deletions examples_sw/apps/rdma_service/client/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,22 +140,33 @@ int main(int argc, char *argv[])
// -----------------------------------------------------------------------------------------------------------------------

// Get a thread for execution: Has the vFPGA-ID, host-process-ID of this calling process, and device number
cThread<int> cthread(defTargetVfid, getpid(), cs_dev);
# ifdef VERBOSE
// Announce the upcoming cThread construction together with the parameters passed to it.
// The former unqualified, unterminated `cout << "Created ..."` line was dropped: it claimed
// the object already existed, duplicated the message below and ran into it on the same line.
std::cout << "rdma_client: Create the cThread-object for the RDMA-server-main-code" << std::endl;
std::cout << "rdma_client: Target-vfid: " << defTargetVfid << std::endl;
std::cout << "rdma_client: Current process ID: " << getpid() << std::endl;
# endif
cThread<int> cthread(defTargetVfid, getpid(), cs_dev);

// Get memory in the max size of the experiment. Argument is a cs_alloc-struct: Huge Page, max size, is remote
// This operation attaches the buffer to the Thread, which is required for the cLib constructor for RDMA-capabilities
cthread.getMem({CoyoteAlloc::HPF, max_size, true});

// Connect to the RDMA server and run the task

# ifdef VERBOSE
std::cout << "rdma_client: Create an instance of the cLib-class for exchange of QPs etc." << std::endl;
# endif

// This instantiates the communication library cLib with the name of the socket, function-ID (?), the executing cthread, the target IP-address and the target port
// The constructor of the communication library also automatically does the meta-exchange of information in the beginning to connect the queue pairs from local and remote
cLib<int, bool, uint32_t, uint32_t, uint32_t, uint32_t> clib_rdma("/tmp/coyote-daemon-vfid-0-rdma",
fidRDMA, &cthread, tcp_ip.c_str(), defPort);

// Issue the iTask for exchange of experimental parameters
# ifdef VERBOSE
std::cout << "rdma_client: Issue the iTask for exchange of experimental parameters" << std::endl;
# endif

// Execute the iTask -> That goes to cLib and from there probably to cFunc for scheduling of the execution of the cThread
clib_rdma.iTask(opPriority, oper, min_size, max_size, n_reps_thr, n_reps_lat);

Expand All @@ -166,6 +177,10 @@ int main(int argc, char *argv[])
// Create a Scatter-Gather-Entry, save it in memory - size of the rdmaSg
// How is this sg-element connected to the thread-attached buffer? Should be the vaddr, shouldn't it?
// There has to be a connection, since sg is handed over to the invoke-function, where the local_dest and offset is accessed
# ifdef VERBOSE
std::cout << "rdma_client: Create a sg-Entry for the RDMA-operation." << std::endl;
# endif

sgEntry sg;
memset(&sg, 0, sizeof(rdmaSg));

Expand All @@ -183,7 +198,13 @@ int main(int argc, char *argv[])

// Sync
// Clear the registers that hold information about completed functions
# ifdef VERBOSE
std::cout << "rdma_client: Perform a clear Completed in cThread." << std::endl;
# endif
cthread.clearCompleted();
# ifdef VERBOSE
std::cout << "rdma_client: Perform a connection sync in cThread." << std::endl;
# endif
// Initiate a sync between the remote nodes with handshaking via exchanged ACKs
cthread.connSync(true);
// Initialize a benchmark-object to precisely benchmark the RDMA-execution. Number of executions is set to 1 (no further repetitions on this level), no calibration required, no distribution required.
Expand All @@ -193,10 +214,16 @@ int main(int argc, char *argv[])
auto benchmark_thr = [&]() {
// For the desired number of repetitions per size, invoke the cThread-Function with the coyote-Operation
// The braces are required: without them, a VERBOSE build makes the printout the only
// statement in the loop body, and invoke() runs a single time after the loop instead of
// n_reps_thr times.
for(int i = 0; i < n_reps_thr; i++) {
# ifdef VERBOSE
    std::cout << "rdma_client: invoke the operation " << coper << std::endl;
# endif
    cthread.invoke(coper, &sg);
}

// Check the number of completed RDMA-transactions, wait until all operations have been completed. Check for stalling in-between.
while(cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) < n_reps_thr) {
# ifdef VERBOSE
std::cout << "rdma_client: Current number of completed operations: " << cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
# endif
// stalled is an atomic boolean used for event-handling (?) that would indicate a stalled operation
if( stalled.load() ) throw std::runtime_error("Stalled, SIGINT caught");
}
Expand All @@ -211,16 +238,28 @@ int main(int argc, char *argv[])
<< std::setw(8) << ((1 + oper) * ((1000 * sg.rdma.len ))) / ((bench.getAvg()) / n_reps_thr) << " [MB/s], latency: ";

// Sync - reset the completion counter from the thread, sync-up via ACK-handshakes
# ifdef VERBOSE
std::cout << "rdma_client: Perform a clear Completed in cThread." << std::endl;
# endif
cthread.clearCompleted();
# ifdef VERBOSE
std::cout << "rdma_client: Perform a connection sync in cThread." << std::endl;
# endif
cthread.connSync(true);

// Lambda-function for latency-benchmarking
auto benchmark_lat = [&]() {
// Different than before: Issue one single command via invoke, then wait for its completion (ping-pong-scheme)
// Repeated for the number of desired repetitions
for(int i = 0; i < n_reps_lat; i++) {
# ifdef VERBOSE
std::cout << "rdma_client: invoke the operation " << coper << std::endl;
# endif
cthread.invoke(coper, &sg);
while(cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) < i+1) {
# ifdef VERBOSE
std::cout << "rdma_client: Current number of completed operations: " << cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
# endif
// As long as the completion is not yet received, check for a possible stall-event
if( stalled.load() ) throw std::runtime_error("Stalled, SIGINT caught");
}
Expand All @@ -241,10 +280,17 @@ int main(int argc, char *argv[])
std::cout << std::endl;

// Final connection sync via the thread-provided function
# ifdef VERBOSE
std::cout << "rdma_client: Perform a connection sync in cThread." << std::endl;
# endif
cthread.connSync(true);

// Try to obtain the completion event at the end - probably has to do with the iTask at the beginning?
int ret_val = clib_rdma.iCmpl();

# ifdef VERBOSE
std::cout << "rdma_client: Generated the return value from clib_rdma-completion function " << ret_val << std::endl;
# endif

return (ret_val);
}
29 changes: 29 additions & 0 deletions examples_sw/apps/rdma_service/server/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,24 +99,41 @@ int main(int argc, char *argv[])
* Instantiate a daemon for the server-side of RDMA: "remote" is set to true
*
*/
# ifdef VERBOSE
std::cout << "rdma_server: Get an instance of the cService for rdma with vfid " << vfid << " and for device " << cs_dev << std::endl;
# endif
cService *cservice = cService::getInstance("rdma", true, vfid, cs_dev, nullptr, defPort);

//std::cout << std::endl << "Shell loading ..." << std::endl << std::endl;
//cservice->shellReconfigure("shell_bstream.bin");

// RDMA perf: Add a new function for execution to the cService, which takes the experiment parameters as input for the lambda-function
# ifdef VERBOSE
std::cout << "rdma_server: Add a function for experiment-execution." << std::endl;
# endif
cservice->addFunction(fidRDMA, std::unique_ptr<bFunc>(new cFunc<int, bool, uint32_t, uint32_t, uint32_t, uint32_t>(operatorRDMA,
[=] (cThread<int> *cthread, bool rdwr, uint32_t min_size, uint32_t max_size, uint32_t n_reps_thr, uint32_t n_reps_lat) -> int {
syslog(LOG_NOTICE, "Executing RDMA benchmark, %s, min_size %d, max_size %d, n_reps_thr %d, n_reps_lat %d",
(rdwr ? "RDMA WRITE" : "RDMA READ"), min_size, max_size, n_reps_thr, n_reps_lat);

// SG entries
# ifdef VERBOSE
std::cout << "rdma_server: Create a sg-Entry for the RDMA-operation." << std::endl;
# endif

sgEntry sg;
memset(&sg, 0, sizeof(rdmaSg));
sg.rdma.len = min_size; sg.rdma.local_stream = strmHost;

while(sg.rdma.len <= max_size) {
// Sync via the cThread that is part of the cService-daemon that was just started in the background
# ifdef VERBOSE
std::cout << "rdma_server: Perform a clear Completed in cThread." << std::endl;
# endif
cthread->clearCompleted();
# ifdef VERBOSE
std::cout << "rdma_server: Perform a connection sync in cThread." << std::endl;
# endif
cthread->connSync(false);


Expand All @@ -126,10 +143,19 @@ int main(int argc, char *argv[])

// THR - issuing the same amount of "Write-Backs" to the client
// The braces are required: without them, a VERBOSE build makes the printout the only
// statement in the loop body, and invoke() runs a single time after the loop instead of
// n_reps_thr times.
for(int i = 0; i < n_reps_thr; i++) {
# ifdef VERBOSE
    // NOTE(review): `coper` is not declared in the visible part of this lambda, while the
    // invoke below uses CoyoteOper::REMOTE_RDMA_WRITE directly - confirm it is in scope here.
    std::cout << "rdma_server: invoke the operation " << coper << std::endl;
# endif
    cthread->invoke(CoyoteOper::REMOTE_RDMA_WRITE, &sg);
}

// Sync via the thread that is located within the cService-daemon
# ifdef VERBOSE
std::cout << "rdma_server: Perform a clearCompleted." << std::endl;
# endif
cthread->clearCompleted();
# ifdef VERBOSE
std::cout << "rdma_server: Perform a connection sync in cThread." << std::endl;
# endif
cthread->connSync(false);

// LAT - iterate over the number of ping-pong-exchanges according to the desired experiment setting
Expand Down Expand Up @@ -157,6 +183,9 @@ int main(int argc, char *argv[])
// Start a daemon
//
std::cout << "Forking ..." << std::endl << std::endl;
# ifdef VERBOSE
std::cout << "rdma_server: Start the background daemon." << std::endl;
# endif
cservice->start();
}

54 changes: 52 additions & 2 deletions sw/include/cFunc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,18 @@ class cFunc : public bFunc {
// Constructor: keeps the callable and the operator ID handed in by the caller.
//   oid - operator ID stored for this function
//   f   - callable taking a cThread<Cmpl>* followed by Args..., returning Cmpl
cFunc(int32_t oid, std::function<Cmpl(cThread<Cmpl>*, Args...)> f) {
    this->f = f;
    this->oid = oid;

# ifdef VERBOSE
    // Verbose builds report the operator ID this function was constructed with
    std::cout << "cFunc: Called the constructor with operator ID " << oid << std::endl;
# endif
}

// Destructor: Destroy the final clean-up thread
~cFunc() {
~cFunc() {
# ifdef VERBOSE
std::cout << "cFunc: Called the destructor." << std::endl;
# endif

int connfd;
run_cln = false;
thread_cln.join();
Expand All @@ -73,6 +81,9 @@ class cFunc : public bFunc {
// Create the clean-up thread -> Thread again points to a function cleanConns defined here in this class
// The function cleanConns is to be executed by this clean-up-thread
void start() {
# ifdef VERBOSE
std::cout << "cFunc: Create a cleaning thread that runs cleanConns." << std::endl;
# endif
thread_cln = std::thread(&cFunc::cleanConns, this);
}

Expand All @@ -84,16 +95,27 @@ class cFunc : public bFunc {
// csched - scheduler
// user-defined interrupt service routine
bThread* registerClientThread(int connfd, int32_t vfid, pid_t rpid, uint32_t dev, cSched *csched, void (*uisr)(int) = nullptr) {

# ifdef VERBOSE
std::cout << "cFunc: Called registClientThread to register a new client thread for this function with connfd " << connfd << ", vfid " << vfid << ", rpid " << rpid << " and dev " << dev << std::endl;
# endif

// Check if there's already a thread registered for this connfd
if(clients.find(connfd) == clients.end()) {

// New insertion into the clients-struct: Mapping between connection-fd and new cThread based on the parameters given
clients.insert({connfd, std::make_unique<cThread<Cmpl>>(vfid, rpid, dev, csched, uisr)});

# ifdef VERBOSE
std::cout << " - cFunc: Register client in the struct with connfd " << connfd << std::endl;
# endif

// Registers a new pair of bool::false and a standard-thread (which again points to the function processRequests and the connfd)
reqs.insert({connfd, std::make_pair(false, std::thread(&cFunc::processRequests, this, connfd))});

# ifdef VERBOSE
std::cout << " - cFunc: Register client request in the struct with connfd " << connfd << std::endl;
# endif

// The newly added thread is kicked off:
clients[connfd]->setConnection(connfd); // Set connection for the new thread
clients[connfd]->start(); // Start execution of the cThread
Expand All @@ -112,6 +134,10 @@ class cFunc : public bFunc {
// Function that is given to the standard-threads that are stored in the reqs-struct
void processRequests(int connfd) {

# ifdef VERBOSE
std::cout << "cFunc: Called processRequests, which is the function given to the standard threads in the reqs-struct." << std::endl;
# endif

// Create a receive-buffer and set it to 0
char recv_buf[recvBuffSize];
memset(recv_buf, 0, recvBuffSize);
Expand Down Expand Up @@ -149,12 +175,20 @@ class cFunc : public bFunc {
priority = request[2];
syslog(LOG_NOTICE, "Client: %d, opcode %d, tid: %d", connfd, opcode, tid);

# ifdef VERBOSE
std::cout << " - cFunc: Client " << connfd << " with opcode " << opcode << " and tid " << tid << std::endl;
# endif

// Further action depends on the opcode that is read from the network socket
switch (opcode) {

// Request to close a connection
case defOpClose: {
syslog(LOG_NOTICE, "Received close connection request");
# ifdef VERBOSE
std::cout << " - cFunc: Received close connection request." << std::endl;
# endif

close(connfd);

// Set the entry to false, case has been closed
Expand All @@ -168,6 +202,10 @@ class cFunc : public bFunc {
// Tuple that can hold multiple arguments
std::tuple<Args...> msg;

# ifdef VERBOSE
std::cout << " - cFunc: Received request to execute a function." << std::endl;
# endif

// Lambda function to read data from the socket to the receive buffer (most likely arguments for execution)
auto f_rd = [&](auto& x){
using U = decltype(x);
Expand All @@ -184,6 +222,10 @@ class cFunc : public bFunc {
// Not exactly sure about this, but would argue that the received message is stored in previously declared message
std::apply([=](auto&&... args) {(f_rd(args), ...);}, msg);

# ifdef VERBOSE
std::cout << " - cFunc: Schedule the task for execution." << std::endl;
# endif

// Schedule the task for execution in the thread that it belongs to, based on the arguments that were received for it
clients[connfd]->scheduleTask(std::unique_ptr<bTask<Cmpl>>(new auto(std::make_from_tuple<cTask<Cmpl, std::function<Cmpl(cThread<Cmpl>*, Args...)>, Args...>>(std::tuple_cat(
std::make_tuple(tid),
Expand All @@ -197,6 +239,10 @@ class cFunc : public bFunc {
// Check the thread for completion of the scheduled task
cmpltd = clients[connfd]->getTaskCompletedNext(cmpl_tid, cmpl_ev);

# ifdef VERBOSE
std::cout << " - cFunc: Read a completion event for the task at cmpl_tid " << cmpl_tid << std::endl;
# endif

// If task has been completed, send both the completion tid and completion ev back to the caller, which is cLib through the iTask
if(cmpltd) {
if(write(connfd, &cmpl_tid, sizeof(int32_t)) != sizeof(int32_t)) {
Expand Down Expand Up @@ -237,6 +283,10 @@ class cFunc : public bFunc {
run_cln = true;
int connfd;

# ifdef VERBOSE
std::cout << "cFunc: Run cleanConns with a cleaning thread for the connections." << std::endl;
# endif

// As long as the clean-up runs, get threads to be cleaned from the FIFO and continue cleaning them up
while(run_cln) {
// Close the lock before accessing the cleaning-queue
Expand Down
Loading

0 comments on commit d7bf35e

Please sign in to comment.