Spinlock versus mutex queue lock tests
RainerZ committed Sep 29, 2024
1 parent 24a8084 commit 6f26b29
Showing 5 changed files with 67 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -268,7 +268,7 @@ All measurement and calibration code instrumentation is non blocking and the tri
There are no heap allocations during runtime, except for the lazy registrations of and for A2L generation.
build.rs automatically builds a minimum static C library from individually preconfigured core XCPlite sources.
On C level, there is a synchronisation mutex or spinlock for the mpsc transmit queue.
On C level, there is a synchronisation mutex for the mpsc transmit queue.
The C code has the option to start the server with 2 normal threads for rx and tx socket handling.
The generated A2L file is finalized on XCP connect and provided for upload via XCP.
2 changes: 1 addition & 1 deletion tests/test_multi_thread.rs
@@ -2,7 +2,7 @@
// Integration test for XCP in a multi threaded application
// Uses the test XCP client in xcp_client

// cargo test --features=json --features=auto_reg -- --test-threads=1 --nocapture --test test_multi_thread
// cargo test --features=json --features=auto_reg --features=a2l_reader -- --test-threads=1 --nocapture --test test_multi_thread

#![allow(unused_assignments)]

3 changes: 1 addition & 2 deletions tests/test_single_thread.rs
@@ -2,8 +2,7 @@
// Integration test for XCP in a single thread application
// Uses the test XCP client in module xcp_client

// cargo test --features=json --features=auto_reg -- --test-threads=1 --nocapture --test test_single_thread

// cargo test --features=json --features=auto_reg --features=a2l_reader -- --test-threads=1 --nocapture --test test_single_thread
use xcp::*;
use xcp_type_description::prelude::*;

5 changes: 3 additions & 2 deletions xcplib/src/platform.h
@@ -81,9 +81,10 @@ typedef HANDLE tXcpThread;
#elif defined(_LINUX) // Linux

typedef pthread_t tXcpThread;
#define create_thread(h,t) pthread_create(h, NULL, t, NULL);
#define join_thread(h) pthread_join(h,NULL);
#define create_thread(h,t) pthread_create(h, NULL, t, NULL)
#define join_thread(h) pthread_join(h,NULL)
#define cancel_thread(h) { pthread_detach(h); pthread_cancel(h); }
#define yield_thread() sched_yield()

#endif

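In the Linux branch above, the trailing semicolons appear to have been dropped from the create_thread and join_thread macro bodies, and a yield_thread() wrapper around sched_yield() was added for the spin-wait experiment in xcpTlQueue.c. Dropping the semicolon lets the macros be used like ordinary function calls; the hypothetical caller below (not from this commit) sketches why a semicolon baked into the macro body would break an if/else.

#include <pthread.h>

typedef pthread_t tXcpThread;
#define create_thread(h, t) pthread_create(h, NULL, t, NULL)

static void* task(void* arg) { (void)arg; return NULL; }

// With the old definition ending in ';', the expansion inside the if-branch
// would leave an empty statement before 'else' and fail to compile.
static int start_task(int enabled, tXcpThread* h) {
    if (enabled)
        create_thread(h, task); // caller supplies the ';', as with a function call
    else
        return -1;
    return 0;
}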
66 changes: 61 additions & 5 deletions xcplib/src/xcpTlQueue.c
@@ -16,21 +16,57 @@
#include "dbg_print.h"
#include "xcpLite.h"

// Experimental
// Use a spinlock instead of a mutex for the producer lock
// This naive approach is usually not faster than a mutex and can produce higher latencies and a hard-to-predict impact on other threads
// It might be a better solution for non-preemptive tasks (a self-contained sketch of the spin-wait pattern follows after this file's diff)
//#define USE_SPINLOCK
//#define USE_YIELD
//#define TEST_LOCK_TIMING

/*
Test results from test_multi_thread with 32 tasks and 200us sleep time:
maxLock and avgLock time in ns
SPINLOCK+YIELD
lockCount=501170, maxLock=296000, avgLock=768
lockCount=501019, maxLock=195000, avgLock=744
lockCount=500966, maxLock=210000, avgLock=724
SPINLOCK without cache friendly lock check
lockCount=492952, maxLock=10115000, avgLock=1541
SPINLOCK
lockCount=497254, maxLock=9935000, avgLock=512
lockCount=494866, maxLock=11935000, avgLock=1322
lockCount=490923, maxLock=10019000, avgLock=2073
lockCount=489831, maxLock=10024000, avgLock=1980
MUTEX
lockCount=499798, maxLock=114000, avgLock=840
lockCount=500202, maxLock=135000, avgLock=806
lockCount=499972, maxLock=130000, avgLock=790
lockCount=500703, maxLock=124000, avgLock=755
lockCount=500773, maxLock=126000, avgLock=669
*/

#ifdef TEST_LOCK_TIMING
static uint64_t lockTimeMax = 0;
static uint64_t lockTimeSum = 0;
static uint64_t lockCount = 0;
#endif

#ifndef _WIN

#include <stdatomic.h>

// Use spinlock instead of mutex for producer lock
#define USE_SPINLOCK

#else

#ifdef _WIN32_
#error "Windows32 not implemented yet"
#else


#undef USE_SPINLOCK
#define atomic_uint_fast64_t uint64_t
#define atomic_store(a,b) (*a)=(b)
#define atomic_load(a) (*a)
@@ -100,6 +136,10 @@ void XcpTlFreeTransmitQueue() {
#ifndef USE_SPINLOCK
mutexDestroy(&gXcpTlQueue.mutex);
#endif

#ifdef TEST_LOCK_TIMING
DBG_PRINTF3("XcpTlFreeTransmitQueue: overruns=%u, lockCount=%llu, maxLock=%llu, avgLock=%llu\n", gXcpTlQueue.overruns, lockCount, lockTimeMax, lockTimeSum/lockCount);
#endif
}


@@ -127,11 +167,27 @@ uint8_t* XcpTlGetTransmitBuffer(void** handle, uint16_t packet_len) {
DBG_PRINTF5("XcpTlGetTransmitBuffer: len=%d\n", packet_len);

// Producer lock
#ifdef TEST_LOCK_TIMING
uint64_t c = clockGet();
#endif
#ifdef USE_SPINLOCK
while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire));
for (uint32_t n = 1;1;n++) {
BOOL locked = atomic_load_explicit(&lock._Value, memory_order_relaxed);
if (!locked && !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break;
//if ( !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break;
#ifdef USE_YIELD
if (n%16==0) yield_thread();
#endif
}
#else
mutexLock(&gXcpTlQueue.mutex);
#endif
#ifdef TEST_LOCK_TIMING
uint64_t d = clockGet() - c;
if (d>lockTimeMax) lockTimeMax = d;
lockTimeSum += d;
lockCount++;
#endif

uint64_t head = atomic_load(&gXcpTlQueue.head);
uint64_t tail = atomic_load_explicit(&gXcpTlQueue.tail,memory_order_relaxed);
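The producer lock shown above uses a test-and-test-and-set pattern when USE_SPINLOCK is enabled: a relaxed load checks the flag first, so waiting threads spin on a read-only cache line, and the acquiring test-and-set is only attempted once the lock looks free; with USE_YIELD the waiter also gives up its time slice every 16 iterations. Below is a minimal, self-contained sketch of that pattern (not part of this commit), using a portable atomic_bool rather than reading the atomic_flag's internal _Value, and calling sched_yield() directly in place of the yield_thread() macro.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <sched.h>

static atomic_bool producer_lock_flag = false;

// Acquire: relaxed read first (test-and-test-and-set), so contending threads
// only read the cache line; the exchange with acquire ordering takes the lock.
static void producer_lock(void) {
    for (uint32_t n = 1; ; n++) {
        if (!atomic_load_explicit(&producer_lock_flag, memory_order_relaxed) &&
            !atomic_exchange_explicit(&producer_lock_flag, true, memory_order_acquire)) {
            break; // lock acquired
        }
        if (n % 16 == 0) sched_yield(); // optionally give up the time slice while contended
    }
}

// Release: store with release ordering so queue writes become visible first.
static void producer_unlock(void) {
    atomic_store_explicit(&producer_lock_flag, false, memory_order_release);
}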
