Skip to content

Commit

Permalink
Support timeout for interrupt semaphore
Browse files Browse the repository at this point in the history
Introduce ETHOSU_INFERENCE_TIMEOUT CMake variable to set an
arbitrary timeout value that will be sent as argument to
ethosu_semaphore_take() for the interrupt semaphore. Adding
the ability to have a timeout for an inference. (Defaults to
no timeout/wait forever.)

Implement a placeholder mutex for the baremetal example and add
error checks for mutex_create() call.

Change-Id: Ia74391620340a27c23dc3d15f9ba742c674c8bfa
Signed-off-by: Jonny Svärd <jonny.svaerd@arm.com>
  • Loading branch information
jonny-svaerd-arm authored and kristofer-jonsson-arm committed Feb 15, 2024
1 parent a1cba40 commit 8e2cf75
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 31 deletions.
10 changes: 9 additions & 1 deletion CMakeLists.txt
Expand Up @@ -32,6 +32,7 @@ set(LOG_NAMES err warning info debug)
set(ETHOSU_LOG_ENABLE ON CACHE BOOL "Toggle driver logs on/off (Defaults to ON)")
set(ETHOSU_LOG_SEVERITY "warning" CACHE STRING "Driver log severity level ${LOG_NAMES} (Defaults to 'warning')")
set(ETHOSU_TARGET_NPU_CONFIG "ethos-u55-128" CACHE STRING "Default NPU configuration")
set(ETHOSU_INFERENCE_TIMEOUT "" CACHE STRING "Inference timeout (unit is implementation defined)")
set_property(CACHE ETHOSU_LOG_SEVERITY PROPERTY STRINGS ${LOG_NAMES})

#
Expand Down Expand Up @@ -75,7 +76,13 @@ else()
message(FATAL_ERROR "Invalid NPU configuration")
endif()


if(NOT "${ETHOSU_INFERENCE_TIMEOUT}" STREQUAL "")
target_compile_definitions(ethosu_core_driver PRIVATE
ETHOSU_SEMAPHORE_WAIT_INFERENCE=${ETHOSU_INFERENCE_TIMEOUT})
set(ETHOSU_INFERENCE_TIMEOUT_TEXT ${ETHOSU_INFERENCE_TIMEOUT})
else()
set(ETHOSU_INFERENCE_TIMEOUT_TEXT "Default (no timeout)")
endif()
# Set the log level for the target
target_compile_definitions(ethosu_core_driver PRIVATE
ETHOSU_LOG_SEVERITY=${LOG_SEVERITY}
Expand All @@ -100,4 +107,5 @@ message(STATUS "CMAKE_SYSTEM_PROCESSOR : ${CMAKE_SYSTEM_PROCESSO
message(STATUS "CMSIS_PATH : ${CMSIS_PATH}")
message(STATUS "ETHOSU_LOG_ENABLE : ${ETHOSU_LOG_ENABLE}")
message(STATUS "ETHOSU_LOG_SEVERITY : ${ETHOSU_LOG_SEVERITY}")
message(STATUS "ETHOSU_INFERENCE_TIMEOUT : ${ETHOSU_INFERENCE_TIMEOUT_TEXT}")
message(STATUS "*******************************************************")
30 changes: 25 additions & 5 deletions README.md
Expand Up @@ -160,23 +160,40 @@ environemnts where multi-threading is possible, e.g., RTOS, the user is
responsible to provide implementation for mutexes and semaphores to be used by
the driver.

The mutex and semaphores are used as synchronisation mechanisms and unless
specified, the timeout is required to be 'forever'.

The driver allows for an RTOS to set a timeout for the NPU interrupt semaphore.
The timeout can be set with the CMake variable `ETHOSU_INFERENCE_TIMEOUT`, which
is then used as `timeout` argument for the interrupt semaphore take call. Note
that the unit is implementation defined, the value is shipped as is to the
`ethosu_semaphore_take()` function and an override implementation should cast it
to the appropriate type and/or convert it to the unit desired.

A macro `ETHOSU_SEMAPHORE_WAIT_FOREVER` is defined in the driver header file,
and should be made sure to map to the RTOS' equivalent of
'no timeout/wait forever'. Inference timeout value defaults to this if left
unset. The macro is used internally in the driver for the available NPU's, thus
the driver does NOT support setting a timeout other than forever when waiting
for an NPU to become available (global ethosu_semaphore).

The mutex and semaphore APIs are defined as weak linked functions that can be
overridden by the user. The APIs are the usual ones and described below:

```[C]
// create a mutex by returning back a handle
void *ethosu_mutex_create(void);
// lock the given mutex
void ethosu_mutex_lock(void *mutex);
int ethosu_mutex_lock(void *mutex);
// unlock the given mutex
void ethosu_mutex_unlock(void *mutex);
int ethosu_mutex_unlock(void *mutex);
// create a (binary) semaphore by returning back a handle
void *ethosu_semaphore_create(void);
// take from the given semaphore
void ethosu_semaphore_take(void *sem);
// take from the given semaphore, accepting a timeout (unit impl. defined)
int ethosu_semaphore_take(void *sem, uint64_t timeout);
// give from the given semaphore
void ethosu_semaphore_give(void *sem);
int ethosu_semaphore_give(void *sem);
```

## Begin/End inference callbacks
Expand All @@ -187,6 +204,9 @@ To avoid memory leaks, any allocations done in the ethosu_inference_begin() must
be balanced by a corresponding free of the memory in the ethosu_inference_end()
callback.

The end callback will always be called if the begin callback has been called,
including in the event of an interrupt semaphore take timeout.

```[C]
void ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg);
void ethosu_inference_end(struct ethosu_driver *drv, void *user_arg);
Expand Down
20 changes: 17 additions & 3 deletions include/ethosu_driver.h
Expand Up @@ -41,6 +41,12 @@ extern "C" {
#define ETHOSU_DRIVER_VERSION_MINOR 16 ///< Driver minor version
#define ETHOSU_DRIVER_VERSION_PATCH 0 ///< Driver patch version

#define ETHOSU_SEMAPHORE_WAIT_FOREVER (UINT64_MAX)

#ifndef ETHOSU_SEMAPHORE_WAIT_INFERENCE
#define ETHOSU_SEMAPHORE_WAIT_INFERENCE ETHOSU_SEMAPHORE_WAIT_FOREVER
#endif

/******************************************************************************
* Types
******************************************************************************/
Expand All @@ -55,9 +61,17 @@ enum ethosu_job_state
ETHOSU_JOB_DONE
};

enum ethosu_job_result
{
ETHOSU_JOB_RESULT_OK = 0,
ETHOSU_JOB_RESULT_TIMEOUT,
ETHOSU_JOB_RESULT_ERROR
};

struct ethosu_job
{
volatile enum ethosu_job_state state;
volatile enum ethosu_job_result result;
const void *custom_data_ptr;
int custom_data_size;
const uint64_t *base_addr;
Expand All @@ -75,7 +89,6 @@ struct ethosu_driver
uint64_t fast_memory;
size_t fast_memory_size;
uint32_t power_request_counter;
bool status_error;
bool reserved;
};

Expand Down Expand Up @@ -161,9 +174,10 @@ int ethosu_mutex_unlock(void *mutex);
* Take semaphore.
*
* @param sem Pointer to semaphore handle
* @returns 0 on success, else negative error code
* @param timeout Timeout value (unit impl. defined)
* @returns 0 on success else negative error code
*/
int ethosu_semaphore_take(void *sem);
int ethosu_semaphore_take(void *sem, uint64_t timeout);

/**
* Give semaphore.
Expand Down
78 changes: 56 additions & 22 deletions src/ethosu_driver.c
Expand Up @@ -160,7 +160,8 @@ static void *ethosu_semaphore;

void *__attribute__((weak)) ethosu_mutex_create(void)
{
return NULL;
static uint8_t mutex_placeholder;
return &mutex_placeholder;
}

void __attribute__((weak)) ethosu_mutex_destroy(void *mutex)
Expand Down Expand Up @@ -197,12 +198,21 @@ void __attribute__((weak)) ethosu_semaphore_destroy(void *sem)
}

// Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrisics
int __attribute__((weak)) ethosu_semaphore_take(void *sem)
int __attribute__((weak)) ethosu_semaphore_take(void *sem, uint64_t timeout)
{
UNUSED(timeout);
// Baremetal pseudo-example on how to trigger a timeout:
// if (timeout != ETHOSU_SEMAPHORE_WAIT_FOREVER) {
// setup_a_timer_to_call_SEV_after_time(timeout);
// }
struct ethosu_semaphore_t *s = sem;
while (s->count == 0)
{
__WFE();
// Baremetal pseudo-example check if timeout triggered:
// if (SEV_timer_triggered()) {
// return -1;
// }
}
s->count--;
return 0;
Expand Down Expand Up @@ -263,7 +273,7 @@ static int ethosu_deregister_driver(struct ethosu_driver *drv)
{
*prev = curr->next;
LOG_INFO("NPU driver handle %p deregistered.", drv);
ethosu_semaphore_take(ethosu_semaphore);
ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER);
break;
}

Expand Down Expand Up @@ -365,11 +375,15 @@ void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
{
LOG_DEBUG("Got interrupt from Ethos-U");

drv->job.state = ETHOSU_JOB_DONE;
if (!ethosu_dev_handle_interrupt(drv->dev))
// Prevent race condition where interrupt triggered after a timeout waiting
// for semaphore, but before NPU is reset.
if (drv->job.result == ETHOSU_JOB_RESULT_TIMEOUT)
{
drv->status_error = true;
return;
}

drv->job.state = ETHOSU_JOB_DONE;
drv->job.result = ethosu_dev_handle_interrupt(drv->dev) ? ETHOSU_JOB_RESULT_OK : ETHOSU_JOB_RESULT_ERROR;
ethosu_semaphore_give(drv->semaphore);
}

Expand All @@ -395,6 +409,11 @@ int ethosu_init(struct ethosu_driver *drv,
if (!ethosu_mutex)
{
ethosu_mutex = ethosu_mutex_create();
if (!ethosu_mutex)
{
LOG_ERR("Failed to create global driver mutex");
return -1;
}
}

if (!ethosu_semaphore)
Expand Down Expand Up @@ -429,8 +448,6 @@ int ethosu_init(struct ethosu_driver *drv,
return -1;
}

drv->status_error = false;

ethosu_reset_job(drv);
ethosu_register_driver(drv);

Expand Down Expand Up @@ -530,29 +547,48 @@ int ethosu_wait(struct ethosu_driver *drv, bool block)
case ETHOSU_JOB_DONE:
// Wait for interrupt in blocking mode. In non-blocking mode
// the interrupt has already triggered
ethosu_semaphore_take(drv->semaphore);
ret = ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
if (ret < 0)
{
drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT;

// There's a race where the NPU interrupt can have fired between semaphore
// timing out and setting the result above (checked in interrupt handler).
// By checking if the job state has been changed (only set to DONE by interrupt
// handler), we know if the interrupt handler has run, if so decrement the
// semaphore count by one (given in interrupt handler).
if (drv->job.state == ETHOSU_JOB_DONE)
{
drv->job.result = ETHOSU_JOB_RESULT_TIMEOUT; // Reset back to timeout
ethosu_semaphore_take(drv->semaphore, ETHOSU_SEMAPHORE_WAIT_INFERENCE);
}
}

// Inference done callback
// Inference done callback - always called even in case of timeout
ethosu_inference_end(drv, drv->job.user_arg);

// Relase power gating disabled requirement
// Release power gating disabled requirement
ethosu_release_power(drv);

// Check NPU and interrupt status
if (drv->status_error)
if (drv->job.result)
{
LOG_ERR("NPU error(s) occured during inference.");
ethosu_dev_print_err_status(drv->dev);
if (drv->job.result == ETHOSU_JOB_RESULT_ERROR)
{
LOG_ERR("NPU error(s) occured during inference.");
ethosu_dev_print_err_status(drv->dev);
}
else
{
LOG_ERR("NPU inference timed out.");
}

// Reset the NPU
(void)ethosu_soft_reset(drv);
// NPU is no longer in error state
drv->status_error = false;

ret = -1;
}

if (ret == 0)
else
{
// Invalidate cache
if (drv->job.base_addr_size != NULL)
Expand All @@ -568,6 +604,7 @@ int ethosu_wait(struct ethosu_driver *drv, bool block)
}

LOG_DEBUG("Inference finished successfully...");
ret = 0;
}

// Reset internal job (state resets to IDLE)
Expand Down Expand Up @@ -643,8 +680,6 @@ int ethosu_invoke_async(struct ethosu_driver *drv,
base_addr[FAST_MEMORY_BASE_ADDR_INDEX] = drv->fast_memory;
}

drv->status_error = false;

// Parse Custom Operator Payload data
while (data_ptr < data_end)
{
Expand Down Expand Up @@ -712,7 +747,7 @@ struct ethosu_driver *ethosu_reserve_driver(void)
struct ethosu_driver *drv = NULL;

LOG_INFO("Acquiring NPU driver handle");
ethosu_semaphore_take(ethosu_semaphore); // This is meant to block until available
ethosu_semaphore_take(ethosu_semaphore, ETHOSU_SEMAPHORE_WAIT_FOREVER); // This is meant to block until available

ethosu_mutex_lock(ethosu_mutex);
drv = registered_drivers;
Expand Down Expand Up @@ -751,7 +786,6 @@ void ethosu_release_driver(struct ethosu_driver *drv)
drv->power_request_counter = 0;
ethosu_soft_reset(drv);
ethosu_reset_job(drv);
drv->status_error = false;
}
}

Expand Down

0 comments on commit 8e2cf75

Please sign in to comment.