Skip to content

Commit d1cbeb0

Browse files
committed
Optimise mbed_ticker_api.c
The generic code in mbed_ticker_api.c uses run-time polymorphism to handle different tickers, and has generic run-time calculations for different ticker widths and frequencies, with a single special case for 1MHz. Extend the run-time special casing to handle any conversion where either the multiply or the divide can be done as a shift. This is a speed optimisation for certain platforms. Add a new option `target.custom-tickers`. If turned off, it promises that only USTICKER and LPTICKER devices will be used. This then permits elimination and/or simplification of run-time calculations, saving size and improving speed. If USTICKER and LPTICKER have the same width or the same frequency, or if only one of them exists, then the corresponding operations can be hard-coded. This is a significant ROM space saving, and a minor speed and RAM saving. We get to optimise all the calculations, but the run-time polymorphism is retained even if there is only one ticker, as it doesn't significantly affect code size versus direct calls, and the existence of lp_ticker_wrapper and various us_ticker optimisations requires it, even if only LPTICKER is available.
1 parent 0fef823 commit d1cbeb0

File tree

3 files changed

+205
-49
lines changed

3 files changed

+205
-49
lines changed

hal/mbed_ticker_api.c

Lines changed: 162 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,85 @@
2121
#include "platform/mbed_assert.h"
2222
#include "platform/mbed_error.h"
2323

24+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS
25+
#include "us_ticker_api.h"
26+
#include "lp_ticker_api.h"
27+
#endif
28+
29+
// It's almost always worth avoiding division, but only worth avoiding
30+
// multiplication on some cores.
31+
#if defined(__CORTEX_M0) || defined(__CORTEX_M0PLUS) || defined(__CORTEX_M23)
32+
#define SLOW_MULTIPLY 1
33+
#else
34+
#define SLOW_MULTIPLY 0
35+
#endif
36+
2437
static void schedule_interrupt(const ticker_data_t *const ticker);
2538
static void update_present_time(const ticker_data_t *const ticker);
2639

40+
/* Macros that either look up the info from mbed_ticker_queue_t, or give a constant.
41+
* Some constants are defined during the definition of initialize, to keep the
42+
* compile-time and run-time calculations alongside each other.
43+
*/
44+
#ifdef MBED_TICKER_CONSTANT_PERIOD_NUM
45+
#define TICKER_PERIOD_NUM(queue) MBED_TICKER_CONSTANT_PERIOD_NUM
46+
#define TICKER_PERIOD_DEN(queue) MBED_TICKER_CONSTANT_PERIOD_DEN
47+
// don't bother doing this - rely on the compiler being able to convert "/ 2^k" to ">> k".
48+
#define TICKER_PERIOD_NUM_SHIFTS(queue) (-1)
49+
#define TICKER_PERIOD_DEN_SHIFTS(queue) (-1)
50+
#else
51+
#define TICKER_PERIOD_NUM(queue) ((queue)->period_num)
52+
#define TICKER_PERIOD_DEN(queue) ((queue)->period_den)
53+
#define TICKER_PERIOD_NUM_SHIFTS(queue) ((queue)->period_num_shifts)
54+
#define TICKER_PERIOD_DEN_SHIFTS(queue) ((queue)->period_den_shifts)
55+
#endif
56+
57+
// But the above can generate compiler warnings from `if (-1 >= 0) { x >>= -1; }`
58+
#if defined ( __CC_ARM )
59+
#pragma diag_suppress 62 // Shift count is negative
60+
#elif defined ( __GNUC__ )
61+
#pragma GCC diagnostic ignored "-Wshift-count-negative"
62+
#elif defined (__ICCARM__)
63+
#pragma diag_suppress=Pe062 // Shift count is negative
64+
#endif
65+
66+
67+
#ifdef MBED_TICKER_CONSTANT_MASK
68+
#define TICKER_BITMASK(queue) MBED_TICKER_CONSTANT_MASK
69+
#define TICKER_MAX_DELTA(queue) CONSTANT_MAX_DELTA
70+
#else
71+
#define TICKER_BITMASK(queue) ((queue)->bitmask)
72+
#define TICKER_MAX_DELTA(queue) ((queue)->max_delta)
73+
#endif
74+
75+
#if defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_MASK
76+
#define TICKER_MAX_DELTA_US(queue) CONSTANT_MAX_DELTA_US
77+
#else
78+
#define TICKER_MAX_DELTA_US(queue) ((queue)->max_delta_us)
79+
#endif
80+
81+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
82+
/* Greatest common divisor of a and b, computed with Euclid's algorithm.
 *
 * If either operand is zero the other operand is returned (so gcd(0, 0) is 0).
 * Used by initialize() to reduce the ratio 1000000 / frequency to lowest terms.
 */
static inline uint32_t gcd(uint32_t a, uint32_t b)
{
    // Test b before taking the remainder: "a % 0" is undefined behaviour.
    while (b != 0) {
        const uint32_t r = a % b;
        a = b;
        b = r;
    }
    return a;
}
91+
92+
/* Return k if n is exactly 2^k (0 <= k <= 31), otherwise -1.
 *
 * The result is stored as a shift count in period_num_shifts /
 * period_den_shifts. A result of 0 (n == 1) matters to the callers: they
 * compare the shift count against 0 to skip the multiply or divide entirely,
 * so bit 0 has to be tested as well.
 */
static int exact_log2(uint32_t n)
{
    for (int i = 0; i < 32; i++) {
        if (((uint32_t) 1 << i) == n) {
            return i; // the exponent, not the value itself
        }
    }
    return -1; // zero, or more than one bit set
}
101+
#endif
102+
27103
/*
28104
* Initialize a ticker instance.
29105
*/
@@ -41,8 +117,26 @@ static void initialize(const ticker_data_t *ticker)
41117
ticker->interface->init();
42118

43119
const ticker_info_t *info = ticker->interface->get_info();
120+
121+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS
122+
/* They must be passing us one of the well-known tickers. Check info
123+
* rather than the data, to cope with the lp_ticker_wrapper. It doesn't count
124+
* as a "custom ticker" for the purpose of this optimization.
125+
*/
126+
#if DEVICE_USTICKER && DEVICE_LPTICKER
127+
MBED_ASSERT(info == get_us_ticker_data()->interface->get_info() || info == get_lp_ticker_data()->interface->get_info());
128+
#elif DEVICE_USTICKER
129+
MBED_ASSERT(info == get_us_ticker_data()->interface->get_info());
130+
#elif DEVICE_LPTICKER
131+
MBED_ASSERT(info == get_lp_ticker_data()->interface->get_info());
132+
#else
133+
MBED_ASSERT(false);
134+
#endif
135+
#endif
136+
137+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
44138
uint32_t frequency = info->frequency;
45-
if (info->frequency == 0) {
139+
if (frequency == 0) {
46140
#if MBED_TRAP_ERRORS_ENABLED
47141
MBED_ERROR(
48142
MBED_MAKE_ERROR(
@@ -56,16 +150,17 @@ static void initialize(const ticker_data_t *ticker)
56150
#endif // MBED_TRAP_ERRORS_ENABLED
57151
}
58152

59-
uint8_t frequency_shifts = 0;
60-
for (uint8_t i = 31; i > 0; --i) {
61-
if ((1U << i) == frequency) {
62-
frequency_shifts = i;
63-
break;
64-
}
65-
}
153+
const uint32_t period_gcd = gcd(frequency, 1000000);
154+
ticker->queue->period_num = 1000000 / period_gcd;
155+
ticker->queue->period_den = frequency / period_gcd;
156+
157+
ticker->queue->period_num_shifts = exact_log2(ticker->queue->period_num);
158+
ticker->queue->period_den_shifts = exact_log2(ticker->queue->period_den);
159+
#endif // MBED_TICKER_CONSTANT_PERIOD_NUM
66160

161+
#ifndef MBED_TICKER_CONSTANT_MASK
67162
uint32_t bits = info->bits;
68-
if ((info->bits > 32) || (info->bits < 4)) {
163+
if ((bits > 32) || (bits < 4)) {
69164
#if MBED_TRAP_ERRORS_ENABLED
70165
MBED_ERROR(
71166
MBED_MAKE_ERROR(
@@ -78,19 +173,24 @@ static void initialize(const ticker_data_t *ticker)
78173
bits = 32;
79174
#endif // MBED_TRAP_ERRORS_ENABLED
80175
}
81-
uint32_t max_delta = 0x7 << (bits - 4); // 7/16th
82-
uint64_t max_delta_us =
83-
((uint64_t)max_delta * 1000000 + frequency - 1) / frequency;
176+
ticker->queue->bitmask = bits == 32 ? 0xFFFFFFFF : (1U << bits) - 1;
177+
ticker->queue->max_delta = 7 << (bits - 4); // 7/16th
178+
#else // MBED_TICKER_CONSTANT_MASK
179+
#define CONSTANT_MAX_DELTA (7 * ((MBED_TICKER_CONSTANT_MASK >> 4) + 1)) // 7/16th
180+
#endif // MBED_TICKER_CONSTANT_MASK
181+
182+
#if !(defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_MASK)
183+
ticker->queue->max_delta_us =
184+
((uint64_t)TICKER_MAX_DELTA(ticker->queue) * TICKER_PERIOD_NUM(ticker->queue) + TICKER_PERIOD_DEN(ticker->queue) - 1) / TICKER_PERIOD_DEN(ticker->queue);
185+
#else
186+
#define CONSTANT_MAX_DELTA_US \
187+
(((uint64_t) CONSTANT_MAX_DELTA * MBED_TICKER_CONSTANT_PERIOD_NUM + MBED_TICKER_CONSTANT_PERIOD_DEN - 1) / MBED_TICKER_CONSTANT_PERIOD_DEN)
188+
#endif
84189

85190
ticker->queue->event_handler = NULL;
86191
ticker->queue->head = NULL;
87192
ticker->queue->tick_last_read = ticker->interface->read();
88193
ticker->queue->tick_remainder = 0;
89-
ticker->queue->frequency = frequency;
90-
ticker->queue->frequency_shifts = frequency_shifts;
91-
ticker->queue->bitmask = ((uint64_t)1 << bits) - 1;
92-
ticker->queue->max_delta = max_delta;
93-
ticker->queue->max_delta_us = max_delta_us;
94194
ticker->queue->present_time = 0;
95195
ticker->queue->dispatching = false;
96196
ticker->queue->suspended = false;
@@ -154,27 +254,31 @@ static void update_present_time(const ticker_data_t *const ticker)
154254
return;
155255
}
156256

157-
uint64_t elapsed_ticks = (ticker_time - queue->tick_last_read) & queue->bitmask;
257+
uint32_t elapsed_ticks = (ticker_time - queue->tick_last_read) & TICKER_BITMASK(queue);
158258
queue->tick_last_read = ticker_time;
159259

260+
// Convert elapsed_ticks to elapsed_us as (elapsed_ticks * period_num / period_den)
261+
// adding in any remainder from the last division
262+
uint64_t scaled_ticks;
263+
if (SLOW_MULTIPLY && TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
264+
scaled_ticks = (uint64_t) elapsed_ticks << TICKER_PERIOD_NUM_SHIFTS(queue);
265+
} else {
266+
scaled_ticks = (uint64_t) elapsed_ticks * TICKER_PERIOD_NUM(queue);
267+
}
160268
uint64_t elapsed_us;
161-
if (1000000 == queue->frequency) {
162-
// Optimized for 1MHz
163-
164-
elapsed_us = elapsed_ticks;
269+
if (TICKER_PERIOD_DEN_SHIFTS(queue) == 0) {
270+
// Optimized for cases that don't need division
271+
elapsed_us = scaled_ticks;
165272
} else {
166-
uint64_t us_x_ticks = elapsed_ticks * 1000000;
167-
if (0 != queue->frequency_shifts) {
168-
// Optimized for frequencies divisible by 2
169-
elapsed_us = us_x_ticks >> queue->frequency_shifts;
170-
queue->tick_remainder += us_x_ticks - (elapsed_us << queue->frequency_shifts);
273+
scaled_ticks += queue->tick_remainder;
274+
if (TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
275+
// Speed-optimised for shifts
276+
elapsed_us = scaled_ticks >> TICKER_PERIOD_DEN_SHIFTS(queue);
277+
queue->tick_remainder = scaled_ticks - (elapsed_us << TICKER_PERIOD_DEN_SHIFTS(queue));
171278
} else {
172-
elapsed_us = us_x_ticks / queue->frequency;
173-
queue->tick_remainder += us_x_ticks - elapsed_us * queue->frequency;
174-
}
175-
if (queue->tick_remainder >= queue->frequency) {
176-
elapsed_us += 1;
177-
queue->tick_remainder -= queue->frequency;
279+
// General case division
280+
elapsed_us = scaled_ticks / TICKER_PERIOD_DEN(queue);
281+
queue->tick_remainder = scaled_ticks - elapsed_us * TICKER_PERIOD_DEN(queue);
178282
}
179283
}
180284

@@ -190,25 +294,37 @@ static timestamp_t compute_tick_round_up(const ticker_data_t *const ticker, us_t
190294
ticker_event_queue_t *queue = ticker->queue;
191295
us_timestamp_t delta_us = timestamp - queue->present_time;
192296

193-
timestamp_t delta = ticker->queue->max_delta;
194-
if (delta_us <= ticker->queue->max_delta_us) {
297+
timestamp_t delta = TICKER_MAX_DELTA(ticker->queue);
298+
if (delta_us <= TICKER_MAX_DELTA_US(ticker->queue)) {
195299
// Checking max_delta_us ensures the operation will not overflow
196300

197-
if (1000000 == queue->frequency) {
198-
// Optimized for 1MHz
199-
delta = delta_us;
200-
} else if (0 != queue->frequency_shifts) {
201-
// Optimized frequencies divisible by 2
202-
delta = ((delta_us << ticker->queue->frequency_shifts) + 1000000 - 1) / 1000000;
301+
// Convert delta_us to delta (ticks) as (delta_us * period_den / period_num)
302+
// taking care to round up if num != 1
303+
uint64_t scaled_delta;
304+
if (SLOW_MULTIPLY && TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
305+
// Optimized denominators divisible by 2
306+
scaled_delta = delta_us << TICKER_PERIOD_DEN_SHIFTS(queue);
203307
} else {
204308
// General case
205-
delta = (delta_us * queue->frequency + 1000000 - 1) / 1000000;
309+
scaled_delta = delta_us * TICKER_PERIOD_DEN(queue);
310+
}
311+
if (TICKER_PERIOD_NUM_SHIFTS(queue) == 0) {
312+
delta = scaled_delta;
313+
} else {
314+
scaled_delta += TICKER_PERIOD_NUM(queue) - 1;
315+
if (TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
316+
// Optimized numerators divisible by 2
317+
delta = scaled_delta >> TICKER_PERIOD_NUM_SHIFTS(queue);
318+
} else {
319+
// General case
320+
delta = scaled_delta / TICKER_PERIOD_NUM(queue);
321+
}
206322
}
207-
if (delta > ticker->queue->max_delta) {
208-
delta = ticker->queue->max_delta;
323+
if (delta > TICKER_MAX_DELTA(queue)) {
324+
delta = TICKER_MAX_DELTA(queue);
209325
}
210326
}
211-
return (queue->tick_last_read + delta) & queue->bitmask;
327+
return (queue->tick_last_read + delta) & TICKER_BITMASK(queue);
212328
}
213329

214330
//NOTE: Must be called from critical section!
@@ -308,7 +424,7 @@ static void schedule_interrupt(const ticker_data_t *const ticker)
308424
}
309425
} else {
310426
uint32_t match_tick =
311-
(queue->tick_last_read + queue->max_delta) & queue->bitmask;
427+
(queue->tick_last_read + TICKER_MAX_DELTA(queue)) & TICKER_BITMASK(queue);
312428
ticker->interface->set_interrupt(match_tick);
313429
}
314430
}

hal/ticker_api.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,58 @@ typedef struct {
7070
bool runs_in_deep_sleep; /**< Whether ticker operates in deep sleep */
7171
} ticker_interface_t;
7272

73+
/* Optimizations to avoid run-time computation if custom ticker support is disabled and
74+
* there is exactly one of USTICKER or LPTICKER available, or if they have the same
75+
* parameter value(s).
76+
*/
77+
#define MBED_TICKER_JUST_US (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_USTICKER && !DEVICE_LPTICKER)
78+
#define MBED_TICKER_JUST_LP (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_LPTICKER && !DEVICE_USTICKER)
79+
#define MBED_TICKER_EXACTLY_ONE (MBED_TICKER_JUST_US || MBED_TICKER_JUST_LP)
80+
81+
#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_NUM) || \
82+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_NUM && defined LP_TICKER_PERIOD_NUM && \
83+
US_TICKER_PERIOD_NUM == LP_TICKER_PERIOD_NUM && US_TICKER_PERIOD_DEN == LP_TICKER_PERIOD_DEN)
84+
#define MBED_TICKER_CONSTANT_PERIOD_NUM US_TICKER_PERIOD_NUM
85+
#define MBED_TICKER_CONSTANT_PERIOD_DEN US_TICKER_PERIOD_DEN
86+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_NUM
87+
#define MBED_TICKER_CONSTANT_PERIOD_NUM LP_TICKER_PERIOD_NUM
88+
#define MBED_TICKER_CONSTANT_PERIOD_DEN LP_TICKER_PERIOD_DEN
89+
#endif
90+
91+
#if (MBED_TICKER_JUST_US && defined US_TICKER_MASK) || \
92+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_MASK && defined LP_TICKER_MASK && \
93+
US_TICKER_MASK == LP_TICKER_MASK)
94+
#define MBED_TICKER_CONSTANT_MASK US_TICKER_MASK
95+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_MASK
96+
#define MBED_TICKER_CONSTANT_MASK LP_TICKER_MASK
97+
#endif
98+
7399
/** Ticker's event queue structure
74100
*/
75101
typedef struct {
76102
ticker_event_handler event_handler; /**< Event handler */
77103
ticker_event_t *head; /**< A pointer to head */
78-
uint32_t frequency; /**< Frequency of the timer in Hz */
104+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
105+
uint32_t period_num; /**< Ratio of period to 1us, numerator */
106+
uint32_t period_den; /**< Ratio of period to 1us, denominator */
107+
#endif
108+
#ifndef MBED_TICKER_CONSTANT_MASK
79109
uint32_t bitmask; /**< Mask to be applied to time values read */
80110
uint32_t max_delta; /**< Largest delta in ticks that can be used when scheduling */
111+
#endif
112+
#if !(defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_MASK)
81113
uint64_t max_delta_us; /**< Largest delta in us that can be used when scheduling */
114+
#endif
82115
uint32_t tick_last_read; /**< Last tick read */
83-
uint64_t tick_remainder; /**< Ticks that have not been added to base_time */
116+
uint32_t tick_remainder; /**< Ticks that have not been added to base_time */
84117
us_timestamp_t present_time; /**< Store the timestamp used for present time */
85118
bool initialized; /**< Indicate if the instance is initialized */
86119
bool dispatching; /**< The function ticker_irq_handler is dispatching */
87120
bool suspended; /**< Indicate if the instance is suspended */
88-
uint8_t frequency_shifts; /**< If frequency is a value of 2^n, this is n, otherwise 0 */
121+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
122+
int8_t period_num_shifts; /**< If numerator is a value of 2^n, this is n, otherwise -1 */
123+
int8_t period_den_shifts; /**< If denominator is a value of 2^n, this is n, otherwise -1 */
124+
#endif
89125
} ticker_event_queue_t;
90126

91127
/** Ticker's data structure

targets/targets.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777
"help": "Initialize the microsecond ticker at boot rather than on first use, and leave it initialized. This speeds up wait_us in particular.",
7878
"value": false
7979
},
80+
"custom-tickers": {
81+
"help": "Support custom tickers in addition to USTICKER and LPTICKER. Turning this off can permit some space and speed optimisations, if characteristics of USTICKER and LPTICKER are known at compile time.",
82+
"value": true
83+
},
8084
"xip-enable": {
8185
"help": "Enable Execute In Place (XIP) on this target. Value is only significant if the board has executable external storage such as QSPIF. If this is enabled, customize the linker file to choose what text segments are placed on external storage",
8286
"value": false

0 commit comments

Comments
 (0)