Skip to content

Commit 116ca5f

Browse files
committed
Optimise mbed_ticker_api.c
The generic code in mbed_ticker_api.c uses run-time polymorphism to handle different tickers, and has generic run-time calculations for different ticker widths and frequencies, with a single special case for 1MHz. Extend the run-time special-casing to handle any conversion where either the multiply or the divide can be done as a shift. This is a speed optimisation for certain platforms. Add a new option `target.custom-tickers`. If turned off, it promises that only USTICKER and LPTICKER devices will be used. This then permits elimination and/or simplification of run-time calculations, reducing code size and improving speed. If USTICKER and LPTICKER have the same width or the same frequency, or if only one of them exists, then the corresponding operations can be hard-coded. This is a significant ROM space saving, and a minor speed and RAM saving. We get to optimise all the calculations, but the run-time polymorphism is retained even if there is only one ticker, as it doesn't significantly affect code size versus direct calls, and the existence of lp_ticker_wrapper and various us_ticker optimisations requires it, even if only LPTICKER is available.
1 parent 56396d6 commit 116ca5f

File tree

3 files changed

+248
-50
lines changed

3 files changed

+248
-50
lines changed

hal/mbed_ticker_api.c

Lines changed: 192 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,95 @@
2121
#include "platform/mbed_assert.h"
2222
#include "platform/mbed_error.h"
2323

24+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS
25+
#include "us_ticker_api.h"
26+
#include "lp_ticker_api.h"
27+
#endif
28+
29+
// It's almost always worth avoiding division, but only worth avoiding
30+
// multiplication on some cores.
31+
#if defined(__CORTEX_M0) || defined(__CORTEX_M0PLUS) || defined(__CORTEX_M23)
32+
#define SLOW_MULTIPLY 1
33+
#else
34+
#define SLOW_MULTIPLY 0
35+
#endif
36+
37+
// Do we compute ratio from frequency, or can we always get it from defines?
38+
#if MBED_CONF_TARGET_CUSTOM_TICKERS || (DEVICE_USTICKER && !defined US_TICKER_PERIOD_NUM) || (DEVICE_LPTICKER && !defined LP_TICKER_PERIOD_NUM)
39+
#define COMPUTE_RATIO_FROM_FREQUENCY 1
40+
#else
41+
#define COMPUTE_RATIO_FROM_FREQUENCY 0
42+
#endif
43+
2444
static void schedule_interrupt(const ticker_data_t *const ticker);
2545
static void update_present_time(const ticker_data_t *const ticker);
2646

47+
/* Macros that either look up the info from mbed_ticker_queue_t, or give a constant.
48+
* Some constants are defined during the definition of initialize, to keep the
49+
* compile-time and run-time calculations alongside each other.
50+
*/
51+
#ifdef MBED_TICKER_CONSTANT_PERIOD_NUM
52+
#define TICKER_PERIOD_NUM(queue) MBED_TICKER_CONSTANT_PERIOD_NUM
53+
// Don't bother doing this - rely on the compiler being able to convert "/ 2^k" to ">> k".
54+
#define TICKER_PERIOD_NUM_SHIFTS(queue) (-1)
55+
#else
56+
#define TICKER_PERIOD_NUM(queue) ((queue)->period_num)
57+
#define TICKER_PERIOD_NUM_SHIFTS(queue) ((queue)->period_num_shifts)
58+
#endif
59+
60+
#ifdef MBED_TICKER_CONSTANT_PERIOD_DEN
61+
#define TICKER_PERIOD_DEN(queue) MBED_TICKER_CONSTANT_PERIOD_DEN
62+
#define TICKER_PERIOD_DEN_SHIFTS(queue) (-1)
63+
#else
64+
#define TICKER_PERIOD_DEN(queue) ((queue)->period_den)
65+
#define TICKER_PERIOD_DEN_SHIFTS(queue) ((queue)->period_den_shifts)
66+
#endif
67+
68+
// But the above can generate compiler warnings from `if (-1 >= 0) { x >>= -1; }`
69+
#if defined ( __CC_ARM )
70+
#pragma diag_suppress 62 // Shift count is negative
71+
#elif defined ( __GNUC__ )
72+
#pragma GCC diagnostic ignored "-Wshift-count-negative"
73+
#elif defined (__ICCARM__)
74+
#pragma diag_suppress=Pe062 // Shift count is negative
75+
#endif
76+
77+
#ifdef MBED_TICKER_CONSTANT_MASK
78+
#define TICKER_BITMASK(queue) MBED_TICKER_CONSTANT_MASK
79+
#define TICKER_MAX_DELTA(queue) CONSTANT_MAX_DELTA
80+
#else
81+
#define TICKER_BITMASK(queue) ((queue)->bitmask)
82+
#define TICKER_MAX_DELTA(queue) ((queue)->max_delta)
83+
#endif
84+
85+
#if defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK
86+
#define TICKER_MAX_DELTA_US(queue) CONSTANT_MAX_DELTA_US
87+
#else
88+
#define TICKER_MAX_DELTA_US(queue) ((queue)->max_delta_us)
89+
#endif
90+
91+
#if COMPUTE_RATIO_FROM_FREQUENCY
92+
/**
 * Compute the greatest common divisor of two unsigned 32-bit values
 * using Euclid's algorithm.
 *
 * @param a First value
 * @param b Second value
 * @return The greatest common divisor of a and b. gcd(a, 0) is a, and
 *         gcd(0, 0) is 0.
 *
 * The loop condition is tested before the first modulo so that b == 0
 * never reaches `a % b` (division by zero is undefined behaviour).
 */
static inline uint32_t gcd(uint32_t a, uint32_t b)
{
    while (b != 0) {
        uint32_t r = a % b;
        a = b;
        b = r;
    }
    return a;
}
101+
102+
/**
 * Return the exact base-2 logarithm of a value, if it has one.
 *
 * @param n Value to test
 * @return k if n == 2^k for some k in 0..31, otherwise -1 (this includes
 *         n == 0).
 *
 * Note that n == 1 yields 0. The conversion code compares the stored shift
 * count against 0 to skip a multiply or divide by 1, so the loop must run
 * all the way down to bit 0.
 */
static int exact_log2(uint32_t n)
{
    for (int i = 31; i >= 0; --i) {
        if (((uint32_t)1 << i) == n) {
            return i;
        }
    }
    return -1;
}
111+
#endif
112+
27113
/*
28114
* Initialize a ticker instance.
29115
*/
@@ -40,9 +126,36 @@ static void initialize(const ticker_data_t *ticker)
40126

41127
ticker->interface->init();
42128

129+
#if MBED_TRAP_ERRORS_ENABLED || COMPUTE_RATIO_FROM_FREQUENCY || !defined MBED_TICKER_CONSTANT_MASK
43130
const ticker_info_t *info = ticker->interface->get_info();
131+
#endif
132+
133+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS && MBED_TRAP_ERRORS_ENABLED
134+
/* They must be passing us one of the well-known tickers. Check info
135+
* rather than the data, to cope with the lp_ticker_wrapper. It doesn't count
136+
* as a "custom ticker" for the purpose of this optimization.
137+
*
138+
* This check has the downside of potentially pulling in code for an unused ticker.
139+
* This is minimized by using direct xxx_ticker_get_info() calls rather than
140+
* `get_us_ticker_data()->interface->get_info()` which would pull in the entire system,
141+
* and we wrap it in `MBED_TRAP_ERRORS_ENABLED`.
142+
*/
143+
#if DEVICE_USTICKER && DEVICE_LPTICKER
144+
MBED_ASSERT(info == us_ticker_get_info() || info == lp_ticker_get_info());
145+
#elif DEVICE_USTICKER
146+
MBED_ASSERT(info == us_ticker_get_info());
147+
#elif DEVICE_LPTICKER
148+
MBED_ASSERT(info == lp_ticker_get_info());
149+
#else
150+
MBED_ASSERT(false);
151+
#endif
152+
#endif
153+
154+
#if COMPUTE_RATIO_FROM_FREQUENCY
155+
// Will need to use frequency computation for at least some cases, so always do it
156+
// to minimise code size.
44157
uint32_t frequency = info->frequency;
45-
if (info->frequency == 0) {
158+
if (frequency == 0) {
46159
#if MBED_TRAP_ERRORS_ENABLED
47160
MBED_ERROR(
48161
MBED_MAKE_ERROR(
@@ -56,16 +169,27 @@ static void initialize(const ticker_data_t *ticker)
56169
#endif // MBED_TRAP_ERRORS_ENABLED
57170
}
58171

59-
uint8_t frequency_shifts = 0;
60-
for (uint8_t i = 31; i > 0; --i) {
61-
if ((1U << i) == frequency) {
62-
frequency_shifts = i;
63-
break;
64-
}
65-
}
66-
172+
const uint32_t period_gcd = gcd(frequency, 1000000);
173+
ticker->queue->period_num = 1000000 / period_gcd;
174+
ticker->queue->period_num_shifts = exact_log2(ticker->queue->period_num);
175+
ticker->queue->period_den = frequency / period_gcd;
176+
ticker->queue->period_den_shifts = exact_log2(ticker->queue->period_den);
177+
#elif !MBED_TICKER_CONSTANT_PERIOD
178+
// Have ratio defines, but need to figure out which one applies.
179+
// `runs_in_deep_sleep` is a viable proxy. (We have asserts above that
180+
// check that they're only passing usticker or lpticker).
181+
const bool is_usticker = !DEVICE_LPTICKER || !ticker->interface->runs_in_deep_sleep;
182+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
183+
ticker->queue->period_num = is_usticker ? US_TICKER_PERIOD_NUM : LP_TICKER_PERIOD_NUM;
184+
#endif
185+
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
186+
ticker->queue->period_den = is_usticker ? US_TICKER_PERIOD_DEN : LP_TICKER_PERIOD_DEN;
187+
#endif
188+
#endif // COMPUTE_RATIO_FROM_FREQUENCY / MBED_TICKER_CONSTANT_PERIOD
189+
190+
#ifndef MBED_TICKER_CONSTANT_MASK
67191
uint32_t bits = info->bits;
68-
if ((info->bits > 32) || (info->bits < 4)) {
192+
if ((bits > 32) || (bits < 4)) {
69193
#if MBED_TRAP_ERRORS_ENABLED
70194
MBED_ERROR(
71195
MBED_MAKE_ERROR(
@@ -78,19 +202,24 @@ static void initialize(const ticker_data_t *ticker)
78202
bits = 32;
79203
#endif // MBED_TRAP_ERRORS_ENABLED
80204
}
81-
uint32_t max_delta = 0x7 << (bits - 4); // 7/16th
82-
uint64_t max_delta_us =
83-
((uint64_t)max_delta * 1000000 + frequency - 1) / frequency;
205+
ticker->queue->bitmask = bits == 32 ? 0xFFFFFFFF : (1U << bits) - 1;
206+
ticker->queue->max_delta = 7 << (bits - 4); // 7/16th
207+
#else // MBED_TICKER_CONSTANT_MASK
208+
#define CONSTANT_MAX_DELTA (7 * ((MBED_TICKER_CONSTANT_MASK >> 4) + 1)) // 7/16th
209+
#endif // MBED_TICKER_CONSTANT_MASK
210+
211+
#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
212+
ticker->queue->max_delta_us =
213+
((uint64_t)TICKER_MAX_DELTA(ticker->queue) * TICKER_PERIOD_NUM(ticker->queue) + TICKER_PERIOD_DEN(ticker->queue) - 1) / TICKER_PERIOD_DEN(ticker->queue);
214+
#else
215+
#define CONSTANT_MAX_DELTA_US \
216+
(((uint64_t)CONSTANT_MAX_DELTA * MBED_TICKER_CONSTANT_PERIOD_NUM + MBED_TICKER_CONSTANT_PERIOD_DEN - 1) / MBED_TICKER_CONSTANT_PERIOD_DEN)
217+
#endif
84218

85219
ticker->queue->event_handler = NULL;
86220
ticker->queue->head = NULL;
87221
ticker->queue->tick_last_read = ticker->interface->read();
88222
ticker->queue->tick_remainder = 0;
89-
ticker->queue->frequency = frequency;
90-
ticker->queue->frequency_shifts = frequency_shifts;
91-
ticker->queue->bitmask = ((uint64_t)1 << bits) - 1;
92-
ticker->queue->max_delta = max_delta;
93-
ticker->queue->max_delta_us = max_delta_us;
94223
ticker->queue->present_time = 0;
95224
ticker->queue->dispatching = false;
96225
ticker->queue->suspended = false;
@@ -154,27 +283,31 @@ static void update_present_time(const ticker_data_t *const ticker)
154283
return;
155284
}
156285

157-
uint64_t elapsed_ticks = (ticker_time - queue->tick_last_read) & queue->bitmask;
286+
uint32_t elapsed_ticks = (ticker_time - queue->tick_last_read) & TICKER_BITMASK(queue);
158287
queue->tick_last_read = ticker_time;
159288

289+
// Convert elapsed_ticks to elapsed_us as (elapsed_ticks * period_num / period_den)
290+
// adding in any remainder from the last division
291+
uint64_t scaled_ticks;
292+
if (SLOW_MULTIPLY && TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
293+
scaled_ticks = (uint64_t) elapsed_ticks << TICKER_PERIOD_NUM_SHIFTS(queue);
294+
} else {
295+
scaled_ticks = (uint64_t) elapsed_ticks * TICKER_PERIOD_NUM(queue);
296+
}
160297
uint64_t elapsed_us;
161-
if (1000000 == queue->frequency) {
162-
// Optimized for 1MHz
163-
164-
elapsed_us = elapsed_ticks;
298+
if (TICKER_PERIOD_DEN_SHIFTS(queue) == 0) {
299+
// Optimized for cases that don't need division
300+
elapsed_us = scaled_ticks;
165301
} else {
166-
uint64_t us_x_ticks = elapsed_ticks * 1000000;
167-
if (0 != queue->frequency_shifts) {
168-
// Optimized for frequencies divisible by 2
169-
elapsed_us = us_x_ticks >> queue->frequency_shifts;
170-
queue->tick_remainder += us_x_ticks - (elapsed_us << queue->frequency_shifts);
302+
scaled_ticks += queue->tick_remainder;
303+
if (TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
304+
// Speed-optimised for shifts
305+
elapsed_us = scaled_ticks >> TICKER_PERIOD_DEN_SHIFTS(queue);
306+
queue->tick_remainder = scaled_ticks - (elapsed_us << TICKER_PERIOD_DEN_SHIFTS(queue));
171307
} else {
172-
elapsed_us = us_x_ticks / queue->frequency;
173-
queue->tick_remainder += us_x_ticks - elapsed_us * queue->frequency;
174-
}
175-
if (queue->tick_remainder >= queue->frequency) {
176-
elapsed_us += 1;
177-
queue->tick_remainder -= queue->frequency;
308+
// General case division
309+
elapsed_us = scaled_ticks / TICKER_PERIOD_DEN(queue);
310+
queue->tick_remainder = scaled_ticks - elapsed_us * TICKER_PERIOD_DEN(queue);
178311
}
179312
}
180313

@@ -190,25 +323,37 @@ static timestamp_t compute_tick_round_up(const ticker_data_t *const ticker, us_t
190323
ticker_event_queue_t *queue = ticker->queue;
191324
us_timestamp_t delta_us = timestamp - queue->present_time;
192325

193-
timestamp_t delta = ticker->queue->max_delta;
194-
if (delta_us <= ticker->queue->max_delta_us) {
326+
timestamp_t delta = TICKER_MAX_DELTA(ticker->queue);
327+
if (delta_us <= TICKER_MAX_DELTA_US(ticker->queue)) {
195328
// Checking max_delta_us ensures the operation will not overflow
196329

197-
if (1000000 == queue->frequency) {
198-
// Optimized for 1MHz
199-
delta = delta_us;
200-
} else if (0 != queue->frequency_shifts) {
201-
// Optimized frequencies divisible by 2
202-
delta = ((delta_us << ticker->queue->frequency_shifts) + 1000000 - 1) / 1000000;
330+
// Convert delta_us to delta (ticks) as (delta_us * period_den / period_num)
331+
// taking care to round up if num != 1
332+
uint64_t scaled_delta;
333+
if (SLOW_MULTIPLY && TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
334+
// Optimized for denominators that are a power of 2
335+
scaled_delta = delta_us << TICKER_PERIOD_DEN_SHIFTS(queue);
203336
} else {
204337
// General case
205-
delta = (delta_us * queue->frequency + 1000000 - 1) / 1000000;
338+
scaled_delta = delta_us * TICKER_PERIOD_DEN(queue);
339+
}
340+
if (TICKER_PERIOD_NUM_SHIFTS(queue) == 0) {
341+
delta = scaled_delta;
342+
} else {
343+
scaled_delta += TICKER_PERIOD_NUM(queue) - 1;
344+
if (TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
345+
// Optimized for numerators that are a power of 2
346+
delta = scaled_delta >> TICKER_PERIOD_NUM_SHIFTS(queue);
347+
} else {
348+
// General case
349+
delta = scaled_delta / TICKER_PERIOD_NUM(queue);
350+
}
206351
}
207-
if (delta > ticker->queue->max_delta) {
208-
delta = ticker->queue->max_delta;
352+
if (delta > TICKER_MAX_DELTA(queue)) {
353+
delta = TICKER_MAX_DELTA(queue);
209354
}
210355
}
211-
return (queue->tick_last_read + delta) & queue->bitmask;
356+
return (queue->tick_last_read + delta) & TICKER_BITMASK(queue);
212357
}
213358

214359
//NOTE: Must be called from critical section!
@@ -308,7 +453,7 @@ static void schedule_interrupt(const ticker_data_t *const ticker)
308453
}
309454
} else {
310455
uint32_t match_tick =
311-
(queue->tick_last_read + queue->max_delta) & queue->bitmask;
456+
(queue->tick_last_read + TICKER_MAX_DELTA(queue)) & TICKER_BITMASK(queue);
312457
ticker->interface->set_interrupt(match_tick);
313458
}
314459
}

hal/ticker_api.h

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,71 @@ typedef struct {
7070
bool runs_in_deep_sleep; /**< Whether ticker operates in deep sleep */
7171
} ticker_interface_t;
7272

73+
/* Optimizations to avoid run-time computation if custom ticker support is disabled and
74+
* there is exactly one of USTICKER or LPTICKER available, or if they have the same
75+
* parameter value(s).
76+
*/
77+
#define MBED_TICKER_JUST_US (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_USTICKER && !DEVICE_LPTICKER)
78+
#define MBED_TICKER_JUST_LP (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_LPTICKER && !DEVICE_USTICKER)
79+
80+
#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_NUM) || \
81+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_NUM && defined LP_TICKER_PERIOD_NUM && \
82+
US_TICKER_PERIOD_NUM == LP_TICKER_PERIOD_NUM)
83+
#define MBED_TICKER_CONSTANT_PERIOD_NUM US_TICKER_PERIOD_NUM
84+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_NUM
85+
#define MBED_TICKER_CONSTANT_PERIOD_NUM LP_TICKER_PERIOD_NUM
86+
#endif
87+
88+
#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_DEN) || \
89+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_DEN && defined LP_TICKER_PERIOD_DEN && \
90+
US_TICKER_PERIOD_DEN == LP_TICKER_PERIOD_DEN)
91+
#define MBED_TICKER_CONSTANT_PERIOD_DEN US_TICKER_PERIOD_DEN
92+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_DEN
93+
#define MBED_TICKER_CONSTANT_PERIOD_DEN LP_TICKER_PERIOD_DEN
94+
#endif
95+
96+
#if defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_PERIOD_DEN
97+
#define MBED_TICKER_CONSTANT_PERIOD
98+
#endif
99+
100+
#if (MBED_TICKER_JUST_US && defined US_TICKER_MASK) || \
101+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_MASK && defined LP_TICKER_MASK && \
102+
US_TICKER_MASK == LP_TICKER_MASK)
103+
#define MBED_TICKER_CONSTANT_MASK US_TICKER_MASK
104+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_MASK
105+
#define MBED_TICKER_CONSTANT_MASK LP_TICKER_MASK
106+
#endif
107+
73108
/** Ticker's event queue structure
74109
*/
75110
typedef struct {
76111
ticker_event_handler event_handler; /**< Event handler */
77112
ticker_event_t *head; /**< A pointer to head */
78-
uint32_t frequency; /**< Frequency of the timer in Hz */
113+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
114+
uint32_t period_num; /**< Ratio of period to 1us, numerator */
115+
#endif
116+
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
117+
uint32_t period_den; /**< Ratio of period to 1us, denominator */
118+
#endif
119+
#ifndef MBED_TICKER_CONSTANT_MASK
79120
uint32_t bitmask; /**< Mask to be applied to time values read */
80121
uint32_t max_delta; /**< Largest delta in ticks that can be used when scheduling */
122+
#endif
123+
#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
81124
uint64_t max_delta_us; /**< Largest delta in us that can be used when scheduling */
125+
#endif
82126
uint32_t tick_last_read; /**< Last tick read */
83-
uint64_t tick_remainder; /**< Ticks that have not been added to base_time */
127+
uint32_t tick_remainder; /**< Ticks that have not been added to base_time */
84128
us_timestamp_t present_time; /**< Store the timestamp used for present time */
85129
bool initialized; /**< Indicate if the instance is initialized */
86130
bool dispatching; /**< The function ticker_irq_handler is dispatching */
87131
bool suspended; /**< Indicate if the instance is suspended */
88-
uint8_t frequency_shifts; /**< If frequency is a value of 2^n, this is n, otherwise 0 */
132+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
133+
int8_t period_num_shifts; /**< If numerator is a value of 2^n, this is n, otherwise -1 */
134+
#endif
135+
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
136+
int8_t period_den_shifts; /**< If denominator is a value of 2^n, this is n, otherwise -1 */
137+
#endif
89138
} ticker_event_queue_t;
90139

91140
/** Ticker's data structure

targets/targets.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777
"help": "Initialize the microsecond ticker at boot rather than on first use, and leave it initialized. This speeds up wait_us in particular.",
7878
"value": false
7979
},
80+
"custom-tickers": {
81+
"help": "Support custom tickers in addition to USTICKER and LPTICKER. Turning this off can permit some space and speed optimisations, if characteristics of USTICKER and LPTICKER are known at compile time.",
82+
"value": true
83+
},
8084
"xip-enable": {
8185
"help": "Enable Execute In Place (XIP) on this target. Value is only significant if the board has executable external storage such as QSPIF. If this is enabled, customize the linker file to choose what text segments are placed on external storage",
8286
"value": false

0 commit comments

Comments
 (0)