Skip to content

Commit 3ecd37e

Browse files
msperlbroonie
authored andcommitted
spi: bcm2835: enable dma modes for transfers meeting certain conditions
Conditions per spi_transfer are: * transfer.len >= 96 bytes (to avoid mapping overhead costs) * transfer.len < 65536 bytes (limitation by spi-hw block - could get extended) * an individual scatter/gather transfer length must be a multiple of 4 for anything but the last transfer - spi-hw block limit. (a shortcut has been taken in can_dma to avoid unnecessary mapping of pages for which there is a chance of a split with a transfer length that is not a multiple of 4) If it becomes a necessity these restrictions can get removed by additional code. Note that this patch requires a patch to dma-bcm2835.c by Noralf to enable scatter-gather mode inside the dmaengine, which has not been merged yet. That is why no patch to arch/arm/boot/dts/bcm2835.dtsi is included - the code works as before without dma when tx/rx are not set, but it writes a message warning about dma not used: spi-bcm2835 20204000.spi: no tx-dma configuration found - not using dma mode To enable dma-mode add the following lines to the device-tree: dmas = <&dma 6>, <&dma 7>; dma-names = "tx", "rx"; Tested-by: Noralf Trønnes <[email protected]> (private communication) Signed-off-by: Martin Sperl <[email protected]> Signed-off-by: Mark Brown <[email protected]>
1 parent e0d58cd commit 3ecd37e

File tree

1 file changed

+301
-2
lines changed

1 file changed

+301
-2
lines changed

drivers/spi/spi-bcm2835.c

Lines changed: 301 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,18 @@
2323
#include <linux/clk.h>
2424
#include <linux/completion.h>
2525
#include <linux/delay.h>
26+
#include <linux/dma-mapping.h>
27+
#include <linux/dmaengine.h>
2628
#include <linux/err.h>
2729
#include <linux/interrupt.h>
2830
#include <linux/io.h>
2931
#include <linux/kernel.h>
3032
#include <linux/module.h>
3133
#include <linux/of.h>
32-
#include <linux/of_irq.h>
33-
#include <linux/of_gpio.h>
34+
#include <linux/of_address.h>
3435
#include <linux/of_device.h>
36+
#include <linux/of_gpio.h>
37+
#include <linux/of_irq.h>
3538
#include <linux/spi/spi.h>
3639

3740
/* SPI register offsets */
@@ -70,6 +73,7 @@
7073

7174
#define BCM2835_SPI_POLLING_LIMIT_US 30
7275
#define BCM2835_SPI_POLLING_JIFFIES 2
76+
#define BCM2835_SPI_DMA_MIN_LENGTH 96
7377
#define BCM2835_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
7478
| SPI_NO_CS | SPI_3WIRE)
7579

@@ -83,6 +87,7 @@ struct bcm2835_spi {
8387
u8 *rx_buf;
8488
int tx_len;
8589
int rx_len;
90+
bool dma_pending;
8691
};
8792

8893
static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -128,12 +133,15 @@ static void bcm2835_spi_reset_hw(struct spi_master *master)
128133
/* Disable SPI interrupts and transfer */
129134
cs &= ~(BCM2835_SPI_CS_INTR |
130135
BCM2835_SPI_CS_INTD |
136+
BCM2835_SPI_CS_DMAEN |
131137
BCM2835_SPI_CS_TA);
132138
/* and reset RX/TX FIFOS */
133139
cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
134140

135141
/* and reset the SPI_HW */
136142
bcm2835_wr(bs, BCM2835_SPI_CS, cs);
143+
/* as well as DLEN */
144+
bcm2835_wr(bs, BCM2835_SPI_DLEN, 0);
137145
}
138146

139147
static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
@@ -193,6 +201,279 @@ static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
193201
return 1;
194202
}
195203

204+
/*
205+
* DMA support
206+
*
207+
* this implementation has currently a few issues in so far as it does
208+
* not work around limitations of the HW.
209+
*
210+
* the main one being that DMA transfers are limited to 16 bit
211+
* (so 0 to 65535 bytes) by the SPI HW due to BCM2835_SPI_DLEN
212+
*
213+
* also we currently assume that the scatter-gather fragments are
214+
* all multiple of 4 (except the last) - otherwise we would need
215+
* to reset the FIFO before subsequent transfers...
216+
* this also means that tx/rx transfers sg's need to be of equal size!
217+
*
218+
* there may be a few more border-cases we may need to address as well
219+
* but unfortunately this would mean splitting up the scatter-gather
220+
* list making it slightly impractical...
221+
*/
222+
static void bcm2835_spi_dma_done(void *data)
223+
{
224+
struct spi_master *master = data;
225+
struct bcm2835_spi *bs = spi_master_get_devdata(master);
226+
227+
/* reset fifo and HW */
228+
bcm2835_spi_reset_hw(master);
229+
230+
/* and terminate tx-dma as we do not have an irq for it
231+
* because when the rx dma will terminate and this callback
232+
* is called the tx-dma must have finished - can't get to this
233+
* situation otherwise...
234+
*/
235+
dmaengine_terminate_all(master->dma_tx);
236+
237+
/* mark as no longer pending */
238+
bs->dma_pending = 0;
239+
240+
/* and mark as completed */;
241+
complete(&master->xfer_completion);
242+
}
243+
244+
static int bcm2835_spi_prepare_sg(struct spi_master *master,
245+
struct spi_transfer *tfr,
246+
bool is_tx)
247+
{
248+
struct dma_chan *chan;
249+
struct scatterlist *sgl;
250+
unsigned int nents;
251+
enum dma_transfer_direction dir;
252+
unsigned long flags;
253+
254+
struct dma_async_tx_descriptor *desc;
255+
dma_cookie_t cookie;
256+
257+
if (is_tx) {
258+
dir = DMA_MEM_TO_DEV;
259+
chan = master->dma_tx;
260+
nents = tfr->tx_sg.nents;
261+
sgl = tfr->tx_sg.sgl;
262+
flags = 0 /* no tx interrupt */;
263+
264+
} else {
265+
dir = DMA_DEV_TO_MEM;
266+
chan = master->dma_rx;
267+
nents = tfr->rx_sg.nents;
268+
sgl = tfr->rx_sg.sgl;
269+
flags = DMA_PREP_INTERRUPT;
270+
}
271+
/* prepare the channel */
272+
desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
273+
if (!desc)
274+
return -EINVAL;
275+
276+
/* set callback for rx */
277+
if (!is_tx) {
278+
desc->callback = bcm2835_spi_dma_done;
279+
desc->callback_param = master;
280+
}
281+
282+
/* submit it to DMA-engine */
283+
cookie = dmaengine_submit(desc);
284+
285+
return dma_submit_error(cookie);
286+
}
287+
288+
static inline int bcm2835_check_sg_length(struct sg_table *sgt)
289+
{
290+
int i;
291+
struct scatterlist *sgl;
292+
293+
/* check that the sg entries are word-sized (except for last) */
294+
for_each_sg(sgt->sgl, sgl, (int)sgt->nents - 1, i) {
295+
if (sg_dma_len(sgl) % 4)
296+
return -EFAULT;
297+
}
298+
299+
return 0;
300+
}
301+
302+
static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
303+
struct spi_device *spi,
304+
struct spi_transfer *tfr,
305+
u32 cs)
306+
{
307+
struct bcm2835_spi *bs = spi_master_get_devdata(master);
308+
int ret;
309+
310+
/* check that the scatter gather segments are all a multiple of 4 */
311+
if (bcm2835_check_sg_length(&tfr->tx_sg) ||
312+
bcm2835_check_sg_length(&tfr->rx_sg)) {
313+
dev_warn_once(&spi->dev,
314+
"scatter gather segment length is not a multiple of 4 - falling back to interrupt mode\n");
315+
return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
316+
}
317+
318+
/* setup tx-DMA */
319+
ret = bcm2835_spi_prepare_sg(master, tfr, true);
320+
if (ret)
321+
return ret;
322+
323+
/* start TX early */
324+
dma_async_issue_pending(master->dma_tx);
325+
326+
/* mark as dma pending */
327+
bs->dma_pending = 1;
328+
329+
/* set the DMA length */
330+
bcm2835_wr(bs, BCM2835_SPI_DLEN, tfr->len);
331+
332+
/* start the HW */
333+
bcm2835_wr(bs, BCM2835_SPI_CS,
334+
cs | BCM2835_SPI_CS_TA | BCM2835_SPI_CS_DMAEN);
335+
336+
/* setup rx-DMA late - to run transfers while
337+
* mapping of the rx buffers still takes place
338+
* this saves 10us or more.
339+
*/
340+
ret = bcm2835_spi_prepare_sg(master, tfr, false);
341+
if (ret) {
342+
/* need to reset on errors */
343+
dmaengine_terminate_all(master->dma_tx);
344+
bcm2835_spi_reset_hw(master);
345+
return ret;
346+
}
347+
348+
/* start rx dma late */
349+
dma_async_issue_pending(master->dma_rx);
350+
351+
/* wait for wakeup in framework */
352+
return 1;
353+
}
354+
355+
static bool bcm2835_spi_can_dma(struct spi_master *master,
356+
struct spi_device *spi,
357+
struct spi_transfer *tfr)
358+
{
359+
/* only run for gpio_cs */
360+
if (!gpio_is_valid(spi->cs_gpio))
361+
return false;
362+
363+
/* we start DMA efforts only on bigger transfers */
364+
if (tfr->len < BCM2835_SPI_DMA_MIN_LENGTH)
365+
return false;
366+
367+
/* BCM2835_SPI_DLEN has defined a max transfer size as
368+
* 16 bit, so max is 65535
369+
* we can revisit this by using an alternative transfer
370+
* method - ideally this would get done without any more
371+
* interaction...
372+
*/
373+
if (tfr->len > 65535) {
374+
dev_warn_once(&spi->dev,
375+
"transfer size of %d too big for dma-transfer\n",
376+
tfr->len);
377+
return false;
378+
}
379+
380+
/* if we run rx/tx_buf with word aligned addresses then we are OK */
381+
if (((u32)tfr->tx_buf % 4 == 0) && ((u32)tfr->tx_buf % 4 == 0))
382+
return true;
383+
384+
/* otherwise we only allow transfers within the same page
385+
* to avoid wasting time on dma_mapping when it is not practical
386+
*/
387+
if (((u32)tfr->tx_buf % SZ_4K) + tfr->len > SZ_4K) {
388+
dev_warn_once(&spi->dev,
389+
"Unaligned spi tx-transfer bridging page\n");
390+
return false;
391+
}
392+
if (((u32)tfr->rx_buf % SZ_4K) + tfr->len > SZ_4K) {
393+
dev_warn_once(&spi->dev,
394+
"Unaligned spi tx-transfer bridging page\n");
395+
return false;
396+
}
397+
398+
/* return OK */
399+
return true;
400+
}
401+
402+
void bcm2835_dma_release(struct spi_master *master)
403+
{
404+
if (master->dma_tx) {
405+
dmaengine_terminate_all(master->dma_tx);
406+
dma_release_channel(master->dma_tx);
407+
master->dma_tx = NULL;
408+
}
409+
if (master->dma_rx) {
410+
dmaengine_terminate_all(master->dma_rx);
411+
dma_release_channel(master->dma_rx);
412+
master->dma_rx = NULL;
413+
}
414+
}
415+
416+
void bcm2835_dma_init(struct spi_master *master, struct device *dev)
417+
{
418+
struct dma_slave_config slave_config;
419+
const __be32 *addr;
420+
dma_addr_t dma_reg_base;
421+
int ret;
422+
423+
/* base address in dma-space */
424+
addr = of_get_address(master->dev.of_node, 0, NULL, NULL);
425+
if (!addr) {
426+
dev_err(dev, "could not get DMA-register address - not using dma mode\n");
427+
goto err;
428+
}
429+
dma_reg_base = be32_to_cpup(addr);
430+
431+
/* get tx/rx dma */
432+
master->dma_tx = dma_request_slave_channel(dev, "tx");
433+
if (!master->dma_tx) {
434+
dev_err(dev, "no tx-dma configuration found - not using dma mode\n");
435+
goto err;
436+
}
437+
master->dma_rx = dma_request_slave_channel(dev, "rx");
438+
if (!master->dma_rx) {
439+
dev_err(dev, "no rx-dma configuration found - not using dma mode\n");
440+
goto err_release;
441+
}
442+
443+
/* configure DMAs */
444+
slave_config.direction = DMA_MEM_TO_DEV;
445+
slave_config.dst_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
446+
slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
447+
448+
ret = dmaengine_slave_config(master->dma_tx, &slave_config);
449+
if (ret)
450+
goto err_config;
451+
452+
slave_config.direction = DMA_DEV_TO_MEM;
453+
slave_config.src_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
454+
slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
455+
456+
ret = dmaengine_slave_config(master->dma_rx, &slave_config);
457+
if (ret)
458+
goto err_config;
459+
460+
/* all went well, so set can_dma */
461+
master->can_dma = bcm2835_spi_can_dma;
462+
master->max_dma_len = 65535; /* limitation by BCM2835_SPI_DLEN */
463+
/* need to do TX AND RX DMA, so we need dummy buffers */
464+
master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
465+
466+
return;
467+
468+
err_config:
469+
dev_err(dev, "issue configuring dma: %d - not using DMA mode\n",
470+
ret);
471+
err_release:
472+
bcm2835_dma_release(master);
473+
err:
474+
return;
475+
}
476+
196477
static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
197478
struct spi_device *spi,
198479
struct spi_transfer *tfr,
@@ -301,12 +582,26 @@ static int bcm2835_spi_transfer_one(struct spi_master *master,
301582
return bcm2835_spi_transfer_one_poll(master, spi, tfr,
302583
cs, xfer_time_us);
303584

585+
/* run in dma mode if conditions are right */
586+
if (master->can_dma && bcm2835_spi_can_dma(master, spi, tfr))
587+
return bcm2835_spi_transfer_one_dma(master, spi, tfr, cs);
588+
589+
/* run in interrupt-mode */
304590
return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
305591
}
306592

307593
static void bcm2835_spi_handle_err(struct spi_master *master,
308594
struct spi_message *msg)
309595
{
596+
struct bcm2835_spi *bs = spi_master_get_devdata(master);
597+
598+
/* if an error occurred and we have an active dma, then terminate */
599+
if (bs->dma_pending) {
600+
dmaengine_terminate_all(master->dma_tx);
601+
dmaengine_terminate_all(master->dma_rx);
602+
bs->dma_pending = 0;
603+
}
604+
/* and reset */
310605
bcm2835_spi_reset_hw(master);
311606
}
312607

@@ -476,6 +771,8 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
476771
goto out_clk_disable;
477772
}
478773

774+
bcm2835_dma_init(master, &pdev->dev);
775+
479776
/* initialise the hardware with the default polarities */
480777
bcm2835_wr(bs, BCM2835_SPI_CS,
481778
BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
@@ -506,6 +803,8 @@ static int bcm2835_spi_remove(struct platform_device *pdev)
506803

507804
clk_disable_unprepare(bs->clk);
508805

806+
bcm2835_dma_release(master);
807+
509808
return 0;
510809
}
511810

0 commit comments

Comments
 (0)