From 8325e3684cbe963bdce6e59cfdc2c1fcc354f3fb Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Fri, 22 Apr 2016 17:17:13 +0000 Subject: [PATCH 01/19] Revert "bcm2835-dma: Fix up convert to DMA pool" This reverts commit ec2e48fda22c57cab56a4332d1a095f91c919493. --- drivers/dma/bcm2835-dma.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index d26b6bdbcf1ab3..6b87ce2bb3c9d3 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -488,17 +488,6 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( c->cyclic = true; return vchan_tx_prep(&c->vc, &d->vd, flags); -error_cb: - i--; - for (; i >= 0; i--) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); - } - - kfree(d->cb_list); - kfree(d); - return NULL; } static struct dma_async_tx_descriptor * @@ -545,7 +534,6 @@ bcm2835_dma_prep_slave_sg(struct dma_chan *chan, if (!d) return NULL; - d->c = c; d->dir = direction; if (c->ch >= 8) /* LITE channel */ @@ -565,21 +553,15 @@ bcm2835_dma_prep_slave_sg(struct dma_chan *chan, d->frames += len / max_size + 1; } - d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); - if (!d->cb_list) { + /* Allocate memory for control blocks */ + d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb); + d->control_block_base = dma_zalloc_coherent(chan->device->dev, + d->control_block_size, &d->control_block_base_phys, + GFP_NOWAIT); + if (!d->control_block_base) { kfree(d); return NULL; } - /* Allocate memory for control blocks */ - for (i = 0; i < d->frames; i++) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, - &cb_entry->paddr); - - if (!cb_entry->cb) - goto error_cb; - } /* * Iterate over all SG entries, create a control block @@ -596,7 +578,7 @@ bcm2835_dma_prep_slave_sg(struct dma_chan 
*chan, for (j = 0; j < len; j += max_size) { struct bcm2835_dma_cb *control_block = - d->cb_list[i + split_cnt].cb; + &d->control_block_base[i + split_cnt]; /* Setup addresses */ if (d->dir == DMA_DEV_TO_MEM) { @@ -638,7 +620,9 @@ bcm2835_dma_prep_slave_sg(struct dma_chan *chan, if (i < sg_len - 1 || len - j > max_size) { /* Next block is the next frame. */ control_block->next = - d->cb_list[i + split_cnt + 1].paddr; + d->control_block_base_phys + + sizeof(struct bcm2835_dma_cb) * + (i + split_cnt + 1); } else { /* Next block is empty. */ control_block->next = 0; From 3cd8f1c91a28f8c97635521c433ef67dfb585a76 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Fri, 22 Apr 2016 17:17:22 +0000 Subject: [PATCH 02/19] Revert "bcm2835-dma: Limit cyclic transfers on lite channels to 32k" This reverts commit 052c2005b6ecedc5abad86632f5781adda310aa7. --- drivers/dma/bcm2835-dma.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 6b87ce2bb3c9d3..5db0a95985b20a 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -144,12 +144,6 @@ struct bcm2835_desc { */ #define MAX_LITE_TRANSFER (SZ_64K - 4) -/* - * Transfers larger than 32k cause issues with the bcm2708-i2s driver, - * so limit transfer size to 32k as bcm2708-dmaengine did. 
- */ -#define MAX_CYCLIC_LITE_TRANSFER SZ_32K - static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -418,7 +412,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( d->c = c; d->dir = direction; if (c->ch >= 8) /* LITE channel */ - max_size = MAX_CYCLIC_LITE_TRANSFER; + max_size = MAX_LITE_TRANSFER; else max_size = MAX_NORMAL_TRANSFER; period_len = min(period_len, max_size); From 825d92c755e430f09071821fe64ed9796214f3f6 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Fri, 22 Apr 2016 17:17:30 +0000 Subject: [PATCH 03/19] Revert "dmaengine: bcm2835: Load driver early and support legacy API" This reverts commit 6f56fff39c811953809b011f59c49a2122c4d173. --- drivers/dma/Kconfig | 2 +- drivers/dma/bcm2835-dma.c | 30 ++++++------------------------ 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 33e36b90cfeddc..b7a17b1c40e23c 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -108,7 +108,7 @@ config COH901318 config DMA_BCM2835 tristate "BCM2835 DMA engine support" - depends on ARCH_BCM2835 || ARCH_BCM2708 || ARCH_BCM2709 + depends on ARCH_BCM2835 select DMA_ENGINE select DMA_VIRTUAL_CHANNELS diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 5db0a95985b20a..696fb30b4f5a01 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -787,10 +786,6 @@ static int bcm2835_dma_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - rc = bcm_dmaman_probe(pdev, base, BCM2835_DMA_BULK_MASK); - if (rc) - dev_err(&pdev->dev, "Failed to initialize the legacy API\n"); - od->base = base; dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); @@ -823,8 +818,11 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - /* Channel 0 is used by the legacy API */ 
- chans_available &= ~BCM2835_DMA_BULK_MASK; + /* + * Do not use the FIQ and BULK channels, + * because they are used by the GPU. + */ + chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK); for (i = 0; i < pdev->num_resources; i++) { irq = platform_get_irq(pdev, i); @@ -868,7 +866,6 @@ static int bcm2835_dma_remove(struct platform_device *pdev) { struct bcm2835_dmadev *od = platform_get_drvdata(pdev); - bcm_dmaman_remove(pdev); dma_async_device_unregister(&od->ddev); bcm2835_dma_free(od); @@ -884,22 +881,7 @@ static struct platform_driver bcm2835_dma_driver = { }, }; -static int bcm2835_dma_init(void) -{ - return platform_driver_register(&bcm2835_dma_driver); -} - -static void bcm2835_dma_exit(void) -{ - platform_driver_unregister(&bcm2835_dma_driver); -} - -/* - * Load after serial driver (arch_initcall) so we see the messages if it fails, - * but before drivers (module_init) that need a DMA channel. - */ -subsys_initcall(bcm2835_dma_init); -module_exit(bcm2835_dma_exit); +module_platform_driver(bcm2835_dma_driver); MODULE_ALIAS("platform:bcm2835-dma"); MODULE_DESCRIPTION("BCM2835 DMA engine driver"); From 9a338755a8629de77fb69642263fbd14b40495af Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Fri, 22 Apr 2016 17:17:37 +0000 Subject: [PATCH 04/19] Revert "dmaengine: bcm2835: Add slave dma support" This reverts commit 8a349301238aabb40c9da5ca8c8492b6b8d146f6. --- drivers/dma/bcm2835-dma.c | 206 +++----------------------------------- 1 file changed, 14 insertions(+), 192 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 696fb30b4f5a01..2d72fe81243fbb 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -1,10 +1,11 @@ /* * BCM2835 DMA engine support * + * This driver only supports cyclic DMA transfers + * as needed for the I2S module. 
+ * * Author: Florian Meier * Copyright 2013 - * Gellert Weisz - * Copyright 2013-2014 * * Based on * OMAP DMAengine support by Russell King @@ -94,8 +95,6 @@ struct bcm2835_desc { size_t size; }; -#define BCM2835_DMA_WAIT_CYCLES 0 /* Slow down DMA transfers: 0-31 */ - #define BCM2835_DMA_CS 0x00 #define BCM2835_DMA_ADDR 0x04 #define BCM2835_DMA_SOURCE_AD 0x0c @@ -112,16 +111,12 @@ struct bcm2835_desc { #define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ #define BCM2835_DMA_INT_EN BIT(0) -#define BCM2835_DMA_WAIT_RESP BIT(3) #define BCM2835_DMA_D_INC BIT(4) -#define BCM2835_DMA_D_WIDTH BIT(5) #define BCM2835_DMA_D_DREQ BIT(6) #define BCM2835_DMA_S_INC BIT(8) -#define BCM2835_DMA_S_WIDTH BIT(9) #define BCM2835_DMA_S_DREQ BIT(10) #define BCM2835_DMA_PER_MAP(x) ((x) << 16) -#define BCM2835_DMA_WAITS(x) (((x) & 0x1f) << 21) #define BCM2835_DMA_DATA_TYPE_S8 1 #define BCM2835_DMA_DATA_TYPE_S16 2 @@ -135,14 +130,6 @@ struct bcm2835_desc { #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) -#define MAX_NORMAL_TRANSFER SZ_1G -/* - * Max length on a Lite channel is 65535 bytes. - * DMA handles byte-enables on SDRAM reads and writes even on 128-bit accesses, - * but byte-enables don't exist on peripheral addresses, so align to 32-bit. 
- */ -#define MAX_LITE_TRANSFER (SZ_64K - 4) - static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -239,19 +226,13 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) d = c->desc; if (d) { - if (c->cyclic) { - vchan_cyclic_callback(&d->vd); - - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, - c->chan_base + BCM2835_DMA_CS); - - } else { - vchan_cookie_complete(&c->desc->vd); - bcm2835_dma_start_desc(c); - } + /* TODO Only works for cyclic DMA */ + vchan_cyclic_callback(&d->vd); } + /* Keep the DMA engine running */ + writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); + spin_unlock_irqrestore(&c->vc.lock, flags); return IRQ_HANDLED; @@ -358,6 +339,8 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; + c->cyclic = true; /* Nothing else is implemented */ + spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); @@ -375,7 +358,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( struct bcm2835_desc *d; dma_addr_t dev_addr; unsigned int es, sync_type; - unsigned int frame, max_size; + unsigned int frame; int i; /* Grab configuration */ @@ -410,12 +393,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( d->c = c; d->dir = direction; - if (c->ch >= 8) /* LITE channel */ - max_size = MAX_LITE_TRANSFER; - else - max_size = MAX_NORMAL_TRANSFER; - period_len = min(period_len, max_size); - d->frames = (buf_len - 1) / (period_len + 1); + d->frames = buf_len / period_len; d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); if (!d->cb_list) { @@ -463,171 +441,17 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( BCM2835_DMA_PER_MAP(c->dreq); /* Length of a frame */ - if (frame != d->frames - 1) - control_block->length = period_len; - else - 
control_block->length = buf_len - (d->frames - 1) * - period_len; + control_block->length = period_len; d->size += control_block->length; /* * Next block is the next frame. - * This function is called on cyclic DMA transfers. + * This DMA engine driver currently only supports cyclic DMA. * Therefore, wrap around at number of frames. */ control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; } - c->cyclic = true; - - return vchan_tx_prep(&c->vc, &d->vd, flags); -} - -static struct dma_async_tx_descriptor * -bcm2835_dma_prep_slave_sg(struct dma_chan *chan, - struct scatterlist *sgl, - unsigned int sg_len, - enum dma_transfer_direction direction, - unsigned long flags, void *context) -{ - struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); - enum dma_slave_buswidth dev_width; - struct bcm2835_desc *d; - dma_addr_t dev_addr; - struct scatterlist *sgent; - unsigned int i, sync_type, split_cnt, max_size; - - if (!is_slave_direction(direction)) { - dev_err(chan->device->dev, "direction not supported\n"); - return NULL; - } - - if (direction == DMA_DEV_TO_MEM) { - dev_addr = c->cfg.src_addr; - dev_width = c->cfg.src_addr_width; - sync_type = BCM2835_DMA_S_DREQ; - } else { - dev_addr = c->cfg.dst_addr; - dev_width = c->cfg.dst_addr_width; - sync_type = BCM2835_DMA_D_DREQ; - } - - /* Bus width translates to the element size (ES) */ - switch (dev_width) { - case DMA_SLAVE_BUSWIDTH_4_BYTES: - break; - default: - dev_err(chan->device->dev, "buswidth not supported: %i\n", - dev_width); - return NULL; - } - - /* Allocate and setup the descriptor. 
*/ - d = kzalloc(sizeof(*d), GFP_NOWAIT); - if (!d) - return NULL; - - d->dir = direction; - - if (c->ch >= 8) /* LITE channel */ - max_size = MAX_LITE_TRANSFER; - else - max_size = MAX_NORMAL_TRANSFER; - - /* - * Store the length of the SG list in d->frames - * taking care to account for splitting up transfers - * too large for a LITE channel - */ - d->frames = 0; - for_each_sg(sgl, sgent, sg_len, i) { - unsigned int len = sg_dma_len(sgent); - - d->frames += len / max_size + 1; - } - - /* Allocate memory for control blocks */ - d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb); - d->control_block_base = dma_zalloc_coherent(chan->device->dev, - d->control_block_size, &d->control_block_base_phys, - GFP_NOWAIT); - if (!d->control_block_base) { - kfree(d); - return NULL; - } - - /* - * Iterate over all SG entries, create a control block - * for each frame and link them together. - * Count the number of times an SG entry had to be split - * as a result of using a LITE channel - */ - split_cnt = 0; - - for_each_sg(sgl, sgent, sg_len, i) { - unsigned int j; - dma_addr_t addr = sg_dma_address(sgent); - unsigned int len = sg_dma_len(sgent); - - for (j = 0; j < len; j += max_size) { - struct bcm2835_dma_cb *control_block = - &d->control_block_base[i + split_cnt]; - - /* Setup addresses */ - if (d->dir == DMA_DEV_TO_MEM) { - control_block->info = BCM2835_DMA_D_INC | - BCM2835_DMA_D_WIDTH | - BCM2835_DMA_S_DREQ; - control_block->src = dev_addr; - control_block->dst = addr + (dma_addr_t)j; - } else { - control_block->info = BCM2835_DMA_S_INC | - BCM2835_DMA_S_WIDTH | - BCM2835_DMA_D_DREQ; - control_block->src = addr + (dma_addr_t)j; - control_block->dst = dev_addr; - } - - /* Common part */ - control_block->info |= - BCM2835_DMA_WAITS(BCM2835_DMA_WAIT_CYCLES); - control_block->info |= BCM2835_DMA_WAIT_RESP; - - /* Enable */ - if (i == sg_len - 1 && len - j <= max_size) - control_block->info |= BCM2835_DMA_INT_EN; - - /* Setup synchronization */ - if (sync_type) 
- control_block->info |= sync_type; - - /* Setup DREQ channel */ - if (c->dreq) - control_block->info |= - BCM2835_DMA_PER_MAP(c->dreq); - - /* Length of a frame */ - control_block->length = min(len - j, max_size); - d->size += control_block->length; - - if (i < sg_len - 1 || len - j > max_size) { - /* Next block is the next frame. */ - control_block->next = - d->control_block_base_phys + - sizeof(struct bcm2835_dma_cb) * - (i + split_cnt + 1); - } else { - /* Next block is empty. */ - control_block->next = 0; - } - - if (len - j > max_size) - split_cnt++; - } - } - - c->cyclic = false; - return vchan_tx_prep(&c->vc, &d->vd, flags); error_cb: i--; @@ -796,7 +620,6 @@ static int bcm2835_dma_probe(struct platform_device *pdev) od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; - od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all = bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); @@ -886,5 +709,4 @@ module_platform_driver(bcm2835_dma_driver); MODULE_ALIAS("platform:bcm2835-dma"); MODULE_DESCRIPTION("BCM2835 DMA engine driver"); MODULE_AUTHOR("Florian Meier "); -MODULE_AUTHOR("Gellert Weisz "); MODULE_LICENSE("GPL v2"); From 2c023f124adbed5180cd5e1fed35fad790dea950 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:24:56 -0700 Subject: [PATCH 05/19] dmaengine: bcm2835: remove unnecessary masking of dma channels The original patch contained 3 dma channels that were masked out. These - as far as research and discussions show - are a artefacts remaining from the downstream legacy dma-api. Right now down-stream still includes a legacy api used only in a single (downstream only) driver (bcm2708_fb) that requires 2D DMA for speedup (DMA-channel 0). 
Formerly the sd-card support driver was also using this legacy api (DMA-channel 2), but it has since been moved over to use dmaengine directly. The DMA-channel 3 is already masked out in the devicetree in the default property "brcm,dma-channel-mask = <0x7f35>;". So we can remove the whole masking of DMA channels. Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 2d72fe81243fbb..e4ca980049bab8 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -123,9 +123,6 @@ struct bcm2835_desc { #define BCM2835_DMA_DATA_TYPE_S32 4 #define BCM2835_DMA_DATA_TYPE_S128 16 -#define BCM2835_DMA_BULK_MASK BIT(0) -#define BCM2835_DMA_FIQ_MASK (BIT(2) | BIT(3)) - /* Valid only for channels 0 - 14, 15 has its own base address */ #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) @@ -641,12 +638,6 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - /* - * Do not use the FIQ and BULK channels, - * because they are used by the GPU. - */ - chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK); - for (i = 0; i < pdev->num_resources; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) From 137aa4c7865c8bb85d8b2d2c7030485d74952a9c Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:24:57 -0700 Subject: [PATCH 06/19] dmaengine: bcm2835: add additional defines for DMA-registers Add additional defines describing the DMA registers as well as adding some more documentation to those registers. 
Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 57 +++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index e4ca980049bab8..a1d851aa5b182f 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -97,26 +97,67 @@ struct bcm2835_desc { #define BCM2835_DMA_CS 0x00 #define BCM2835_DMA_ADDR 0x04 +#define BCM2835_DMA_TI 0x08 #define BCM2835_DMA_SOURCE_AD 0x0c #define BCM2835_DMA_DEST_AD 0x10 -#define BCM2835_DMA_NEXTCB 0x1C +#define BCM2835_DMA_LEN 0x14 +#define BCM2835_DMA_STRIDE 0x18 +#define BCM2835_DMA_NEXTCB 0x1c +#define BCM2835_DMA_DEBUG 0x20 /* DMA CS Control and Status bits */ -#define BCM2835_DMA_ACTIVE BIT(0) -#define BCM2835_DMA_INT BIT(2) +#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */ +#define BCM2835_DMA_END BIT(1) /* current CB has ended */ +#define BCM2835_DMA_INT BIT(2) /* interrupt status */ +#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */ #define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */ #define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */ -#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last + * AXI-write to ack + */ +#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */ +#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */ +/* current value of TI.BCM2835_DMA_WAIT_RESP */ +#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28) +#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */ #define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */ #define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ +/* Transfer information bits - also bcm2835_cb.info field */ #define BCM2835_DMA_INT_EN BIT(0) +#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */ +#define 
BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */ #define BCM2835_DMA_D_INC BIT(4) -#define BCM2835_DMA_D_DREQ BIT(6) +#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */ +#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */ +#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */ #define BCM2835_DMA_S_INC BIT(8) -#define BCM2835_DMA_S_DREQ BIT(10) - -#define BCM2835_DMA_PER_MAP(x) ((x) << 16) +#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */ +#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */ +#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */ +#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12) +#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */ +#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */ +#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */ + +/* debug register bits */ +#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0) +#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1) +#define BCM2835_DMA_DEBUG_READ_ERR BIT(2) +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4 +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4 +#define BCM2835_DMA_DEBUG_ID_SHIFT 16 +#define BCM2835_DMA_DEBUG_ID_BITS 9 +#define BCM2835_DMA_DEBUG_STATE_SHIFT 16 +#define BCM2835_DMA_DEBUG_STATE_BITS 9 +#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25 +#define BCM2835_DMA_DEBUG_VERSION_BITS 3 +#define BCM2835_DMA_DEBUG_LITE BIT(28) + +/* shared registers for all dma channels */ +#define BCM2835_DMA_INT_STATUS 0xfe0 +#define BCM2835_DMA_ENABLE 0xff0 #define BCM2835_DMA_DATA_TYPE_S8 1 #define BCM2835_DMA_DATA_TYPE_S16 2 From 4fecd865543ac922f6ddd4c15a47965c6fa50a7c Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:24:58 -0700 Subject: [PATCH 07/19] dmaengine: bcm2835: move cyclic member from bcm2835_chan into bcm2835_desc In preparation to consolidating code we move the cyclic member into the bcm_2835_desc structure. 
Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index a1d851aa5b182f..b3bc382fd19983 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -73,7 +73,6 @@ struct bcm2835_chan { struct list_head node; struct dma_slave_config cfg; - bool cyclic; unsigned int dreq; int ch; @@ -93,6 +92,8 @@ struct bcm2835_desc { unsigned int frames; size_t size; + + bool cyclic; }; #define BCM2835_DMA_CS 0x00 @@ -377,8 +378,6 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; - c->cyclic = true; /* Nothing else is implemented */ - spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); @@ -432,6 +431,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( d->c = c; d->dir = direction; d->frames = buf_len / period_len; + d->cyclic = true; d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); if (!d->cb_list) { From 5ac4b48921cc115f2657f5b90821ada483e645d5 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:24:59 -0700 Subject: [PATCH 08/19] dmaengine: bcm2835: move controlblock chain generation into separate method In preparation of adding slave_sg functionality this patch moves the generation/allocation of bcm2835_desc and the building of the corresponding DMA-control-block chain from bcm2835_dma_prep_dma_cyclic into the newly created method bcm2835_dma_create_cb_chain. 
Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 294 +++++++++++++++++++++++++------------- 1 file changed, 198 insertions(+), 96 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index b3bc382fd19983..4db0e232fab8e3 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -88,12 +88,12 @@ struct bcm2835_desc { struct virt_dma_desc vd; enum dma_transfer_direction dir; - struct bcm2835_cb_entry *cb_list; - unsigned int frames; size_t size; bool cyclic; + + struct bcm2835_cb_entry cb_list[]; }; #define BCM2835_DMA_CS 0x00 @@ -169,6 +169,13 @@ struct bcm2835_desc { #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) +/* how many frames of max_len size do we need to transfer len bytes */ +static inline size_t bcm2835_dma_frames_for_length(size_t len, + size_t max_len) +{ + return DIV_ROUND_UP(len, max_len); +} + static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -185,19 +192,161 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( return container_of(t, struct bcm2835_desc, vd.tx); } -static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc) { - struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - int i; + size_t i; for (i = 0; i < desc->frames; i++) dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, desc->cb_list[i].paddr); - kfree(desc->cb_list); kfree(desc); } +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +{ + bcm2835_dma_free_cb_chain( + container_of(vd, struct bcm2835_desc, vd)); +} + +static void bcm2835_dma_create_cb_set_length( + struct bcm2835_chan *chan, + struct bcm2835_dma_cb *control_block, + size_t len, + size_t period_len, + size_t *total_len, + u32 
finalextrainfo) +{ + /* set the length */ + control_block->length = len; + + /* finished if we have no period_length */ + if (!period_len) + return; + + /* + * period_len means: that we need to generate + * transfers that are terminating at every + * multiple of period_len - this is typically + * used to set the interrupt flag in info + * which is required during cyclic transfers + */ + + /* have we filled in period_length yet? */ + if (*total_len + control_block->length < period_len) + return; + + /* calculate the length that remains to reach period_length */ + control_block->length = period_len - *total_len; + + /* reset total_length for next period */ + *total_len = 0; + + /* add extrainfo bits in info */ + control_block->info |= finalextrainfo; +} + +/** + * bcm2835_dma_create_cb_chain - create a control block and fills data in + * + * @chan: the @dma_chan for which we run this + * @direction: the direction in which we transfer + * @cyclic: it is a cyclic transfer + * @info: the default info bits to apply per controlblock + * @frames: number of controlblocks to allocate + * @src: the src address to assign (if the S_INC bit is set + * in @info, then it gets incremented) + * @dst: the dst address to assign (if the D_INC bit is set + * in @info, then it gets incremented) + * @buf_len: the full buffer length (may also be 0) + * @period_len: the period length when to apply @finalextrainfo + * in addition to the last transfer + * this will also break some control-blocks early + * @finalextrainfo: additional bits in last controlblock + * (or when period_len is reached in case of cyclic) + * @gfp: the GFP flag to use for allocation + */ +static struct bcm2835_desc *bcm2835_dma_create_cb_chain( + struct dma_chan *chan, enum dma_transfer_direction direction, + bool cyclic, u32 info, u32 finalextrainfo, size_t frames, + dma_addr_t src, dma_addr_t dst, size_t buf_len, + size_t period_len, gfp_t gfp) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len = 
buf_len, total_len; + size_t frame; + struct bcm2835_desc *d; + struct bcm2835_cb_entry *cb_entry; + struct bcm2835_dma_cb *control_block; + + /* allocate and setup the descriptor. */ + d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry), + gfp); + if (!d) + return NULL; + + d->c = c; + d->dir = direction; + d->cyclic = cyclic; + + /* + * Iterate over all frames, create a control block + * for each frame and link them together. + */ + for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) { + cb_entry = &d->cb_list[frame]; + cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + + /* fill in the control block */ + control_block = cb_entry->cb; + control_block->info = info; + control_block->src = src; + control_block->dst = dst; + control_block->stride = 0; + control_block->next = 0; + /* set up length in control_block if requested */ + if (buf_len) { + /* calculate length honoring period_length */ + bcm2835_dma_create_cb_set_length( + c, control_block, + len, period_len, &total_len, + cyclic ? 
finalextrainfo : 0); + + /* calculate new remaining length */ + len -= control_block->length; + } + + /* link this the last controlblock */ + if (frame) + d->cb_list[frame - 1].cb->next = cb_entry->paddr; + + /* update src and dst and length */ + if (src && (info & BCM2835_DMA_S_INC)) + src += control_block->length; + if (dst && (info & BCM2835_DMA_D_INC)) + dst += control_block->length; + + /* Length of total transfer */ + d->size += control_block->length; + } + + /* the last frame requires extra flags */ + d->cb_list[d->frames - 1].cb->info |= finalextrainfo; + + /* detect a size missmatch */ + if (buf_len && (d->size != buf_len)) + goto error_cb; + + return d; +error_cb: + bcm2835_dma_free_cb_chain(d); + + return NULL; +} + static int bcm2835_dma_abort(void __iomem *chan_base) { unsigned long cs; @@ -391,12 +540,11 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); - enum dma_slave_buswidth dev_width; struct bcm2835_desc *d; - dma_addr_t dev_addr; - unsigned int es, sync_type; - unsigned int frame; - int i; + dma_addr_t src, dst; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; /* Grab configuration */ if (!is_slave_direction(direction)) { @@ -404,104 +552,58 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( return NULL; } - if (direction == DMA_DEV_TO_MEM) { - dev_addr = c->cfg.src_addr; - dev_width = c->cfg.src_addr_width; - sync_type = BCM2835_DMA_S_DREQ; - } else { - dev_addr = c->cfg.dst_addr; - dev_width = c->cfg.dst_addr_width; - sync_type = BCM2835_DMA_D_DREQ; - } - - /* Bus width translates to the element size (ES) */ - switch (dev_width) { - case DMA_SLAVE_BUSWIDTH_4_BYTES: - es = BCM2835_DMA_DATA_TYPE_S32; - break; - default: + if (!buf_len) { + dev_err(chan->device->dev, + "%s: bad buffer length (= 0)\n", __func__); return NULL; } - /* Now allocate and setup the descriptor. 
*/ - d = kzalloc(sizeof(*d), GFP_NOWAIT); - if (!d) - return NULL; + /* + * warn if buf_len is not a multiple of period_len - this may leed + * to unexpected latencies for interrupts and thus audiable clicks + */ + if (buf_len % period_len) + dev_warn_once(chan->device->dev, + "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n", + __func__, buf_len, period_len); - d->c = c; - d->dir = direction; - d->frames = buf_len / period_len; - d->cyclic = true; + /* Setup DREQ channel */ + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); - d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); - if (!d->cb_list) { - kfree(d); - return NULL; + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + dst = buf_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + src = buf_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; } - /* Allocate memory for control blocks */ - for (i = 0; i < d->frames; i++) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, - &cb_entry->paddr); - if (!cb_entry->cb) - goto error_cb; - } + /* calculate number of frames */ + frames = DIV_ROUND_UP(buf_len, period_len); /* - * Iterate over all frames, create a control block - * for each frame and link them together. + * allocate the CB chain + * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine + * implementation calls prep_dma_cyclic with interrupts disabled. 
*/ - for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; - - /* Setup adresses */ - if (d->dir == DMA_DEV_TO_MEM) { - control_block->info = BCM2835_DMA_D_INC; - control_block->src = dev_addr; - control_block->dst = buf_addr + frame * period_len; - } else { - control_block->info = BCM2835_DMA_S_INC; - control_block->src = buf_addr + frame * period_len; - control_block->dst = dev_addr; - } - - /* Enable interrupt */ - control_block->info |= BCM2835_DMA_INT_EN; - - /* Setup synchronization */ - if (sync_type != 0) - control_block->info |= sync_type; - - /* Setup DREQ channel */ - if (c->dreq != 0) - control_block->info |= - BCM2835_DMA_PER_MAP(c->dreq); - - /* Length of a frame */ - control_block->length = period_len; - d->size += control_block->length; + d = bcm2835_dma_create_cb_chain(chan, direction, true, + info, extra, + frames, src, dst, buf_len, + period_len, GFP_NOWAIT); + if (!d) + return NULL; - /* - * Next block is the next frame. - * This DMA engine driver currently only supports cyclic DMA. - * Therefore, wrap around at number of frames. 
- */ - control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; - } + /* wrap around into a loop */ + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr; return vchan_tx_prep(&c->vc, &d->vd, flags); -error_cb: - i--; - for (; i >= 0; i--) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); - } - - kfree(d->cb_list); - kfree(d); - return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, From fa760929b08af7d06bd95c07ab3d76328370550e Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:25:00 -0700 Subject: [PATCH 09/19] dmaengine: bcm2835: limit max length based on channel type The bcm2835 dma system has 2 basic types of dma-channels: * "normal" channels * "light" channels Lite channels are limited in several aspects: * internal data-structure is 128 bit (not 256) * does not support BCM2835_DMA_TDMODE (2D) * DMA length register is limited to 16 bit. so 0-65535 (not 0-65536 as mentioned in the official datasheet) * BCM2835_DMA_S/D_IGNORE are not supported The detection of the type of mode is implemented by looking at the LITE bit in the DEBUG register for each channel. This allows automatic detection. Based on this the maximum block size is set to (64K - 4) or to 1G and this limit is honored during generation of control block chains. The effect is that when a LITE channel is used more control blocks are used to do the same transfer (compared to a normal channel). As there are several sources/target DREQS that are 32 bit wide we need to have the transfer to be a multiple of 4 as this would break the transfer otherwise. This is why the limit of (64K - 4) was chosen over the alternative of (64K - 4K). 
Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 4db0e232fab8e3..59c5ef36d97059 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -81,6 +81,8 @@ struct bcm2835_chan { void __iomem *chan_base; int irq_number; + + bool is_lite_channel; }; struct bcm2835_desc { @@ -169,6 +171,16 @@ struct bcm2835_desc { #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) +/* the max dma length for different channels */ +#define MAX_DMA_LEN SZ_1G +#define MAX_LITE_DMA_LEN (SZ_64K - 4) + +static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) +{ + /* lite and normal channels have different max frame length */ + return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN; +} + /* how many frames of max_len size do we need to transfer len bytes */ static inline size_t bcm2835_dma_frames_for_length(size_t len, size_t max_len) @@ -217,8 +229,10 @@ static void bcm2835_dma_create_cb_set_length( size_t *total_len, u32 finalextrainfo) { - /* set the length */ - control_block->length = len; + size_t max_len = bcm2835_dma_max_frame_length(chan); + + /* set the length taking lite-channel limitations into account */ + control_block->length = min_t(u32, len, max_len); /* finished if we have no period_length */ if (!period_len) @@ -544,6 +558,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( dma_addr_t src, dst; u32 info = BCM2835_DMA_WAIT_RESP; u32 extra = BCM2835_DMA_INT_EN; + size_t max_len = bcm2835_dma_max_frame_length(c); size_t frames; /* Grab configuration */ @@ -586,7 +601,10 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( } /* calculate number of frames */ - frames = DIV_ROUND_UP(buf_len, 
period_len); + frames = /* number of periods */ + DIV_ROUND_UP(buf_len, period_len) * + /* number of frames per period */ + bcm2835_dma_frames_for_length(period_len, max_len); /* * allocate the CB chain @@ -685,6 +703,11 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) c->ch = chan_id; c->irq_number = irq; + /* check in DEBUG register if this is a LITE channel */ + if (readl(c->chan_base + BCM2835_DMA_DEBUG) & + BCM2835_DMA_DEBUG_LITE) + c->is_lite_channel = true; + return 0; } From 337a40115dd7835febc730f81a41db9c70b89239 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:25:01 -0700 Subject: [PATCH 10/19] dmaengine: bcm2835: add slave_sg support to bcm2835-dma Add slave_sg support to bcm2835-dma using shared allocation code for bcm2835_desc and DMA-control blocks already used by dma_cyclic. Note that bcm2835_dma_callback had to get modified to support both modes of operation (cyclic and non-cyclic). Tested using: * Hifiberry I2S card (using cyclic DMA) * fb_st7735r SPI-framebuffer (using slave_sg DMA via spi-bcm2835) playing BigBuckBunny for audio and video. 
Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 113 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 5 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 59c5ef36d97059..b46b12f66f387e 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -260,6 +260,23 @@ static void bcm2835_dma_create_cb_set_length( control_block->info |= finalextrainfo; } +static inline size_t bcm2835_dma_count_frames_for_sg( + struct bcm2835_chan *c, + struct scatterlist *sgl, + unsigned int sg_len) +{ + size_t frames = 0; + struct scatterlist *sgent; + unsigned int i; + size_t plength = bcm2835_dma_max_frame_length(c); + + for_each_sg(sgl, sgent, sg_len, i) + frames += bcm2835_dma_frames_for_length( + sg_dma_len(sgent), plength); + + return frames; +} + /** * bcm2835_dma_create_cb_chain - create a control block and fills data in * @@ -361,6 +378,32 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain( return NULL; } +static void bcm2835_dma_fill_cb_chain_with_sg( + struct dma_chan *chan, + enum dma_transfer_direction direction, + struct bcm2835_cb_entry *cb, + struct scatterlist *sgl, + unsigned int sg_len) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t max_len = bcm2835_dma_max_frame_length(c); + unsigned int i, len; + dma_addr_t addr; + struct scatterlist *sgent; + + for_each_sg(sgl, sgent, sg_len, i) { + for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent); + len > 0; + addr += cb->cb->length, len -= cb->cb->length, cb++) { + if (direction == DMA_DEV_TO_MEM) + cb->cb->dst = addr; + else + cb->cb->src = addr; + cb->cb->length = min(len, max_len); + } + } +} + static int bcm2835_dma_abort(void __iomem *chan_base) { unsigned long cs; @@ -428,13 +471,19 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) d = c->desc; if (d) { - /* TODO Only works for cyclic DMA */ - 
vchan_cyclic_callback(&d->vd); + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); + + /* Keep the DMA engine running */ + writel(BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); + } else { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } } - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); - spin_unlock_irqrestore(&c->vc.lock, flags); return IRQ_HANDLED; @@ -548,6 +597,58 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&c->vc.lock, flags); } +static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( + struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src = 0, dst = 0; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; + + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, + "%s: bad direction?\n", __func__); + return NULL; + } + + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + } + + /* count frames in sg list */ + frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len); + + /* allocate the CB chain */ + d = bcm2835_dma_create_cb_chain(chan, direction, false, + info, extra, + frames, src, dst, 0, 0, + GFP_KERNEL); + if (!d) + return NULL; + + /* fill in frames with scatterlist pointers */ + bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list, + sgl, sg_len); + + return vchan_tx_prep(&c->vc, &d->vd, flags); 
+} + static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, @@ -778,11 +879,13 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; + od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all = bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); From b9ff0a9e281809b2d6269562082d781c3cec7703 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Wed, 16 Mar 2016 12:25:02 -0700 Subject: [PATCH 11/19] dmaengine: bcm2835: add dma_memcopy support to bcm2835-dma Also added check for an error condition in bcm2835_dma_create_cb_chain that showed up during development of this patch. Tested using dmatest for all enabled channels. Signed-off-by: Martin Sperl Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index b46b12f66f387e..cc771cd35dd0c2 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -310,6 +310,9 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain( struct bcm2835_cb_entry *cb_entry; struct bcm2835_dma_cb *control_block; + if (!frames) + return NULL; + /* allocate and setup the descriptor. 
*/ d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry), gfp); @@ -597,6 +600,34 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&c->vc.lock, flags); } +struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC; + u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* if src, dst or len is not given return with an error */ + if (!src || !dst || !len) + return NULL; + + /* calculate number of frames */ + frames = bcm2835_dma_frames_for_length(len, max_len); + + /* allocate the CB chain - this also fills in the pointers */ + d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false, + info, extra, frames, + src, dst, len, 0, GFP_KERNEL); + if (!d) + return NULL; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, @@ -880,17 +911,20 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; + od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all 
= bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); - od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; od->ddev.dev = &pdev->dev; INIT_LIST_HEAD(&od->ddev.channels); From a108657572b157a0f0d40b5a1c0b71f864a18274 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 11 Apr 2016 13:29:08 +0000 Subject: [PATCH 12/19] dmaengine: bcm2835: use platform_get_irq_byname Use platform_get_irq_byname to allow for correct mapping of interrupts to dma channels. The currently implemented device tree is unfortunately implemented with the wrong assumption, that each dma-channel has its own interrupt, but dma-irq 11 is handling dma-channel 11-14 and dma-irq 12 is actually a "catch all" interrupt. So here we use the byname variant and require that interrupts are explicitly named via the interrupt-names property in the device tree. The use of shared interrupts is also implemented. 
As a side-effect this means we can now use dma channels 12, 13 and 14 in a correct manner - also testing shows that only using channels 11 to 14 for spi and i2s works perfectly (when playing some video) Signed-off-by: Martin Sperl Acked-by: Eric Anholt Acked-by: Mark Rutland Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 77 ++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index cc771cd35dd0c2..974015193b93cd 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -46,6 +46,9 @@ #include "virt-dma.h" +#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14 +#define BCM2835_DMA_CHAN_NAME_SIZE 8 + struct bcm2835_dmadev { struct dma_device ddev; spinlock_t lock; @@ -81,6 +84,7 @@ struct bcm2835_chan { void __iomem *chan_base; int irq_number; + unsigned int irq_flags; bool is_lite_channel; }; @@ -466,6 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) struct bcm2835_desc *d; unsigned long flags; + /* check the shared interrupt */ + if (c->irq_flags & IRQF_SHARED) { + /* check if the interrupt is enabled */ + flags = readl(c->chan_base + BCM2835_DMA_CS); + /* if not set then we are not the reason for the irq */ + if (!(flags & BCM2835_DMA_INT)) + return IRQ_NONE; + } + spin_lock_irqsave(&c->vc.lock, flags); /* Acknowledge interrupt */ @@ -506,8 +519,8 @@ static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) return -ENOMEM; } - return request_irq(c->irq_number, - bcm2835_dma_callback, 0, "DMA IRQ", c); + return request_irq(c->irq_number, bcm2835_dma_callback, + c->irq_flags, "DMA IRQ", c); } static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) @@ -819,7 +832,8 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) return 0; } -static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, + int irq, 
unsigned int irq_flags) { struct bcm2835_chan *c; @@ -834,6 +848,7 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id); c->ch = chan_id; c->irq_number = irq; + c->irq_flags = irq_flags; /* check in DEBUG register if this is a LITE channel */ if (readl(c->chan_base + BCM2835_DMA_DEBUG) & @@ -882,9 +897,11 @@ static int bcm2835_dma_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; int rc; - int i; - int irq; + int i, j; + int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1]; + int irq_flags; uint32_t chans_available; + char chan_name[BCM2835_DMA_CHAN_NAME_SIZE]; if (!pdev->dev.dma_mask) pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; @@ -941,16 +958,48 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - for (i = 0; i < pdev->num_resources; i++) { - irq = platform_get_irq(pdev, i); - if (irq < 0) - break; - - if (chans_available & (1 << i)) { - rc = bcm2835_dma_chan_init(od, i, irq); - if (rc) - goto err_no_dma; + /* get irqs for each channel that we support */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip masked out channels */ + if (!(chans_available & (1 << i))) { + irq[i] = -1; + continue; } + + /* get the named irq */ + snprintf(chan_name, sizeof(chan_name), "dma%i", i); + irq[i] = platform_get_irq_byname(pdev, chan_name); + if (irq[i] >= 0) + continue; + + /* legacy device tree case handling */ + dev_warn_once(&pdev->dev, + "missing interrupts-names property in device tree - legacy interpretation is used"); + /* + * in case of channel >= 11 + * use the 11th interrupt and that is shared + */ + irq[i] = platform_get_irq(pdev, i < 11 ? 
i : 11); + } + + /* get irqs for each channel */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip channels without irq */ + if (irq[i] < 0) + continue; + + /* check if there are other channels that also use this irq */ + irq_flags = 0; + for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) + if ((i != j) && (irq[j] == irq[i])) { + irq_flags = IRQF_SHARED; + break; + } + + /* initialize the channel */ + rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags); + if (rc) + goto err_no_dma; } dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i); From d9416f4e0d2e935773e52725107f4dc2340f36da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Sat, 3 Oct 2015 22:22:55 +0200 Subject: [PATCH 13/19] dmaengine: bcm2835: Load driver early and support legacy API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Load driver early since at least bcm2708_fb doesn't support deferred probing and even if it did, we don't want the video driver deferred. Support the legacy DMA API which is needed by bcm2708_fb (but only using the dedicated dma channel 0). 
Signed-off-by: Noralf Trønnes Signed-off-by: Martin Sperl --- drivers/dma/Kconfig | 2 +- drivers/dma/bcm2835-dma.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b7a17b1c40e23c..33e36b90cfeddc 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -108,7 +108,7 @@ config COH901318 config DMA_BCM2835 tristate "BCM2835 DMA engine support" - depends on ARCH_BCM2835 + depends on ARCH_BCM2835 || ARCH_BCM2708 || ARCH_BCM2709 select DMA_ENGINE select DMA_VIRTUAL_CHANNELS diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 974015193b93cd..2d7aba11864044 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1021,6 +1022,14 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n"); + /* load the legacy api if bit 0 in the mask is cleared */ + if ((chans_available & BIT(0)) == 0) { + rc = bcm_dmaman_probe(pdev, base, BIT(0)); + if (rc) + dev_err(&pdev->dev, + "Failed to initialize the legacy API\n"); + } + return 0; err_no_dma: @@ -1032,6 +1041,7 @@ static int bcm2835_dma_remove(struct platform_device *pdev) { struct bcm2835_dmadev *od = platform_get_drvdata(pdev); + bcm_dmaman_remove(pdev); dma_async_device_unregister(&od->ddev); bcm2835_dma_free(od); @@ -1047,7 +1057,22 @@ static struct platform_driver bcm2835_dma_driver = { }, }; -module_platform_driver(bcm2835_dma_driver); +static int bcm2835_dma_init(void) +{ + return platform_driver_register(&bcm2835_dma_driver); +} + +static void bcm2835_dma_exit(void) +{ + platform_driver_unregister(&bcm2835_dma_driver); +} + +/* + * Load after serial driver (arch_initcall) so we see the messages if it fails, + * but before drivers (module_init) that need a DMA channel. 
+ */ +subsys_initcall(bcm2835_dma_init); +module_exit(bcm2835_dma_exit); MODULE_ALIAS("platform:bcm2835-dma"); MODULE_DESCRIPTION("BCM2835 DMA engine driver"); From 1436ae915e83f5f68ba6bd5e4461db43228062f5 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Fri, 22 Apr 2016 16:13:59 +0000 Subject: [PATCH 14/19] ARM: bcm270x: changed brcm,dma-channel-mask to mask out the used channel Dma channel0 is used by the legacy api - to avoid conflicts this needs to get masked out in the device-tree instead of hardcoding it in the driver. Signed-off-by: Martin Sperl --- arch/arm/boot/dts/bcm2708_common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2708_common.dtsi b/arch/arm/boot/dts/bcm2708_common.dtsi index fa3f6b672a3d17..3bf78706af57fc 100644 --- a/arch/arm/boot/dts/bcm2708_common.dtsi +++ b/arch/arm/boot/dts/bcm2708_common.dtsi @@ -59,7 +59,7 @@ <1 27>; #dma-cells = <1>; - brcm,dma-channel-mask = <0x0f35>; + brcm,dma-channel-mask = <0x0f34>; }; intc: interrupt-controller@7e00b200 { From 28a1653bdc2366d29726e0495b78ece71669967f Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 23 Apr 2016 14:07:03 +0000 Subject: [PATCH 15/19] ARM: bcm2835: add interrupt-names and apply correct mapping Add interrupt-names properties to dt and apply the correct mapping between irq and dma channels. 
Signed-off-by: Martin Sperl --- arch/arm/boot/dts/bcm2708_common.dtsi | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm2708_common.dtsi b/arch/arm/boot/dts/bcm2708_common.dtsi index 3bf78706af57fc..6f3982eaac8d88 100644 --- a/arch/arm/boot/dts/bcm2708_common.dtsi +++ b/arch/arm/boot/dts/bcm2708_common.dtsi @@ -56,10 +56,32 @@ <1 24>, <1 25>, <1 26>, - <1 27>; + /* dma channel 11-14 share one irq */ + <1 27>, + <1 27>, + <1 27>, + <1 27>, + /* unused shared irq for all channels */ + <1 28>; + interrupt-names = "dma0", + "dma1", + "dma2", + "dma3", + "dma4", + "dma5", + "dma6", + "dma7", + "dma8", + "dma9", + "dma10", + "dma11", + "dma12", + "dma13", + "dma14", + "dma-shared-all"; #dma-cells = <1>; - brcm,dma-channel-mask = <0x0f34>; + brcm,dma-channel-mask = <0x7f34>; }; intc: interrupt-controller@7e00b200 { From 0b7523cbf3b8cea34878590f859aeb8e156ea520 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 23 Apr 2016 14:20:41 +0000 Subject: [PATCH 16/19] ARM: bcm2835: make dma-channel-0 available for kms setups Enable the use of dma-channel 0 when using the vc4-kms-v3d overlay. 
Signed-off-by: Martin Sperl --- arch/arm/boot/dts/overlays/vc4-kms-v3d-overlay.dts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/overlays/vc4-kms-v3d-overlay.dts b/arch/arm/boot/dts/overlays/vc4-kms-v3d-overlay.dts index a4ca2cabd4cd19..4f1cc20f90dc67 100644 --- a/arch/arm/boot/dts/overlays/vc4-kms-v3d-overlay.dts +++ b/arch/arm/boot/dts/overlays/vc4-kms-v3d-overlay.dts @@ -119,6 +119,13 @@ }; }; + fragment@15 { + target-path = "/soc/dma"; + __overlay__ { + brcm,dma-channel-mask = <0x7f35>; + }; + }; + __overrides__ { cma-256 = <0>,"+0-1-2-3-4"; cma-192 = <0>,"-0+1-2-3-4"; From 51c0665bb3f2d136216f8d4eee1a4800a063dae1 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Tue, 7 Jun 2016 19:37:10 +0200 Subject: [PATCH 17/19] dmaengine: bcm2835: Fix cyclic DMA period splitting The code responsible for splitting periods into chunks that can be handled by the DMA controller missed to update total_len, the number of bytes processed in the current period, when there are more chunks to follow. Therefore total_len was stuck at 0 and the code didn't work at all. This resulted in a wrong control block layout and audio issues because the cyclic DMA callback wasn't executing on period boundaries. Fix this by adding the missing total_len update. Signed-off-by: Matthias Reichl Signed-off-by: Martin Sperl Tested-by: Clive Messer --- drivers/dma/bcm2835-dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 2d7aba11864044..ea58967594c5a2 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -252,8 +252,11 @@ static void bcm2835_dma_create_cb_set_length( */ /* have we filled in period_length yet? 
*/ - if (*total_len + control_block->length < period_len) + if (*total_len + control_block->length < period_len) { + /* update number of bytes in this period so far */ + *total_len += control_block->length; return; + } /* calculate the length that remains to reach period_length */ control_block->length = period_len - *total_len; From d212dcf69c924281c7afa1ab23b97a2d79687c86 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Wed, 8 Jun 2016 13:09:56 +0200 Subject: [PATCH 18/19] dmaengine: bcm2835: Avoid splitting periods into very small chunks The current cyclic DMA period splitting implementation can generate very small chunks at the end of each period. For example a 65536 byte period will be split into a 65532 byte chunk and a 4 byte chunk on the "lite" DMA channels. This increases pressure on the RAM controller as the DMA controller needs to fetch two control blocks from RAM in quick succession and could potentially cause latency issues if the RAM is tied up by other devices. We can easily avoid these situations by distributing the remaining length evenly between the last-but-one and the last chunk, making sure that split chunks will be at least half the maximum length the DMA controller can handle. This patch checks if the last chunk would be less than half of the maximum DMA length and if yes distributes the max len+4...max_len*1.5 bytes evenly between the last 2 chunks. This results in chunk sizes between max_len/2 and max_len*0.75 bytes. Signed-off-by: Matthias Reichl Signed-off-by: Martin Sperl Tested-by: Clive Messer --- drivers/dma/bcm2835-dma.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index ea58967594c5a2..0173b912afb26b 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -253,6 +253,20 @@ static void bcm2835_dma_create_cb_set_length( /* have we filled in period_length yet? 
*/ if (*total_len + control_block->length < period_len) { + /* + * If the next control block is the last in the period + * and it's length would be less than half of max_len + * change it so that both control blocks are (almost) + * equally long. This avoids generating very short + * control blocks (worst case would be 4 bytes) which + * might be problematic. We also have to make sure the + * new length is a multiple of 4 bytes. + */ + if (*total_len + control_block->length + max_len / 2 > + period_len) { + control_block->length = + DIV_ROUND_UP(period_len - *total_len, 8) * 4; + } /* update number of bytes in this period so far */ *total_len += control_block->length; return; From f241592b40d9d36dea69d11f7dd3da922cba962f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 3 Jun 2016 19:29:11 -0700 Subject: [PATCH 19/19] dmaengine: bcm2835: Fix polling for completion of DMA with interrupts masked. The tx_status hook is supposed to be safe to call from interrupt context, but it wouldn't ever return completion for the last transfer, meaning you couldn't poll for DMA completion with interrupts masked. This fixes IRQ handling for bcm2835's DSI1, which requires using the DMA engine to write its registers due to a bug in the AXI bridge. 
Signed-off-by: Eric Anholt --- drivers/dma/bcm2835-dma.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 0173b912afb26b..a98e2c2e6888f6 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -588,16 +588,16 @@ static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan, struct virt_dma_desc *vd; enum dma_status ret; unsigned long flags; + u32 residue; ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE || !txstate) + if (ret == DMA_COMPLETE) return ret; spin_lock_irqsave(&c->vc.lock, flags); vd = vchan_find_desc(&c->vc, cookie); if (vd) { - txstate->residue = - bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx)); + residue = bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx)); } else if (c->desc && c->desc->vd.tx.cookie == cookie) { struct bcm2835_desc *d = c->desc; dma_addr_t pos; @@ -609,11 +609,25 @@ static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan, else pos = 0; - txstate->residue = bcm2835_dma_desc_size_pos(d, pos); + residue = bcm2835_dma_desc_size_pos(d, pos); + + /* + * If our non-cyclic transfer is done, then report + * complete and trigger the next tx now. This lets + * the dmaengine API be used synchronously from an IRQ + * handler. + */ + if (!d->cyclic && residue == 0) { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + ret = dma_cookie_status(chan, cookie, txstate); + } } else { - txstate->residue = 0; + residue = 0; } + dma_set_residue(txstate, residue); + spin_unlock_irqrestore(&c->vc.lock, flags); return ret;