From 3466a4c3659e2ee968682793926c8b32e8522d35 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 21 Oct 2024 17:55:29 -0700 Subject: [PATCH 1/4] Work around PSRAM cache invalidation by reading flash Fixes #2537 While waiting for a working direct cache flush routine, try to force the cache to evict all PSRAM values by reading a bunch of flash addresses (which share the XIP cache). This hurts performance when PSRAM is not used, but is required for correctness until we have a working XIP flush. --- cores/rp2040/flash_wrapper.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/cores/rp2040/flash_wrapper.cpp b/cores/rp2040/flash_wrapper.cpp index f447c60b1..69a02d1cb 100644 --- a/cores/rp2040/flash_wrapper.cpp +++ b/cores/rp2040/flash_wrapper.cpp @@ -17,18 +17,37 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - +#include #include #ifdef PICO_RP2350 #include #endif +#if defined(RP2350_PSRAM_CS) +static volatile uint32_t __wastedsum = 0; +static void __no_inline_not_in_flash_func(flushcache)() { + //for (volatile uint8_t* cache = (volatile uint8_t*)0x18000001; cache < (volatile uint8_t*)(0x18000001 + 2048 * 8); cache += 8) { + // *cache = 0; + //} + uint32_t sum = 0; // Ignored, just to ensure not optimized out + for (volatile uint32_t *flash = (volatile uint32_t *)0x11000000; flash < (volatile uint32_t *)(0x11000000 + 48*1024*4); flash++) { + sum += *flash; + } + __wastedsum += sum; +} +#else +static uint32_t flushcache() { + return 0; +} +#endif + extern "C" { extern void __real_flash_range_erase(uint32_t flash_offs, size_t count); void __wrap_flash_range_erase(uint32_t flash_offs, size_t count) { #ifdef PICO_RP2350 auto s = qmi_hw->m[1]; + flushcache(); #endif __real_flash_range_erase(flash_offs, count); #ifdef PICO_RP2350 @@ -41,6 +60,7 @@ extern "C" { void __wrap_flash_range_program(uint32_t flash_offs, const 
uint8_t *data, size_t count) { #ifdef PICO_RP2350 auto s = qmi_hw->m[1]; + flushcache(); #endif __real_flash_range_program(flash_offs, data, count); #ifdef PICO_RP2350 @@ -53,6 +73,7 @@ extern "C" { void __wrap_flash_get_unique_id(uint8_t *id_out) { #ifdef PICO_RP2350 auto s = qmi_hw->m[1]; + flushcache(); #endif __real_flash_get_unique_id(id_out); #ifdef PICO_RP2350 @@ -65,6 +86,7 @@ extern "C" { void __wrap_flash_do_cmd(const uint8_t *txbuf, uint8_t *rxbuf, size_t count) { #ifdef PICO_RP2350 auto s = qmi_hw->m[1]; + flushcache(); #endif __real_flash_do_cmd(txbuf, rxbuf, count); #ifdef PICO_RP2350 From b33481fa4ff6f82359cd742b2239047a5ab3e250 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 21 Oct 2024 18:09:57 -0700 Subject: [PATCH 2/4] Fix RP2040 build --- cores/rp2040/flash_wrapper.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cores/rp2040/flash_wrapper.cpp b/cores/rp2040/flash_wrapper.cpp index 69a02d1cb..25eafdf27 100644 --- a/cores/rp2040/flash_wrapper.cpp +++ b/cores/rp2040/flash_wrapper.cpp @@ -24,7 +24,7 @@ #include #endif -#if defined(RP2350_PSRAM_CS) +#if defined(PICO_RP2350) && defined(RP2350_PSRAM_CS) static volatile uint32_t __wastedsum = 0; static void __no_inline_not_in_flash_func(flushcache)() { //for (volatile uint8_t* cache = (volatile uint8_t*)0x18000001; cache < (volatile uint8_t*)(0x18000001 + 2048 * 8); cache += 8) { @@ -36,12 +36,13 @@ static void __no_inline_not_in_flash_func(flushcache)() { } __wastedsum += sum; } -#else -static uint32_t flushcache() { - return 0; +#elif defined(PICO_RP2350) +static void __no_inline_not_in_flash_func(flushcache)() { + // Null } #endif + extern "C" { extern void __real_flash_range_erase(uint32_t flash_offs, size_t count); void __wrap_flash_range_erase(uint32_t flash_offs, size_t count) { From 88868a5fb2e096aabafadf2a95588cc346067dff Mon Sep 17 00:00:00 2001 From: "Earle F. 
Philhower, III" Date: Mon, 21 Oct 2024 18:20:26 -0700 Subject: [PATCH 3/4] Fix astyle --- cores/rp2040/flash_wrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cores/rp2040/flash_wrapper.cpp b/cores/rp2040/flash_wrapper.cpp index 25eafdf27..fc44788a7 100644 --- a/cores/rp2040/flash_wrapper.cpp +++ b/cores/rp2040/flash_wrapper.cpp @@ -31,7 +31,7 @@ static void __no_inline_not_in_flash_func(flushcache)() { // *cache = 0; //} uint32_t sum = 0; // Ignored, just to ensure not optimized out - for (volatile uint32_t *flash = (volatile uint32_t *)0x11000000; flash < (volatile uint32_t *)(0x11000000 + 48*1024*4); flash++) { + for (volatile uint32_t *flash = (volatile uint32_t *)0x11000000; flash < (volatile uint32_t *)(0x11000000 + 48 * 1024 * 4); flash++) { sum += *flash; } __wastedsum += sum; From 4ce9d14b5a41cb2c57f44dffd6077638142b6ae1 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Tue, 22 Oct 2024 16:08:17 -0700 Subject: [PATCH 4/4] Invalidate after cleaning the cache line gmx from the RPI forums came up with this hack and it seems to work! 
https://forums.raspberrypi.com/viewtopic.php?p=2262371#p2262371 --- cores/rp2040/flash_wrapper.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cores/rp2040/flash_wrapper.cpp b/cores/rp2040/flash_wrapper.cpp index fc44788a7..e75c55d42 100644 --- a/cores/rp2040/flash_wrapper.cpp +++ b/cores/rp2040/flash_wrapper.cpp @@ -25,16 +25,13 @@ #endif #if defined(PICO_RP2350) && defined(RP2350_PSRAM_CS) -static volatile uint32_t __wastedsum = 0; static void __no_inline_not_in_flash_func(flushcache)() { - //for (volatile uint8_t* cache = (volatile uint8_t*)0x18000001; cache < (volatile uint8_t*)(0x18000001 + 2048 * 8); cache += 8) { - // *cache = 0; - //} - uint32_t sum = 0; // Ignored, just to ensure not optimized out - for (volatile uint32_t *flash = (volatile uint32_t *)0x11000000; flash < (volatile uint32_t *)(0x11000000 + 48 * 1024 * 4); flash++) { - sum += *flash; + for (volatile uint8_t* cache = (volatile uint8_t*)0x18000001; cache < (volatile uint8_t*)(0x18000001 + 2048 * 8); cache += 8) { + *cache = 0; + __compiler_memory_barrier(); + *(cache - 1) = 0; + __compiler_memory_barrier(); } - __wastedsum += sum; } #elif defined(PICO_RP2350) static void __no_inline_not_in_flash_func(flushcache)() {