最近开始入坑F1C100s,在坑网蹭了诸多前辈的使用经验,本着能够继续蹭经验的目的,贡献一下使用DMA加速spiflash启动的代码
基于 https://gitee.com/tiny200s/lv7_rtthread_f1c100s.git (https://whycan.com/t_4942.html)的代码,编译出来的程序大约2.6M,把芒果派换上spiflash之后,启动时间大约需要6s,强迫症表示不能忍。
仔细阅读f1c100s_spl里的代码,发现跟xboot里的一样,都是由CPU直读spiflash,每次64字节,中间浪费的时间不少。
最后决定改用DMA读取spiflash,每次128K字节。不得不吐槽一下,全志的手册真是节省,不参考别人的代码都很难读懂。
改成DMA读取spiflash后,基本就是秒起了。
f1c100s_spl.zip
参考各位大神的代码:
https://whycan.com/t_1514.html
https://whycan.com/t_4209.html
离线
这个好 一直觉得启动慢是个问题
离线
这个修改之后。开机时间能到多少呢
离线
没有仔细测,摁下reset,大约1-2s的样子
离线
没有仔细测,摁下reset,大约1-2s的样子
请教楼主,uboot有没有办法改 ^_^
离线
uboot代码有点复杂,我改dma的时候还想参考来着,看看也是直读spiflash就没继续研究了
我这只是抛砖引玉,期待别的大神能够改过去
airpumpkin 说:没有仔细测,摁下reset,大约1-2s的样子
请教楼主,uboot有没有办法改 ^_^
离线
我记得当时玩xboot的时候3MB也没有6秒这么慢,2~3秒的样子
离线
可能测试起止时间标准不一样
我用手机秒表测的,从摁下reset开始,到rtthread的debug串口输出显示启动结束,6s多一点点
实际的视觉体验是屏幕从开始变花到刷新显示,也差不多要这么多时间
我记得当时玩xboot的时候3MB也没有6秒这么慢,2~3秒的样子
离线
666.楼主可以直接在原来的基础上打个补丁就行,把补丁发布出来。
离线
学习下,不错。
离线
编译时报错:..\Driver\Source\sys_spi.c(334): error: #852: expression must be a pointer to a complete object type
出错的代码行是:rxbuf += bytes_cnt;
请教一下,这个编译错误该怎么解决呀?
离线
好东西,虽然我对现在的速度要求不高,1M多的代码也就1-2秒。
离线
启动速度可以这么快?太牛了,学习
离线
3.6m的bin文件 使用dma启动速度约2秒 不用dma需要8秒的样子 点赞
离线
到现在都不太清楚SPL是啥,是uboot里面重定向前的那一部分吗,但是看公司里又是spl来加载uboot
离线
多谢楼主,正好在调spi和dma,一直没调通
离线
这是启动到linux,还是boot?
离线
询问下是否能在1s内快速启动
离线
实测,没用DMA情况下,从flash拷16M固件到DRAM仅需0.75s,真正的秒启动
离线
加开机LOGO就是为了掩饰启动慢吗?
离线
这个spi确实太慢了,3M的代码要加载6s,楼主这个好
离线
gitee上没看到有spi + dma的代码呀?楼主能再分享一下吗?
离线
gitee上没看到有spi + dma的代码呀?楼主能再分享一下吗?
另一个帖子上有
https://gitee.com/LicheePiNano/lv7_rtthread_f1c100s.git
离线
DOUT+DMA模式,太快了!若SPI时钟设为100MHZ,能达到25MB/S;保险起见设置SPI为50MHZ,也有12.5MB/S。
启动个RTOS应用简直了。
#include <stdint.h>
#include <string.h>
#include "f1c100s/reg-ccu.h"
#include "io.h"
#define SPI_USE_DMA (1)
/*
 * Byte offsets of the SPI controller registers, relative to the controller
 * base address (the functions in this file use 0x01c05000 as that base).
 * NOTE(review): the register descriptions below are inferred from how this
 * file uses them - confirm against the SoC user manual.
 */
enum {
    SPI_GCR = 0x004, /* enable / soft-reset bits are set here during init */
    SPI_TCR = 0x008, /* chip-select bits; bit 31 starts a transfer */
    SPI_IER = 0x010,
    SPI_ISR = 0x014,
    SPI_FCR = 0x018, /* FIFO reset bits and the DMA request bit live here */
    SPI_FSR = 0x01c, /* low 8 bits are polled as the RX byte count */
    SPI_WCR = 0x020,
    SPI_CCR = 0x024, /* clock rate control */
    SPI_MBC = 0x030, /* byte counters programmed before every transfer */
    SPI_MTC = 0x034,
    SPI_BCC = 0x038,
    SPI_TXD = 0x200, /* written byte-wise to queue data for transmission */
    SPI_RXD = 0x300, /* read byte-wise (or by DMA) to fetch received data */
};
#if SPI_USE_DMA > 0
/*
 * DMA channel indices. One of these is selected through DMA_NO and passed
 * as the channel argument of the NDMA_xxx()/DDMA_xxx() register macros.
 */
enum {
    DMA0 = 0,
    DMA1 = 1,
    DMA2 = 2,
    DMA3 = 3,
};
/*
 * DMA engine flavours that DMA_MODE can select.
 * NOTE(review): presumably "normal" vs "dedicated" DMA - confirm with the
 * SoC user manual.
 */
enum {
    NDMA = 0,
    DDMA = 1,
};
/* Largest byte count handed to a single DMA transfer in each DMA mode. */
#define NDMA_TRANS_LEN (128u * 1024)
#define DDMA_TRANS_LEN (16u * 1024 * 1024)

/********** DMA info *************/
#define DMA_NO (DMA0)   /* DMA channel used for the SPI flash read */
#define DMA_MODE (NDMA) /* SPI only support NDMA */
#define DMA_TRANS_LEN ((DMA_MODE == NDMA) ? (NDMA_TRANS_LEN) : (DDMA_TRANS_LEN))

/*
 * DMA controller register addresses.
 * NOTE(review): register names presumably follow the SoC user manual
 * (interrupt control / interrupt status / priority) - confirm.
 */
#define DMA_BASE (0x01C02000)
#define DMA_ICR (DMA_BASE + 0x00)
#define DMA_ISR (DMA_BASE + 0x04)
#define DMA_PCR (DMA_BASE + 0x08)

/*
 * Per-channel register blocks: 0x20 bytes per channel, the NDMA bank starts
 * at offset 0x100 and the DDMA bank at offset 0x300. The channel argument is
 * parenthesised so that an expression such as NDMA_CR(ch + 1) expands to the
 * intended address instead of being torn apart by operator precedence.
 */
#define NDMA_CR(dma_n)       (DMA_BASE + 0x100 + 0x20 * (dma_n) + 0x0)
#define NDMA_SRC_ADDR(dma_n) (DMA_BASE + 0x100 + 0x20 * (dma_n) + 0x4)
#define NDMA_DES_ADDR(dma_n) (DMA_BASE + 0x100 + 0x20 * (dma_n) + 0x8)
#define NDMA_BCR(dma_n)      (DMA_BASE + 0x100 + 0x20 * (dma_n) + 0xC)
#define DDMA_CR(dma_n)       (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0x0)
#define DDMA_SRC_ADDR(dma_n) (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0x4)
#define DDMA_DES_ADDR(dma_n) (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0x8)
#define DDMA_BCR(dma_n)      (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0xC)
#define DDMA_PR(dma_n)       (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0x18)
#define DDMA_GD(dma_n)       (DMA_BASE + 0x300 + 0x20 * (dma_n) + 0x1C)
/*
 * Busy-wait for roughly `loops` iterations of a two-instruction countdown.
 *
 * Each pass subtracts one from `loops` (updating the flags) and branches
 * back to the label while the result is non-zero.
 *
 * The decrement happens before the test, so `loops` must be positive:
 * passing 0 would wrap around instead of returning immediately. The only
 * caller visible in this file passes 20.
 *
 * NOTE(review): the brace form of inline assembly used here looks like the
 * Arm Compiler (armcc/Keil) inline assembler - confirm the toolchain before
 * porting to GCC/Clang.
 */
static void sdelay(int loops)
{
loop_again:
__asm volatile {
SUBS loops, loops, #1
BNE loop_again
}
}
/*
 * Enable the DMA controller: open its bus clock gate, then pulse its soft
 * reset (assert, short delay, de-assert). Bit 6 is the DMA bit in both CCU
 * registers touched here.
 */
static void sys_dma_init(void)
{
    uint32_t reg;

    /* Enable gate for DMA clock */
    reg = read32(F1C100S_CCU_BASE + CCU_BUS_CLK_GATE0);
    reg |= (0x1 << 6);
    write32(F1C100S_CCU_BASE + CCU_BUS_CLK_GATE0, reg);

    /* Put the controller into reset ... */
    reg = read32(F1C100S_CCU_BASE + CCU_BUS_SOFT_RST0);
    reg &= (~(0x1 << 6));
    write32(F1C100S_CCU_BASE + CCU_BUS_SOFT_RST0, reg);

    sdelay(20);

    /* ... and release it again */
    reg = read32(F1C100S_CCU_BASE + CCU_BUS_SOFT_RST0);
    reg |= (0x1 << 6);
    write32(F1C100S_CCU_BASE + CCU_BUS_SOFT_RST0, reg);
}
/*
 * Counterpart of sys_dma_init(): close the DMA bus clock gate (bit 6 of
 * CCU_BUS_CLK_GATE0). The soft-reset register is left untouched.
 */
static void sys_dma_deinit(void)
{
    uint32_t gate = read32(F1C100S_CCU_BASE + CCU_BUS_CLK_GATE0);

    gate &= (~(0x1 << 6));
    write32(F1C100S_CCU_BASE + CCU_BUS_CLK_GATE0, gate);
}
/*
 * Return the largest number of bytes that may be handed to a single DMA
 * transfer for the DMA mode selected at compile time (DMA_TRANS_LEN).
 */
static uint32_t sys_dma_transfer_len_get(void)
{
    const uint32_t max_len = DMA_TRANS_LEN;

    return max_len;
}
/*
 * Program NDMA channel DMA_NO for one transfer and load it.
 *
 * dst: destination address written to the channel's destination register
 * src: source address written to the channel's source register (the caller
 *      in this file passes the SPI RX data register)
 * len: byte count written to the channel's byte-count register
 *
 * The control word is written last. Its bit 31 is the bit that
 * sys_dma_wait_end() polls until the hardware clears it.
 * NOTE(review): the remaining fields (0x11 at bit 16, bit 5, 0x4 at bit 0)
 * presumably select the destination/source request types and a fixed source
 * address - confirm against the DMA chapter of the SoC user manual.
 */
static void sys_spi_dma_set(void* dst, void* src, uint32_t len)
{
    const uint32_t ctrl = (1u << 31) | (0x11 << 16) | (0x1 << 5) | (0x4 << 0);

    write32(NDMA_SRC_ADDR(DMA_NO), (uint32_t)src);
    write32(NDMA_DES_ADDR(DMA_NO), (uint32_t)dst);
    write32(NDMA_BCR(DMA_NO), len);
    write32(NDMA_CR(DMA_NO), ctrl);
}
/*
 * Kick off a receive-only SPI burst of `len` bytes whose data is drained
 * by DMA rather than by the CPU.
 *
 * Sequence: program the burst counter with `len`, set the transmit counter
 * to zero, select dual mode in SPI_BCC, set bits 8 and 0 in the FIFO
 * control register, and finally set the start bit (bit 31) in SPI_TCR.
 * NOTE(review): FCR bit 8 / bit 0 presumably are the RX DMA request enable
 * and the RX trigger level - confirm against the SoC user manual.
 */
static void sys_spi_dma_start(uint32_t len)
{
    const uint32_t spi = 0x01c05000;
    uint32_t fifo_ctl;
    uint32_t xfer_ctl;

    write32(spi + SPI_MBC, len);
    write32(spi + SPI_MTC, 0);
    write32(spi + SPI_BCC, (1 << 28)); /* dual-mode */

    fifo_ctl = read32(spi + SPI_FCR) | (1 << 8) | (1 << 0);
    write32(spi + SPI_FCR, fifo_ctl);

    xfer_ctl = read32(spi + SPI_TCR) | (1u << 31);
    write32(spi + SPI_TCR, xfer_ctl);
}
/*
 * Spin until NDMA channel DMA_NO has finished: the hardware clears bit 31
 * of the channel control register by itself once the transfer is done.
 * There is no timeout.
 */
static void sys_dma_wait_end(void)
{
    uint32_t ctrl;

    do {
        ctrl = read32(NDMA_CR(DMA_NO));
    } while (ctrl & (1u << 31));
}
#endif
/*
 * Prepare SPI0 for talking to the boot flash.
 *
 * Steps, in order: switch GPIOC0..GPIOC3 to function 2, de-assert the SPI0
 * reset, open the SPI0 bus clock gate, program the clock divider, enable
 * and soft-reset the controller, set up the transfer control register and
 * reset the FIFOs. When SPI_USE_DMA is enabled the DMA controller is
 * initialised as well.
 */
void sys_spi_flash_init(void)
{
    const uint32_t spi = 0x01c05000;
    uint32_t reg;
    uint32_t bits;
    int pin;

    /* Config GPIOC0, GPIOC1, GPIOC2 and GPIOC3: one read-modify-write each */
    reg = 0x01c20848 + 0x00;
    for (pin = 0; pin <= 3; pin++) {
        bits = read32(reg);
        bits &= ~(0xf << ((pin & 0x7) << 2));
        bits |= ((0x2 & 0x7) << ((pin & 0x7) << 2));
        write32(reg, bits);
    }

    /* Deassert spi0 reset */
    reg = 0x01c202c0;
    bits = read32(reg) | (1 << 20);
    write32(reg, bits);

    /* Open the spi0 bus gate */
    reg = 0x01c20000 + 0x60;
    bits = read32(reg) | (1 << 20);
    write32(reg, bits);

    /* Set spi clock rate control register, divided by 4 */
    write32(spi + SPI_CCR, 0x00001001);

    /*
     * Enable spi0 and do a soft reset; the bit set also contains
     * Transmit Pause Enable (TP_EN). Bit 31 reads back as zero once the
     * reset has completed.
     */
    bits = read32(spi + SPI_GCR);
    bits |= (1UL << 31) | (1 << 7) | (1 << 1) | (1 << 0);
    write32(spi + SPI_GCR, bits);
    while (read32(spi + SPI_GCR) & (1UL << 31)) {
        /* wait for the soft reset to finish */
    }

    /*
     * Transfer control: clear the two lowest bits, set bits 6 and 2.
     * NOTE(review): presumably SPI mode 0 with software-controlled,
     * active-low chip select - confirm against the SoC user manual.
     */
    bits = read32(spi + SPI_TCR);
    bits &= ~(0x3 << 0);
    bits |= (1 << 6) | (1 << 2);
    write32(spi + SPI_TCR, bits);

    /* Set bits 31 and 15 of the FIFO control register (FIFO resets) */
    bits = read32(spi + SPI_FCR);
    bits |= (1UL << 31) | (1 << 15);
    write32(spi + SPI_FCR, bits);

#if SPI_USE_DMA > 0
    sys_dma_init();
#endif
}
/*
 * Shut the flash interface down again: clear the two lowest bits of
 * SPI_GCR to disable the SPI0 controller and, when SPI_USE_DMA is enabled,
 * close the DMA clock gate.
 */
void sys_spi_flash_exit(void)
{
    const uint32_t gcr = 0x01c05000 + SPI_GCR;
    uint32_t ctl;

    /* Disable the spi0 controller */
    ctl = read32(gcr) & ~((1 << 1) | (1 << 0));
    write32(gcr, ctl);

#if SPI_USE_DMA > 0
    sys_dma_deinit();
#endif
}
/*
 * Start a flash transaction: clear the chip-select fields of SPI_TCR
 * (bits 5:4 and bit 7) and write them back as zero.
 * NOTE(review): presumably bits 5:4 pick chip select 0 and bit 7 = 0 drives
 * the line low (active) - confirm against the SoC user manual.
 */
static void sys_spi_select(void)
{
    const uint32_t tcr = 0x01c05000 + SPI_TCR;
    const uint32_t cs_fields = (0x3 << 4) | (0x1 << 7);
    uint32_t ctl = read32(tcr);

    ctl = (ctl & ~cs_fields) | ((0 & 0x3) << 4) | (0x0 << 7);
    write32(tcr, ctl);
}
/*
 * End a flash transaction: clear the chip-select fields of SPI_TCR
 * (bits 5:4 and bit 7), then set bit 7 again.
 * NOTE(review): presumably bit 7 = 1 drives chip select 0 high (inactive) -
 * confirm against the SoC user manual.
 */
static void sys_spi_deselect(void)
{
    const uint32_t tcr = 0x01c05000 + SPI_TCR;
    const uint32_t cs_fields = (0x3 << 4) | (0x1 << 7);
    uint32_t ctl = read32(tcr);

    ctl = (ctl & ~cs_fields) | ((0 & 0x3) << 4) | (0x1 << 7);
    write32(tcr, ctl);
}
/*
 * Program the transmit counters and queue the outgoing bytes.
 *
 * buf: bytes to send, or a null pointer for a receive-only burst
 * len: number of bytes in buf; treated as 0 when buf is null
 *
 * SPI_MTC and SPI_BCC both receive the (24-bit masked) transmit length,
 * then each byte is pushed into the TX data register.
 */
static void sys_spi_write_txbuf(uint8_t* buf, int len)
{
    const uint32_t spi = 0x01c05000;
    const int tx_len = buf ? len : 0;
    int sent = 0;

    write32(spi + SPI_MTC, tx_len & 0xffffff);
    write32(spi + SPI_BCC, tx_len & 0xffffff);

    while (sent < tx_len) {
        write8(spi + SPI_TXD, buf[sent]);
        ++sent;
    }
}
/*
 * CPU-driven SPI transfer, carried out in bursts of at most 64 bytes.
 *
 * txbuf: bytes to send, or NULL to only receive
 * rxbuf: where received bytes are stored, or NULL to discard them
 * len:   total number of bytes to transfer
 *
 * For every burst the function programs the burst counter, queues the TX
 * data, starts the transfer, waits until the RX FIFO level (low byte of
 * SPI_FSR) has reached the burst size and then drains the RX FIFO.
 *
 * Returns len. There is no timeout on the FIFO wait.
 */
static int sys_spi_transfer(void* txbuf, void* rxbuf, int len)
{
    const uint32_t spi = 0x01c05000;
    uint8_t* out = txbuf;
    uint8_t* in = rxbuf;
    int remaining;
    int chunk;
    int k;

    for (remaining = len; remaining > 0; remaining -= chunk) {
        chunk = remaining;
        if (chunk > 64)
            chunk = 64;

        write32(spi + SPI_MBC, chunk);
        sys_spi_write_txbuf(out, chunk);
        write32(spi + SPI_TCR, read32(spi + SPI_TCR) | (1UL << 31));

        /* wait until the whole burst sits in the RX FIFO */
        while ((read32(spi + SPI_FSR) & 0xff) < chunk) {
        }

        /* the FIFO is always drained, even when the data is not wanted */
        for (k = 0; k < chunk; k++) {
            uint8_t byte = read8(spi + SPI_RXD);

            if (in)
                *in++ = byte;
        }

        if (out)
            out += chunk;
    }

    return len;
}
/*
 * Send txlen bytes from txbuf, then receive rxlen bytes into rxbuf.
 *
 * Returns 0 when both phases report the full length, -1 otherwise. The
 * read phase is skipped when the write phase fails.
 */
static int sys_spi_write_then_read(void* txbuf, int txlen, void* rxbuf, int rxlen)
{
    int done;

    done = sys_spi_transfer(txbuf, NULL, txlen);
    if (done != txlen)
        return -1;

    done = sys_spi_transfer(NULL, rxbuf, rxlen);
    return (done == rxlen) ? 0 : -1;
}
/*
 * Read `count` bytes from the flash into `buf`, starting at flash offset
 * `addr`, using command 0x03 followed by a 3-byte big-endian address.
 * Only the low 24 bits of addr are sent. Errors are not reported.
 */
void sys_spi_flash_read(int addr, void* buf, int count)
{
    uint8_t cmd[4] = {
        0x03,
        (uint8_t)(addr >> 16),
        (uint8_t)(addr >> 8),
        (uint8_t)(addr >> 0),
    };

    sys_spi_select();
    (void)sys_spi_write_then_read(cmd, 4, buf, count);
    sys_spi_deselect();
}
/*
 * Read `count` bytes from the flash into `buf`, starting at flash offset
 * `addr`, with the "fast read dual-output" command (0x3b). Data is moved
 * out of the RX FIFO by the CPU.
 *
 * Phase 1 sends the 5-byte command (opcode, 24-bit address, one zero byte)
 * and waits for the start bit in SPI_TCR to clear, then sets bits 31 and
 * 15 of SPI_FCR to reset the FIFOs.
 * Phase 2 receives the payload in bursts of at most 4096 bytes with dual
 * mode selected in SPI_BCC, copying whatever the RX FIFO holds until the
 * burst is complete. There is no timeout on either wait.
 */
void sys_spi_flash_read_dualout(int addr, void* buf, int count)
{
    const uint32_t spi = 0x01c05000;
    const int cmd_len = 5;
    uint8_t cmd[5] = {
        0x3b, /* fast read dual-output */
        (uint8_t)(addr >> 16),
        (uint8_t)(addr >> 8),
        (uint8_t)(addr >> 0),
        0,
    };
    uint8_t* out = buf;
    int chunk;
    int pending;
    int avail;
    int k;

    sys_spi_select();

    /* command phase */
    write32(spi + SPI_MBC, cmd_len);
    write32(spi + SPI_MTC, cmd_len);
    write32(spi + SPI_BCC, cmd_len);
    for (k = 0; k < cmd_len; k++)
        write8(spi + SPI_TXD, cmd[k]);
    write32(spi + SPI_TCR, read32(spi + SPI_TCR) | (1u << 31));
    while (read32(spi + SPI_TCR) & (1u << 31)) {
        /* wait for the command burst to finish */
    }
    write32(spi + SPI_FCR, read32(spi + SPI_FCR) | 0x80008000u);

    /* data phase */
    for (; count > 0; count -= chunk) {
        chunk = (count > 4096) ? 4096 : count;

        write32(spi + SPI_MBC, chunk);
        write32(spi + SPI_MTC, 0);
        write32(spi + SPI_BCC, (1 << 28)); /* dual-mode */
        write32(spi + SPI_TCR, read32(spi + SPI_TCR) | (1u << 31));

        pending = chunk;
        while (pending > 0) {
            avail = (read32(spi + SPI_FSR) & 0xff);
            if (avail <= 0)
                continue;

            pending -= avail;
            for (; avail > 0; avail--)
                *out++ = read8(spi + SPI_RXD);
        }
    }

    sys_spi_deselect();
}
#if SPI_USE_DMA > 0
/*
 * Read `count` bytes from the flash into `buf`, starting at flash offset
 * `addr`, with the "fast read dual-output" command (0x3b). The payload is
 * moved from the SPI RX data register to `buf` by DMA.
 *
 * Phase 1 sends the 5-byte command (opcode, 24-bit address, one zero byte)
 * by CPU and waits for the start bit in SPI_TCR to clear, then sets bits
 * 31 and 15 of SPI_FCR to reset the FIFOs.
 * Phase 2 splits the payload into pieces no larger than
 * sys_dma_transfer_len_get(); for each piece the DMA channel is set up,
 * the SPI burst is started and the function blocks until the DMA is done.
 *
 * NOTE(review): nothing here maintains data caches, so this presumably
 * relies on `buf` being uncached at this boot stage - confirm.
 */
void sys_spi_flash_read_dma(int addr, void* buf, uint32_t count)
{
    const uint32_t spi = 0x01c05000;
    const uint32_t cmd_len = 5;
    uint8_t cmd[5] = {
        0x3b, /* fast read dual-output */
        (uint8_t)(addr >> 16),
        (uint8_t)(addr >> 8),
        (uint8_t)(addr >> 0),
        0,
    };
    uint8_t* dest = buf;
    uint32_t limit;
    uint32_t chunk;
    uint32_t k;

    sys_spi_select();

    /* command phase */
    write32(spi + SPI_MBC, cmd_len);
    write32(spi + SPI_MTC, cmd_len);
    write32(spi + SPI_BCC, cmd_len);
    for (k = 0; k < cmd_len; k++)
        write8(spi + SPI_TXD, cmd[k]);
    write32(spi + SPI_TCR, read32(spi + SPI_TCR) | (1u << 31));
    while (read32(spi + SPI_TCR) & (1u << 31)) {
        /* wait for the command burst to finish */
    }
    write32(spi + SPI_FCR, read32(spi + SPI_FCR) | 0x80008000u);

    /* data phase, one DMA transfer per piece */
    limit = sys_dma_transfer_len_get();
    for (; count > 0; count -= chunk, dest += chunk) {
        chunk = (count > limit) ? limit : count;

        sys_spi_dma_set(dest, (void*)(spi + SPI_RXD), chunk);
        sys_spi_dma_start(chunk);
        sys_dma_wait_end();
    }

    sys_spi_deselect();
}
#endif
离线