您尚未登录。

楼主 # 2025-08-26 16:04:06

PENPEN
会员
注册时间: 2025-06-25
已发帖子: 3
积分: 4

D1跑裸机程序的时钟配置问题

最近在研究D1跑裸机程序,工程使用xboot大佬的例程https://whycan.com/t_6683.html/,发现运行很慢,遂简单测量运行速度,cpu配置和测量程序如下所示:

/*
 * Return the current value of the RISC-V `time` CSR.
 *
 * The "memory" clobber keeps the compiler from moving memory accesses
 * across the read.
 */
static inline uint64_t counter(void)
{
	uint64_t ticks;

	__asm__ __volatile__("csrr %0, time\n" : "=r"(ticks) :: "memory");
	return ticks;
}

/*
 * Busy-wait for approximately `us` microseconds.
 *
 * The deadline is computed as 24 counter ticks per microsecond, i.e. the
 * code assumes counter() advances at 24 MHz. The loop polls counter() and
 * leaves on the first reading that is strictly greater than the deadline.
 */
static void sdelay(unsigned long us)
{
	uint64_t now = counter();
	const uint64_t deadline = now + us * 24;

	for(;;)
	{
		now = counter();
		if(now > deadline)
			break;
	}
}

static void set_pll_cpux_axi(void)
{
	uint32_t val;

	/* Select cpux clock src to osc24m, axi divide ratio is 3, system apb clk ratio is 4 */
	write32(D1_CCU_BASE + CCU_RISCV_CLK_REG, (0 << 24) | (3 << 8) | (1 << 0));
	sdelay(1);

	/* Disable pll gating */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val &= ~(1 << 27);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);

	/* Enable pll ldo */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val |= (1 << 30);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);
	sdelay(5);

	/* Set default clk to 1008mhz */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val &= ~((0x3 << 16) | (0xff << 8) | (0x3 << 0));
	val |= (41 << 8);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);

	/* Lock enable */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val |= (1 << 29);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);

	/* Enable pll */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val |= (1 << 31);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);

	/* Wait pll stable */
	while(!(read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG) & (0x1 << 28)));
	sdelay(20);

	/* Enable pll gating */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val |= (1 << 27);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);

	/* Lock disable */
	val = read32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG);
	val &= ~(1 << 29);
	write32(D1_CCU_BASE + CCU_PLL_CPU_CTRL_REG, val);
	sdelay(1);

	/* Set and change cpu clk src */
	val = read32(D1_CCU_BASE + CCU_RISCV_CLK_REG);
	val &= ~(0x07 << 24 | 0x3 << 8 | 0xf << 0);
	val |= (0x05 << 24 | 0x1 << 8);
	write32(D1_CCU_BASE + CCU_RISCV_CLK_REG, val);
	sdelay(1);
}

/*
 * Bring up PLL_PERI0 unless it is already enabled.
 *
 * If bit 31 of the PLL control register is already set the function
 * returns immediately. Otherwise the PSI clock source is first moved to
 * osc24m (bits 25:24 cleared), the PLL is loaded with its default value,
 * enabled, and the function waits for the lock flag (bit 28).
 */
static void set_pll_periph0(void)
{
	uint32_t val;

	/* Periph0 has been enabled */
	if(read32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG) & (1 << 31))
		return;

	/*
	 * Change psi src to osc24m.
	 * write32() takes (address, value); the arguments were previously
	 * passed in the opposite order, so the PSI register was never updated
	 * and a store went to an unintended address.
	 */
	val = read32(D1_CCU_BASE + CCU_PSI_CLK_REG);
	val &= (~(0x3 << 24));
	write32(D1_CCU_BASE + CCU_PSI_CLK_REG, val);

	/* Set default val */
	write32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG, 0x63 << 8);

	/* Lock enable */
	val = read32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG);
	val |= (1 << 29);
	write32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG, val);

	/* Enable pll 600m(1x) 1200m(2x) */
	val = read32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG);
	val |= (1 << 31);
	write32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG, val);

	/* Wait pll stable */
	while(!(read32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG) & (0x1 << 28)));
	sdelay(20);

	/* Lock disable */
	val = read32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG);
	val &= ~(1 << 29);
	write32(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG, val);
}

static void set_ahb(void)
{
	write32(D1_CCU_BASE + CCU_PSI_CLK_REG, (2 << 0) | (0 << 8));
	write32(D1_CCU_BASE + CCU_PSI_CLK_REG, read32(D1_CCU_BASE + CCU_PSI_CLK_REG) | (0x03 << 24));
	sdelay(1);
}

static void set_apb(void)
{
	write32(D1_CCU_BASE + CCU_APB0_CLK_REG, (2 << 0) | (1 << 8));
	write32(D1_CCU_BASE + CCU_APB0_CLK_REG, (0x03 << 24) | read32(D1_CCU_BASE + CCU_APB0_CLK_REG));
	sdelay(1);
}

static void set_dma(void)
{
	/* Dma reset */
	write32(D1_CCU_BASE + CCU_DMA_BGR_REG, read32(D1_CCU_BASE + CCU_DMA_BGR_REG) | (1 << 16));
	sdelay(20);
	/* Enable gating clock for dma */
	write32(D1_CCU_BASE + CCU_DMA_BGR_REG, read32(D1_CCU_BASE + CCU_DMA_BGR_REG) | (1 << 0));
}

static void set_mbus(void)
{
	uint32_t val;

	/* Reset mbus domain */
	val = read32(D1_CCU_BASE + CCU_MBUS_CLK_REG);
	val |= (0x1 << 30);
	write32(D1_CCU_BASE + CCU_MBUS_CLK_REG, val);
	sdelay(1);
}

/*
 * Enable the PLL whose control register lives at `addr`, if it is not
 * enabled yet.
 *
 * addr: address of a PLL control register. Bit 31 is treated as the enable
 *       bit, bit 29 as lock enable and bit 28 as the "stable" flag.
 *
 * Does nothing when bit 31 is already set.
 */
static void set_module(virtual_addr_t addr)
{
	uint32_t reg;

	/* Already enabled: leave the register untouched */
	if(read32(addr) & (1 << 31))
		return;

	/* Set bits 31 and 30 in one write */
	reg = read32(addr);
	write32(addr, reg | (1 << 31) | (1 << 30));

	/* Lock enable */
	reg = read32(addr) | (1 << 29);
	write32(addr, reg);

	/* Spin until the stable flag is reported, then add a settling delay */
	while((read32(addr) & (0x1 << 28)) == 0)
		;
	sdelay(20);

	/* Lock disable */
	reg = read32(addr) & ~(1 << 29);
	write32(addr, reg);
}

void sys_clock_init(void)
{
	set_pll_cpux_axi();
	set_pll_periph0();
	set_ahb();
	set_apb();
	set_dma();
	set_mbus();
	set_module(D1_CCU_BASE + CCU_PLL_PERI0_CTRL_REG);
	set_module(D1_CCU_BASE + CCU_PLL_VIDEO0_CTRL_REG);
	set_module(D1_CCU_BASE + CCU_PLL_VIDEO1_CTRL_REG);
	set_module(D1_CCU_BASE + CCU_PLL_VE_CTRL);
	set_module(D1_CCU_BASE + CCU_PLL_AUDIO0_CTRL_REG);
	set_module(D1_CCU_BASE + CCU_PLL_AUDIO1_CTRL_REG);
}
/*
 * Timing experiment: initialise GPIO, UART and LCD, then read the `time`
 * CSR immediately before and after a run of 50 consecutive nop
 * instructions and print both readings.
 *
 * The nops are deliberately left as 50 individual statements so that the
 * measured instruction sequence is unchanged.
 */
int main(void)
{
    // OS_start(app_init);  // start the RTOS and run the main thread app_init
    uint64_t t1,t2;
    gpio_init(LIGHT_BLUE,GPIO_OUTPUT,LIGHT_ON);
    uart_init(UART_User,115200);
    LCD_Init();
    LCD_Clear(GREEN);

    /* First timestamp */
    __asm__ __volatile__("csrr %0, time\n" : "=r"(t1) :: "memory");

    /* 50 nops under measurement (5 groups of 10) */
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");

    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");

    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");

    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");

    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");
    __asm("nop");

    /* Second timestamp */
    __asm__ __volatile__("csrr %0, time\n" : "=r"(t2) :: "memory");

    /*
     * The readings are unsigned 64-bit values; print them with an unsigned
     * conversion and an argument type that matches it exactly. The casts
     * assume `unsigned long` is 64 bits wide (LP64), which the single-register
     * csrr into a uint64_t above already requires.
     */
    printf("t1:%lu,t2:%lu,\n", (unsigned long)t1, (unsigned long)t2);
    return 0;
}

上面的测试连续执行50条nop指令(按每条1个时钟周期估算,即延时50个周期),并在前后各读取一次time寄存器,用两次读数之差计算这50个时钟周期所用的时间。已知time寄存器的计数频率是24MHz,每加1用时约41.7ns。串口输出结果:t1:118929024,t2:118929029,即time寄存器计了5个数(5×41.7ns≈208ns)。由此得出结论:50个时钟周期用时约200多ns,主频大约为250MHz,低于配置的1GHz。
希望请教下我测量运行速度的方式是否有问题?频率配置是否有误?

最近编辑记录 PENPEN (2025-08-27 15:36:23)

离线

楼主 #1 昨天 11:21:42

PENPEN
会员
注册时间: 2025-06-25
已发帖子: 3
积分: 4

Re: D1跑裸机程序的时钟配置问题

后续改用 __asm__ __volatile__("csrr %0, cycle\n" : "=r"(c1) :: "memory"); 读取cycle寄存器的时钟周期数,测得100个nop所用的总时钟周期数为476,平均每个nop用了4.76个时钟周期;再据此计算主频约为1.14GHz,因此主频配置没错。但每个nop正常应该只消耗1个时钟周期吧?为什么这里会消耗4个多时钟周期?

离线

页脚

工信部备案:粤ICP备20025096号 Powered by FluxBB

感谢为中文互联网持续输出优质内容的各位老铁们。 QQ: 516333132, 微信(wechat): whycan_cn (哇酷网/挖坑网/填坑网) service@whycan.cn