Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

treewide: Add SPI write support and flashing utility #45

Merged
merged 8 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cheshire.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ chs-clean-deps:
######################

CHS_NONFREE_REMOTE ?= [email protected]:pulp-restricted/cheshire-nonfree.git
CHS_NONFREE_COMMIT ?= fd3526f
CHS_NONFREE_COMMIT ?= 99e58ec

CHS_PHONY += chs-nonfree-init
chs-nonfree-init:
Expand Down
3 changes: 2 additions & 1 deletion docs/tg/xilinx.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ The second command only ensures correctness of the partition layout; it moves th

Insert your SD card and reset into boot mode 1. You should see a `Hello World!` UART output.

### Boot from onboard flash (`vcu128` only)
### Boot from onboard flash

Build a GPT disk image for your desired binary as explained above, then flash it to your board's flash. For `helloworld`:

Expand Down Expand Up @@ -179,4 +179,5 @@ Flash your image to an SD card or SPI flash as described in the preceding sectio
( P )
( ))))))))))
```

You should then boot through OpenSBI, U-Boot, and Linux until you are dropped into a shell.
9 changes: 7 additions & 2 deletions docs/um/sw.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ The boot ROM supports four builtin boot modes chosen from by the `boot_mode_i` p
| `0b10` | EEPROM (24FC1025) | I2C |


Should a program invoked by the boot ROM return, the boot ROM will attempt to yield control to an external debugger if present, such as GDB, using the `ebreak` instruction.

#### Passive Preload

The *passive preload* boot mode expects code to be preloaded to an executable location and an entry point to be written to `scratch[1:0]`. After preloading, execution is launched when `scratch[2][0]` is set to 1. Unlike for autonomous boot modes, BMPs can directly be preloaded into DRAM and have no size restriction.
Expand All @@ -63,7 +65,6 @@ The JTAG and serial link interfaces can preload programs by directly accessing t
| `0x12` (Write) | 64b address, 64b length | RX `ACK`, TX write data, RX `EOT` |
| `0x13` (Exec) | 64b address | RX `ACK`, execution, RX `ACK`, RX return |


#### Autonomous Boot

The *autonomous* boot modes load a BMP of at most 48 KiB from their boot medium into SPM, then execute it. The boot medium can either be GPT-formatted or contain raw code. If no GPT header is found, raw code execution starts from sector 0 of the boot medium.
Expand All @@ -76,7 +77,11 @@ BMPs that run from SPM and fit into the allotted size can be compiled into raw im
make sw/tests/helloworld.(rom|gpt).(bin|memh)
```

The boot ROM is *not* reentrant; when an invoked BMP returns, the system will halt and not reboot.
These images can then be copied onto a bootable disk. For convenience, we also provide a BMP (`sw/boot/flash.spm.elf`) that can flash images preloaded into DRAM to selected devices. This BMP can be invoked through OpenOCD using the following script (see the BMP source and the script itself for details):

```
util/flash_disk.sh <board_or_adapter> <disk_type_idx> <image>
```

### Zero-Stage Loader

Expand Down
3 changes: 2 additions & 1 deletion hw/bootrom/cheshire_bootrom.S
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,11 @@ _boot:
// If main returns, we end up here
.global _exit
_exit:
// Save the return value to scratch register 2 and wait forever
// Save the return value to scratch register 2, try `ebreak`, then wait forever
slli a0, a0, 1
ori a0, a0, 1
la t0, __base_regs
sw a0, 8(t0) // regs.SCRATCH[2]
ebreak
1: wfi
j 1b
3 changes: 1 addition & 2 deletions hw/bootrom/cheshire_bootrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
//
// Nicole Narr <[email protected]>
// Christopher Reinwardt <[email protected]>
// Paul Scheffler <paulsc@student.ethz.ch>
// Paul Scheffler <paulsc@iis.ee.ethz.ch>

#include <stdint.h>
#include "util.h"
#include "params.h"
#include "regs/cheshire.h"
#include "regs/serial_link.h"
#include "spi_host_regs.h"
#include "dif/clint.h"
#include "hal/i2c_24fc1025.h"
Expand Down
3,496 changes: 1,748 additions & 1,748 deletions hw/bootrom/cheshire_bootrom.sv

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion sw/boot/cheshire.genesys2.dts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,22 @@
/include/ "cheshire.dtsi"

&spi {
boot-with = <0>;
// Choose NOR in "boot-with", as it is only considered if `mmc` probe fails.
// Thus, we boot from MMC (SD) if available and NOR flash otherwise.
boot-with = <1>;
mmc@0 {
compatible = "mmc-spi-slot";
reg = <0>; // CS
spi-max-frequency = <25000000>;
voltage-ranges = <3300 3300>;
disable-wp;
};
nor@1 {
compatible = "s25fl256s1", "jedec,spi-nor";
reg = <1>; // CS
spi-max-frequency = <25000000>;
disable-wp;
};
};

&soc {
Expand Down
9 changes: 0 additions & 9 deletions sw/boot/cheshire.vcu128.dts
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,10 @@
&spi {
boot-with = <1>;
nor@1 {
#address-cells = <0x1>;
#size-cells = <0x1>;
// Note : u-boot does not find mt25qu02g
compatible = "mt25qu02g", "jedec,spi-nor";
reg = <0x1>; // CS
spi-max-frequency = <25000000>;
spi-rx-bus-width = <0x1>;
spi-tx-bus-width = <0x1>;
disable-wp;
partition@0 {
label = "all";
reg = <0x0 0x6000000>; // 96 MB
read-only;
};
};
};
96 changes: 96 additions & 0 deletions sw/boot/flash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Paul Scheffler <[email protected]>
//
// Boot disk flasher for Cheshire; writes a contiguous disk segment to a boot target disk.
// This program can be preloaded and invoked repeatedly to write multiple segments.

#include <stdint.h>
#include "util.h"
#include "params.h"
#include "regs/cheshire.h"
#include "spi_host_regs.h"
#include "dif/clint.h"
#include "hal/i2c_24fc1025.h"
#include "hal/spi_s25fs512s.h"
#include "hal/spi_sdcard.h"
#include "hal/uart_debug.h"
#include "gpt.h"
#include "printf.h"

int flash_spi_sdcard(uint64_t core_freq, uint64_t rtc_freq, void *img_base, uint64_t sector,
uint64_t len) {
// Initialize device handle
spi_sdcard_t device = {
.spi_freq = 24 * 1000 * 1000, // 24MHz (maximum is 25MHz)
.csid = 0,
.csid_dummy = SPI_HOST_PARAM_NUM_C_S - 1 // Last physical CS is designated dummy
};
CHECK_CALL(spi_sdcard_init(&device, core_freq))
// Wait for device to be initialized (1ms, round up extra tick to be sure)
clint_spin_until((1000 * rtc_freq) / (1000 * 1000) + 1);
// Write sectors: we have 512 512B blocks per 256KiB sector, so a 9b left shift
return spi_sdcard_write_blocks(&device, img_base, sector << 9, len << 9, 1);
}

int flash_spi_s25fs512s(uint64_t core_freq, uint64_t rtc_freq, void *img_base, uint64_t sector,
uint64_t len) {
// Initialize device handle
spi_s25fs512s_t device = {
.spi_freq = MIN(40 * 1000 * 1000, core_freq / 4), // Up to quarter core freq or 40MHz
.csid = 1};
CHECK_CALL(spi_s25fs512s_init(&device, core_freq))
// Wait for device to be initialized (t_PU = 300us, round up extra tick to be sure)
clint_spin_until((350 * rtc_freq) / (1000 * 1000) + 1);
// Write sectors of 256 KiB directly
return spi_s25fs512s_single_flash(&device, img_base, sector, len);
}

// Writes the first 128KiB at `img_base` to the 24FC1025 I2C EEPROM, starting at address 0.
// Unlike the SPI flashers, this takes no sector or length: the whole device is always written.
int flash_i2c_24fc1025(uint64_t core_freq, void *img_base) {
    // Entire EEPROM capacity, which is half of a single 256KiB sector
    const int eeprom_bytes = 128 * 1024;
    // Bring up the I2C host for this EEPROM
    dif_i2c_t eeprom;
    CHECK_CALL(i2c_24fc1025_init(&eeprom, core_freq))
    return i2c_24fc1025_write(&eeprom, img_base, 0, eeprom_bytes);
}

// Entry point: writes one contiguous disk segment to the boot target chosen by the caller.
//
// Arguments are read from the Cheshire scratch registers:
//   scratch[0]: target disk (1: SPI SD card, 2: S25FS512S SPI flash, 3: 24FC1025 I2C EEPROM)
//   scratch[1]: base address of the preloaded image buffer
//   scratch[2]: first 256KiB sector to write (ignored for target 3)
//   scratch[3]: number of 256KiB sectors to write (ignored for target 3)
//
// Returns 0 on success, -1 for an unknown target, and otherwise the nonzero status
// of the selected flash routine. The outcome is also reported on the console.
int main(void) {
    // Read reference frequency and compute core frequency
    uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET);
    uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500);
    // Get arguments from scratch registers
    volatile uint32_t *scratch = reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET);
    uint64_t target = scratch[0];
    void *img_base = (void *)(uintptr_t)scratch[1];
    uint64_t sector = scratch[2];
    uint64_t len = scratch[3];
    // Announce the operation. All four values originate from 32-bit scratch registers,
    // so narrowing them to `unsigned` is lossless and makes every argument match its
    // conversion specifier (a 64-bit or pointer argument for `%d`/`%x` is undefined behavior).
    printf("[FLASH] Write buffer at 0x%x of length %u to target %u, sector %u ... ",
           (unsigned)(uintptr_t)img_base, (unsigned)len, (unsigned)target, (unsigned)sector);
    // Flash chosen disk
    int ret;
    switch (target) {
    case 1:
        ret = flash_spi_sdcard(core_freq, rtc_freq, img_base, sector, len);
        break;
    case 2:
        ret = flash_spi_s25fs512s(core_freq, rtc_freq, img_base, sector, len);
        break;
    case 3:
        ret = flash_i2c_24fc1025(core_freq, img_base);
        break;
    default:
        // Unknown target: nothing is written
        ret = -1;
        break;
    }
    // Report the outcome
    if (ret) {
        printf("ERROR (%d)\r\n", ret);
    } else {
        printf("OK\r\n");
    }
    return ret;
}
24 changes: 13 additions & 11 deletions sw/include/dif/dma.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@
#include "regs/idma.h"
#include "params.h"

#define DMA_SRC_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_SRC_ADDR_LOW_REG_OFFSET)
#define DMA_DST_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_DST_ADDR_LOW_REG_OFFSET)
#define DMA_NUMBYTES_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_LENGTH_LOW_REG_OFFSET)
#define DMA_CONF_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_CONF_REG_OFFSET)
#define DMA_STATUS_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_STATUS_0_REG_OFFSET)
#define DMA_NEXTID_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_NEXT_ID_0_REG_OFFSET)
#define DMA_DONE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_DONE_ID_0_REG_OFFSET)
#define DMA_SRC_STRIDE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_SRC_STRIDE_2_LOW_REG_OFFSET)
#define DMA_DST_STRIDE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_DST_STRIDE_2_LOW_REG_OFFSET)
#define DMA_NUM_REPS_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_REPS_2_LOW_REG_OFFSET)
#define DMA_SRC_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_SRC_ADDR_LOW_REG_OFFSET)
#define DMA_DST_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_DST_ADDR_LOW_REG_OFFSET)
#define DMA_NUMBYTES_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_LENGTH_LOW_REG_OFFSET)
#define DMA_CONF_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_CONF_REG_OFFSET)
#define DMA_STATUS_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_STATUS_0_REG_OFFSET)
#define DMA_NEXTID_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_NEXT_ID_0_REG_OFFSET)
#define DMA_DONE_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_DONE_ID_0_REG_OFFSET)
#define DMA_SRC_STRIDE_ADDR(BASE) \
(void *)((uint8_t *)BASE + IDMA_REG64_2D_SRC_STRIDE_2_LOW_REG_OFFSET)
#define DMA_DST_STRIDE_ADDR(BASE) \
(void *)((uint8_t *)BASE + IDMA_REG64_2D_DST_STRIDE_2_LOW_REG_OFFSET)
#define DMA_NUM_REPS_ADDR(BASE) (void *)((uint8_t *)BASE + IDMA_REG64_2D_REPS_2_LOW_REG_OFFSET)
#define DMA_CONF_DECOUPLE_AW 0
#define DMA_CONF_DECOUPLE_RW 0

Expand Down Expand Up @@ -120,6 +122,6 @@
return *(NAME##_dma_status_ptr()); \
}

X(sys, &__base_dma);
X(sys, &__base_dma)

#undef X
4 changes: 2 additions & 2 deletions sw/include/hal/spi_s25fs512s.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ int spi_s25fs512s_init(spi_s25fs512s_t *handle, uint64_t core_freq);

int spi_s25fs512s_single_read(void *priv, void *buf, uint64_t addr, uint64_t len);

// Flashing is done as whole 512B pages
int spi_s25fs512s_single_flash(void *priv, void *buf, uint64_t page, uint64_t num_pages);
// Flashing is done as whole 256KiB sectors
int spi_s25fs512s_single_flash(void *priv, void *buf, uint64_t sector, uint64_t num_sectors);
8 changes: 6 additions & 2 deletions sw/include/hal/spi_sdcard.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ typedef struct {
static const uint64_t __spi_sdcard_init_clock = 200000;

// How many cycles to wait for a non-yielding R1b response
static const uint64_t __spi_sdcard_r1b_timeout = 10000;
static const uint64_t __spi_sdcard_r1b_timeout = 100000;

// How many cycles to wait for another data block
static const uint64_t __spi_sdcard_data_timeout = 10000;
static const uint64_t __spi_sdcard_data_timeout = 100000;

// Sets up only this device; other functions may be used with own setup if requirements are met.
// This assumes the power-up period of 1ms will be elapsed *before* issuing further commands.
Expand All @@ -35,3 +35,7 @@ int spi_sdcard_init(spi_sdcard_t *handle, uint64_t core_freq);
int spi_sdcard_read_checkcrc(void *priv, void *buf, uint64_t addr, uint64_t len);

int spi_sdcard_read_ignorecrc(void *priv, void *buf, uint64_t addr, uint64_t len);

// Transfer whole 512B blocks, aligned on the SD card. CRC must be computed if enabled at the time.
int spi_sdcard_write_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, uint64_t len,
int compute_crc);
4 changes: 2 additions & 2 deletions sw/include/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
#include "params.h"

static inline volatile uint8_t *reg8(void *base, int offs) {
return (volatile uint8_t *)(base + offs);
return (volatile uint8_t *)((uint8_t *)base + offs);
}

static inline volatile uint32_t *reg32(void *base, int offs) {
return (volatile uint32_t *)(base + offs);
return (volatile uint32_t *)((uint8_t *)base + offs);
}

static inline void fence() {
Expand Down
4 changes: 2 additions & 2 deletions sw/lib/dif/uart.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ void uart_read_str(void *uart_base, void *dst, uint64_t len) {
// Default UART provides console
void _putchar(char byte) {
uart_write(&__base_uart, byte);
};
}

char _getchar() {
return uart_read(&__base_uart);
};
}
9 changes: 5 additions & 4 deletions sw/lib/hal/i2c_24fc1025.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "i2c_regs.h"
#include "util.h"
#include "params.h"

#include "dif/clint.h"

int i2c_24fc1025_init(dif_i2c_t *i2c, uint64_t core_freq) {
Expand Down Expand Up @@ -74,7 +73,8 @@ static inline int __i2c_24fc1025_access_chunk(dif_i2c_t *i2c, void *buf, uint64_
CHECK_CALL(dif_i2c_host_set_enabled(i2c, kDifToggleEnabled))
// If our length exceeded half the FIFO, invoke another half transfer
if (len > half_fill)
CHECK_CALL(__i2c_24fc1025_access_chunk(i2c, buf + len, addr + len, len - half_fill, 1))
CHECK_CALL(__i2c_24fc1025_access_chunk(i2c, (uint8_t *)buf + len, addr + len,
len - half_fill, 1))
} else {
// Request read of len bytes
uint64_t ctrl_rdata = ctrl_waddr | 0x1;
Expand All @@ -86,7 +86,7 @@ static inline int __i2c_24fc1025_access_chunk(dif_i2c_t *i2c, void *buf, uint64_
do CHECK_CALL(dif_i2c_get_fifo_levels(i2c, &lfmt, &lrx, &ltx, &lacq))
while (lrx < len);
// Transfer chunk to memory destination
for (uint64_t b = 0; b < len; b++) CHECK_CALL(dif_i2c_read_byte(i2c, buf + b))
for (uint64_t b = 0; b < len; b++) CHECK_CALL(dif_i2c_read_byte(i2c, (uint8_t *)buf + b))
}
// Nothing went wrong
return 0;
Expand All @@ -108,7 +108,8 @@ static inline int __i2c_24fc1025_access(void *priv, void *buf, uint64_t addr, ui
// Copy start-aligned chunks
for (; offs < len; offs += I2C_PARAM_FIFO_DEPTH) {
uint64_t chunk_len = MIN(I2C_PARAM_FIFO_DEPTH, len - offs);
CHECK_CALL(__i2c_24fc1025_access_chunk(i2c, buf + offs, addr + offs, chunk_len, write))
CHECK_CALL(
__i2c_24fc1025_access_chunk(i2c, (uint8_t *)buf + offs, addr + offs, chunk_len, write))
}
// Nothing went wrong
return 0;
Expand Down
Loading
Loading