Accessing external flash from OpenOCD

Hi guys,

I finally got this working. To say it has been a pain is an understatement.

So now we can read and write to the QSPI flash from OpenOCD:

> flash list
{name stm32h7x base 134217728 size 0 bus_width 0 chip_width 0} {name stmqspi base 2415919104 size 0 bus_width 0 chip_width 0}

> flash probe 0
Device: STM32H74x/75x
flash size probed value 128
STM32H flash size is 128kb, base address is 0x8000000
flash 'stm32h7x' found at 0x08000000

> flash probe 1
flash2 'micron n25q256 3v' id = 0x19ba20 size = 32768kbytes
flash 'stmqspi' found at 0x90000000

> flash info 1
#1 : stmqspi at 0x90000000, size 0x02000000, buswidth 0, chipwidth 0
	#  0: 0x00000000 (0x10000 64kB) not protected
	#  1: 0x00010000 (0x10000 64kB) not protected
...
...
	#510: 0x01fe0000 (0x10000 64kB) not protected
	#511: 0x01ff0000 (0x10000 64kB) not protected
flash2 'micron n25q256 3v', device id = 0x19ba20, flash size = 32768kbytes
(page size = 256, read = 0x13, qread = 0xec, pprog = 0x12, mass_erase = 0xc7, sector size = 64kbytes, sector_erase = 0xdc)

So this is what is needed:

We need an OpenOCD version with the stmqspi patch. stmqspi is a flash driver that works with the STM QSPI peripheral. STM use a modified version of this driver in their own version of OpenOCD. I originally tried to get all this working with the STM OpenOCD version, but ran into difficulties because their version expects flash1 to exist if flash2 is being used, and the 32blit is using flash 2. So we have to build one ourselves:

# Get the OpenOCD sources, including submodules
git clone --recursive https://git.code.sf.net/p/openocd/code openocd-code
cd openocd-code
# Fetch Gerrit change 4321 (patch set 7) and apply it on top of the checkout
# (presumably the stmqspi patch mentioned above - confirm the change number)
git fetch http://openocd.zylin.com/openocd refs/changes/21/4321/7 && git cherry-pick FETCH_HEAD
# Generate the configure script, then configure and build;
# --disable-werror keeps compiler warnings from failing the build
./bootstrap
./configure --disable-werror
make

You also need a modified openocd cfg file (32blit.cfg) where I have set up the flash and also the GPIO registers and QSPI registers:

# script for stm32h7x family

#
# stm32h7 devices support both JTAG and SWD transports.
#
source [find target/swj-dp.tcl]
# NOTE(review): mem_helper.tcl is expected to provide the mmw/mrw helpers used
# further down in this file - confirm
source [find mem_helper.tcl]

# Prefix used for every TAP, target and flash bank name created below.
# A CHIPNAME set before this file is sourced overrides the default.
if { [info exists CHIPNAME] } {
   set _CHIPNAME $CHIPNAME
} else {
   set _CHIPNAME stm32h7x
}

# The optional DUAL_BANK, DUAL_CORE and USE_CTI settings are copied into
# variables whose names are built from the chip name (<chipname>.DUAL_BANK ...),
# defaulting to 0. The caller's variable is unset after it has been copied.
# Because of the computed name these are read back with [set $_CHIPNAME.XXX].
if { [info exists DUAL_BANK] } {
	set $_CHIPNAME.DUAL_BANK $DUAL_BANK
	unset DUAL_BANK
} else {
	set $_CHIPNAME.DUAL_BANK 0
}

if { [info exists DUAL_CORE] } {
	set $_CHIPNAME.DUAL_CORE $DUAL_CORE
	unset DUAL_CORE
} else {
	set $_CHIPNAME.DUAL_CORE 0
}

# Issue a warning when hla is used, and fallback to single core configuration
if { [set $_CHIPNAME.DUAL_CORE] && [using_hla] } {
	echo "Warning : hla does not support multicore debugging"
	set $_CHIPNAME.DUAL_CORE 0
}

if { [info exists USE_CTI] } {
	set $_CHIPNAME.USE_CTI $USE_CTI
	unset USE_CTI
} else {
	set $_CHIPNAME.USE_CTI 0
}

# Issue a warning when DUAL_CORE=0 and USE_CTI=1, and fallback to USE_CTI=0
if { ![set $_CHIPNAME.DUAL_CORE] && [set $_CHIPNAME.USE_CTI] } {
	echo "Warning : could not use CTI with a single core device, CTI is disabled"
	set $_CHIPNAME.USE_CTI 0
}

# Endianness passed to every "target create ... cortex_m" below
set _ENDIAN little

# Work-area is a space in RAM used for flash programming
# By default use 64kB
if { [info exists WORKAREASIZE] } {
   set _WORKAREASIZE $WORKAREASIZE
} else {
   set _WORKAREASIZE 0x10000
}

#jtag scan chain
# Expected ID passed to swj_newdap: a caller-supplied CPUTAPID wins, otherwise
# 0x6ba00477 is used when the transport is JTAG and 0x6ba02477 when it is not.
if { [info exists CPUTAPID] } {
   set _CPUTAPID $CPUTAPID
} else {
   if { [using_jtag] } {
	  set _CPUTAPID 0x6ba00477
   } {
      set _CPUTAPID 0x6ba02477
   }
}

# Create the debug port TAP and the DAP object that all targets below attach to
swj_newdap $_CHIPNAME cpu -irlen 4 -ircapture 0x1 -irmask 0xf -expected-id $_CPUTAPID
dap create $_CHIPNAME.dap -chain-position $_CHIPNAME.cpu

# Second TAP (named "bs", 5-bit IR) is only declared on a JTAG transport
if {[using_jtag]} {
 swj_newdap $_CHIPNAME bs -irlen 5
}

if {![using_hla]} {
	# STM32H7 provides an APB-AP at access port 2, which allows the access to
	# the debug and trace features on the system APB System Debug Bus (APB-D).
	target create $_CHIPNAME.ap2 mem_ap -dap $_CHIPNAME.dap -ap-num 2
}

# First core, reached through access port 0
target create $_CHIPNAME.cpu0 cortex_m -endian $_ENDIAN -dap $_CHIPNAME.dap -ap-num 0

# Work area for cpu0 starts at 0x20000000; its contents are not backed up
$_CHIPNAME.cpu0 configure -work-area-phys 0x20000000 -work-area-size $_WORKAREASIZE -work-area-backup 0

# Internal flash, first bank; size 0 leaves the size to be probed
flash bank $_CHIPNAME.bank1.cpu0 stm32h7x 0x08000000 0 0 0 $_CHIPNAME.cpu0

# External flash behind the QUADSPI peripheral, handled by the stmqspi driver
# and mapped at 0x90000000. The trailing 0x52005000 is the same address that
# 32blit_qspi_init writes as QUADSPI_CR, i.e. presumably the controller's
# register base - confirm against the stmqspi driver documentation.
# Banks are numbered in declaration order, so this is bank 1 and an optional
# second internal bank (DUAL_BANK) comes after it.
set _QSPINAME $_CHIPNAME.qspi
flash bank $_QSPINAME stmqspi 0x90000000 0 0 0 $_CHIPNAME.cpu0 0x52005000

if {[set $_CHIPNAME.DUAL_BANK]} {
	flash bank $_CHIPNAME.bank2.cpu0 stm32h7x 0x08100000 0 0 0 $_CHIPNAME.cpu0
}

# Optional second core: reached through access port 3, with its own work area
# at 0x38000000 and its own view of the internal flash bank(s)
if {[set $_CHIPNAME.DUAL_CORE]} {
	target create $_CHIPNAME.cpu1 cortex_m -endian $_ENDIAN -dap $_CHIPNAME.dap -ap-num 3

	$_CHIPNAME.cpu1 configure -work-area-phys 0x38000000 -work-area-size $_WORKAREASIZE -work-area-backup 0

	flash bank $_CHIPNAME.bank1.cpu1 stm32h7x 0x08000000 0 0 0 $_CHIPNAME.cpu1

	if {[set $_CHIPNAME.DUAL_BANK]} {
		flash bank $_CHIPNAME.bank2.cpu1 stm32h7x 0x08100000 0 0 0 $_CHIPNAME.cpu1
	}
}


# 32 blit QUADSPI initialization
#
# Enables the GPIO and QUADSPI clocks, switches the six QSPI pins to their
# alternate functions, programs the QUADSPI controller, sends the flash the
# "exit QPI" and "enter 4-byte address" instructions and finally leaves the
# controller in memory-mapped read mode.
#
#   qpi - 1: additionally send "enter QPI" and use 4-line instruction/address/data
#            fast read; any other value: plain READ with 1-line instruction/address/data
#
# mmw arguments are: address, bits to set, bits to clear.
proc 32blit_qspi_init { qpi } {
	echo "***** 32blit_qspi_init"

	mmw 0x580244E0 0x000007FF 0				;# RCC_AHB4ENR |= GPIOA-GPIOK (enable clocks)
	mmw 0x580244D4 0x00004000 0				;# RCC_AHB3ENR |= QSPIEN (enable clock)
	sleep 1									;# Wait for clock startup

	# Pin list in the form pin:alternate function:speed
	# PB02:AF09:V, PC11:AF09:V, PE10:AF10:V, PE09:AF10:V, PE08:AF10:V, PE07:AF10:V
	# For each pin: MODER field = 0b10 (alternate function), OSPEEDR field = 0b11,
	# PUPDR field = 0b00 (cleared), AFRL/AFRH nibble = alternate function number
	# Port B: PB02:AF09:V
	mmw 0x58020400 0x00000020 0x00000010	;# MODER
	mmw 0x58020408 0x00000030 0x00000000	;# OSPEEDR
	mmw 0x5802040C 0x00000000 0x00000030	;# PUPDR
	mmw 0x58020420 0x00000900 0x00000600	;# AFRL
	# Port C: PC11:AF09:V
	mmw 0x58020800 0x00800000 0x00400000	;# MODER
	mmw 0x58020808 0x00C00000 0x00000000	;# OSPEEDR
	mmw 0x5802080C 0x00000000 0x00C00000	;# PUPDR
	mmw 0x58020824 0x00009000 0x00006000	;# AFRH
	# Port E: PE10:AF10:V, PE09:AF10:V, PE08:AF10:V, PE07:AF10:V
	mmw 0x58021000 0x002A8000 0x00154000	;# MODER
	mmw 0x58021008 0x003FC000 0x00000000	;# OSPEEDR
	mmw 0x5802100C 0x00000000 0x003FC000	;# PUPDR
	mmw 0x58021020 0xA0000000 0x50000000	;# AFRL (PE7)
	mmw 0x58021024 0x00000AAA 0x00000555	;# AFRH (PE8, PE9, PE10)

	# 32blit pin assignment
	# PC11: BK2_NCS, PB02: CLK, PE10: BK2_IO3, PE9: BK2_IO2, PE8: BK2_IO1, PE7: BK2_IO0

	# correct FSIZE would be 0x19 or 0x1A, however, this causes trouble when
	# reading the last word at end of bank in memory mapped mode
	# increase fsize as a workaround
	# NOTE(review): the value written below is FSIZE=0x19. For a 32 MiB (2^25 byte)
	# device the nominal FSIZE would be 0x18, so 0x19 presumably already is the
	# increased workaround value - confirm against the reference manual.

	# QSPI MT25TL
	mww 0x52005000 0x05400080			;# QUADSPI_CR: PRESCALER=0x05, APMS=1, FSEL=1 (flash 2 selected), EN=0
	mww 0x52005004 0x00190100			;# QUADSPI_DCR: FSIZE=0x19, CSHT=0x01, CKMODE=0

	# 3-byte-address READ for now; the flash is switched to 4-byte addressing further down
	mww 0x52005014 0x0D002503			;# QUADSPI_CCR: FMODE=0x3, DMODE=0x1, DCYC=0x0, ADSIZE=0x2, ADMODE=0x1, IMODE=0x1, INSTR=READ
	mmw 0x52005000 0x00000001 0		;# QUADSPI_CR: EN=1


	# Exit QPI mode
	mww 0x52005014 0x000003FF				;# QUADSPI_CCR: FMODE=0x0, DMODE=0x0, DCYC=0x0, ADSIZE=0x0, ADMODE=0x0, IMODE=0x3, INSTR=Exit QPI
	sleep 1

	# Enter 4-byte mode
	mww 0x52005014 0x000001B7				;# QUADSPI_CCR: FMODE=0x0, DMODE=0x0, DCYC=0x0, ADSIZE=0x0, ADMODE=0x0, IMODE=0x1, INSTR=Enter 4-byte
	sleep 1

	if { $qpi == 1 } {
		# Enter QPI mode
		mww 0x52005014 0x00000138			;# QUADSPI_CCR: FMODE=0x0, DMODE=0x0, DCYC=0x0, ADSIZE=0x0, ADMODE=0x0, IMODE=0x1, INSTR=Enter QPI
		sleep 1

		# memory-mapped fast read mode with 4-byte addresses and 2 dummy cycles (for read only)
		mww 0x52005014 0x0F083F0B			;# QUADSPI_CCR: FMODE=0x3, DMODE=0x3, DCYC=0x2, ADSIZE=0x3, ADMODE=0x3, IMODE=0x3, INSTR=Fast READ

		# as QSPI is set to instruction/address/data on 4 lines in memory mapped mode, driver will *always*
		# use this setting (i. e. for probe, erase, write)
		# the 'Enter QPI mode' command is chip specific, additionally a w25q256fv must be configured in advance by
		# programming the non-volatile QE bit (bit 1 in status register 2), e. g. by the following commands
		#
		# stmqspi spicmd 2 0 0x06
		# stmqspi spicmd 2 0 0x31 0x02 0x02
		# stmqspi spicmd 2 2 0x35
		#
		# the last one should return  '-> 02 02' to indicate successful setting of QE bit
		# furthermore, the flash chip changes id from 0x1940ef to 0x1960ef upon entering QPI mode
		#
		# NOTE(review): the notes above describe a w25q256fv; the reset-end handler in
		# this file calls this proc with qpi=0, so this branch is not taken by default
	} else {
		# memory-mapped read mode with 4-byte addresses
		mww 0x52005014 0x0D003503			;# QUADSPI_CCR: FMODE=0x3, DMODE=0x1, DCYC=0x0, ADSIZE=0x3, ADMODE=0x1, IMODE=0x1, INSTR=READ
	}
}


# Make sure that cpu0 is selected
targets $_CHIPNAME.cpu0


# Re-run the GPIO/QUADSPI setup at the end of every reset of cpu0.
# The argument 0 selects the non-QPI branch of 32blit_qspi_init
# (memory-mapped READ with 4-byte addresses).
$_CHIPNAME.cpu0 configure -event reset-end {
	32blit_qspi_init 0
}

# Clock after reset is HSI at 64 MHz, no need of PLL
# Initial adapter speed; the reset-init handler further down raises it to 4000
adapter_khz 1800

# Delays applied after releasing nSRST (and nTRST when on JTAG)
adapter_nsrst_delay 100
if {[using_jtag]} {
 jtag_ntrst_delay 100
}

# use hardware reset
#
# The STM32H7 does not support connect_assert_srst mode because the AXI is
# unavailable while SRST is asserted, and that is used to access the DBGMCU
# component at 0x5C001000 in the examine-end event handler.
#
# It is possible to access the DBGMCU component at 0xE00E1000 via AP2 instead
# of the default AP0, and that works with SRST asserted; however, nonzero AP
# usage does not work with HLA, so is not done by default. That change could be
# made in a local configuration file if connect_assert_srst mode is needed for
# a specific application and a non-HLA adapter is in use.
reset_config srst_only srst_nogate

# The settings below are skipped when a high-level adapter (HLA) is in use
if {![using_hla]} {
   # if srst is not fitted use SYSRESETREQ to
   # perform a soft reset
	$_CHIPNAME.cpu0 cortex_m reset_config sysresetreq

	if {[set $_CHIPNAME.DUAL_CORE]} {
		$_CHIPNAME.cpu1 cortex_m reset_config sysresetreq
	}

   # Set CSW[27], which according to ARM ADI v5 appendix E1.4 maps to AHB signal
   # HPROT[3], which according to AMBA AHB/ASB/APB specification chapter 3.7.3
   # makes the data access cacheable. This allows reading and writing data in the
   # CPU cache from the debugger, which is far more useful than going straight to
   # RAM when operating on typical variables, and is generally no worse when
   # operating on special memory locations.
   $_CHIPNAME.dap apcsw 0x08000000 0x08000000
}

# cpu0 examine-end: set up the DBGMCU once the core has been examined.
# stm32h7x_dbgmcu_mmw takes (register offset, bits to set, bits to clear);
# the clear mask is 0 in every call here, so these calls only set bits.
$_CHIPNAME.cpu0 configure -event examine-end {
	# Enable D3 and D1 DBG clocks
	# DBGMCU_CR |= D3DBGCKEN | D1DBGCKEN
	stm32h7x_dbgmcu_mmw 0x004 0x00600000 0

	# Enable debug during low power modes (uses more power)
	# DBGMCU_CR |= DBG_STANDBY | DBG_STOP | DBG_SLEEP in D3, D2 & D1 Domains
	stm32h7x_dbgmcu_mmw 0x004 0x000001BF 0

	# Stop watchdog counters during halt
	# DBGMCU_APB3FZ1 |= WWDG1
	stm32h7x_dbgmcu_mmw 0x034 0x00000040 0
	# DBGMCU_APB1LFZ1 |= WWDG2
	stm32h7x_dbgmcu_mmw 0x03C 0x00000800 0
	# DBGMCU_APB4FZ1 |= WDGLSD1 | WDGLSD2
	stm32h7x_dbgmcu_mmw 0x054 0x000C0000 0
}



# cpu0 trace-config: enable the trace clock in the same DBGMCU_CR register
# (offset 0x004) that the examine-end handler writes
$_CHIPNAME.cpu0 configure -event trace-config {
	# Set TRACECLKEN; TRACE_MODE is set to async; when using sync
	# change this value accordingly to configure trace pins
	# assignment
	stm32h7x_dbgmcu_mmw 0x004 0x00100000 0
}

# cpu0 reset-init: raise the adapter speed from the initial 1800 set earlier
$_CHIPNAME.cpu0 configure -event reset-init {
	# Clock after reset is HSI at 64 MHz, no need of PLL
	adapter_khz 4000
}

# cpu1 examine-end is only registered for dual-core configurations
if {[set $_CHIPNAME.DUAL_CORE]} {
	$_CHIPNAME.cpu1 configure -event examine-end {
		# get _CHIPNAME from the current target
		# (strips the trailing "cpu<digit>" suffix and the character before it)
		set _CHIPNAME [regsub ".cpu\\d$" [target current] ""]
		# make the per-chip USE_CTI flag visible inside this handler
		global $_CHIPNAME.USE_CTI

		# Stop watchdog counters during halt
		# DBGMCU_APB3FZ2 |= WWDG1
		stm32h7x_dbgmcu_mmw 0x038 0x00000040 0
		# DBGMCU_APB1LFZ2 |= WWDG2
		stm32h7x_dbgmcu_mmw 0x040 0x00000800 0
		# DBGMCU_APB4FZ2 |= WDGLSD1 | WDGLSD2
		stm32h7x_dbgmcu_mmw 0x058 0x000C0000 0

		# start cross-triggering between the cores when it was requested
		if {[set $_CHIPNAME.USE_CTI]} {
			stm32h7x_cti_start
		}
	}
}

# Memory read word with an explicit target (counterpart of mrw).
#
#   used_target - name of the target command used for the access
#   reg         - address to read
#
# Returns the single 32-bit word that mem2array stored at index 0.
proc stm32h7x_mrw {used_target reg} {
	set word ""
	$used_target mem2array word 32 $reg 1
	return $word(0)
}

# like mmw, but with target selection
#
#   used_target - name of the target command used for the read and the write
#   reg         - address of the 32-bit register to modify
#   setbits     - mask of bits to set
#   clearbits   - mask of bits to clear (applied before setbits)
#
# Reads the register through stm32h7x_mrw, computes the new value and writes it
# back with the target's mww command.
proc stm32h7x_mmw {used_target reg setbits clearbits} {
	set old [stm32h7x_mrw $used_target $reg]
	# The expression is braced so that the operands are substituted once, by
	# expr itself, instead of being expanded by the interpreter first and then
	# re-parsed as expression text.
	set new [expr {($old & ~$clearbits) | $setbits}]
	$used_target mww $reg $new
}

# mmw for dbgmcu component registers, it accepts the register offset from dbgmcu base
# this procedure will use the mem_ap on AP2 whenever possible
#
#   reg_offset - offset of the register from the DBGMCU base address
#   setbits    - mask of bits to set
#   clearbits  - mask of bits to clear
#
# Without HLA the access goes through the <chipname>.ap2 target at base
# 0xE00E1000; with HLA it goes through the current target at base 0x5C001000.
proc stm32h7x_dbgmcu_mmw {reg_offset setbits clearbits} {
	# use $_CHIPNAME.ap2 if possible, and use the proper dbgmcu base address
	if {![using_hla]} {
		# get _CHIPNAME from the current target by stripping a trailing
		# ".cpu<N>" or ".ap<N>"; the dot is escaped so that only a literal
		# '.' separator is removed, not an arbitrary character
		set _CHIPNAME [regsub {\.(cpu|ap)\d*$} [target current] ""]
		set used_target $_CHIPNAME.ap2
		# braced expr: operands are substituted once, by expr itself
		set reg_addr [expr {0xE00E1000 + $reg_offset}]
	} {
		set used_target [target current]
		set reg_addr [expr {0x5C001000 + $reg_offset}]
	}

	stm32h7x_mmw $used_target $reg_addr $setbits $clearbits
}

# Cross-trigger (CTI) support; everything below, including the helper procs,
# only exists when USE_CTI is set (which this file forces to 0 unless
# DUAL_CORE is set as well)
if {[set $_CHIPNAME.USE_CTI]} {
	# create CTI instances for both cores
	# (access ports 0 and 3 are the ones used for cpu0 and cpu1)
	cti create $_CHIPNAME.cti0 -dap $_CHIPNAME.dap -ap-num 0 -ctibase 0xE0043000
	cti create $_CHIPNAME.cti1 -dap $_CHIPNAME.dap -ap-num 3 -ctibase 0xE0043000

	# acknowledge the trigger on both CTIs whenever either core reports a halt
	$_CHIPNAME.cpu0 configure -event halted { stm32h7x_cti_prepare_restart_all }
	$_CHIPNAME.cpu1 configure -event halted { stm32h7x_cti_prepare_restart_all }

	$_CHIPNAME.cpu0 configure -event debug-halted { stm32h7x_cti_prepare_restart_all }
	$_CHIPNAME.cpu1 configure -event debug-halted { stm32h7x_cti_prepare_restart_all }

	# Route channel 0 through both CTIs and enable them
	# (called from the cpu1 examine-end handler)
	proc stm32h7x_cti_start {} {
		# get _CHIPNAME from the current target
		set _CHIPNAME [regsub ".cpu\\d$" [target current] ""]

		# Configure Cores' CTIs to halt each other
		# TRIGIN0 (DBGTRIGGER) and TRIGOUT0 (EDBGRQ) at CTM_CHANNEL_0
		$_CHIPNAME.cti0 write INEN0 0x1
		$_CHIPNAME.cti0 write OUTEN0 0x1
		$_CHIPNAME.cti1 write INEN0 0x1
		$_CHIPNAME.cti1 write OUTEN0 0x1

		# enable CTIs
		$_CHIPNAME.cti0 enable on
		$_CHIPNAME.cti1 enable on
	}

	# Disable both CTIs again (not called anywhere in this file)
	proc stm32h7x_cti_stop {} {
		# get _CHIPNAME from the current target
		set _CHIPNAME [regsub ".cpu\\d$" [target current] ""]

		$_CHIPNAME.cti0 enable off
		$_CHIPNAME.cti1 enable off
	}

	# Run stm32h7x_cti_prepare_restart for cti0 and cti1
	proc stm32h7x_cti_prepare_restart_all {} {
		stm32h7x_cti_prepare_restart cti0
		stm32h7x_cti_prepare_restart cti1
	}

	# cti - instance suffix, "cti0" or "cti1"; combined with the chip name to
	#       form the CTI command name
	proc stm32h7x_cti_prepare_restart {cti} {
		# get _CHIPNAME from the current target
		set _CHIPNAME [regsub ".cpu\\d$" [target current] ""]

		# Acknowledge EDBGRQ at TRIGOUT0
		# (INACK bit 0 is written to 1 and then back to 0)
		$_CHIPNAME.$cti write INACK 0x01
		$_CHIPNAME.$cti write INACK 0x00
	}
}

Now use this cfg file instead of target/stm32h7x.cfg in your OpenOCD configuration, for example:

# Debug adapter: ST-LINK, using the high-level-adapter (HLA) SWD transport
source [find interface/stlink.cfg]
transport select hla_swd

# NOTE(review): 32blit.cfg as shown above only checks CHIPNAME, not BOARDNAME -
# confirm that this variable is actually consumed somewhere
set BOARDNAME 32blit
# Load the modified target configuration in place of target/stm32h7x.cfg
source 32blit.cfg

blah blah blah