From a89dd2ca6542d157890656d918d6ae2f933ab401 Mon Sep 17 00:00:00 2001
From: oharboe <oharboe@b42882b7-edfa-0310-969c-e2dbd0fdcd60>
Date: Thu, 3 Sep 2009 08:23:39 +0000
Subject: [PATCH] David Brownell

Abstract the orion_nand_fast_block_write() routine into a separate
routine -- arm_nandwrite() -- so that other ARM cores can reuse it.

Have davinci_nand do so.  This is faster than byte-at-a-time ops by a
factor of three (!), even given the slowish interactions to support
hardware ECC (1-bit flavor in that test) each 512 bytes; those could
be read more efficiently by on-chip code.

NOTE that until there's a generic "ARM algorithm" structure, this
can't work on newer ARMv6 (like ARM1136) or ARMv7A (like Cortex-A8)
cores, though the downloaded code itself would work just fine there.

git-svn-id: svn://svn.berlios.de/openocd/trunk@2663 b42882b7-edfa-0310-969c-e2dbd0fdcd60
---
 src/flash/Makefile.am    |   2 +
 src/flash/arm_nandio.c   | 131 +++++++++++++++++++++++++++++++++++++++
 src/flash/arm_nandio.h   |  25 ++++++++
 src/flash/davinci_nand.c |  23 ++++++-
 src/flash/nand.h         |   1 +
 src/flash/orion_nand.c   |  83 ++++---------------------
 6 files changed, 191 insertions(+), 74 deletions(-)
 create mode 100644 src/flash/arm_nandio.c
 create mode 100644 src/flash/arm_nandio.h
diff --git a/src/flash/Makefile.am b/src/flash/Makefile.am
index 60a322aa6a..bf39b2e4f3 100644
--- a/src/flash/Makefile.am
+++ b/src/flash/Makefile.am
@@ -6,6 +6,7 @@ AM_CPPFLAGS = \
 METASOURCES = AUTO
 noinst_LTLIBRARIES = libflash.la
 libflash_la_SOURCES = \
+	arm_nandio.c \
 	flash.c \
 	lpc2000.c \
 	cfi.c \
@@ -38,6 +39,7 @@ libflash_la_SOURCES = \
 	avrf.c
 
 noinst_HEADERS = \
+	arm_nandio.h \
 	flash.h \
 	lpc2000.h \
 	cfi.h \
diff --git a/src/flash/arm_nandio.c b/src/flash/arm_nandio.c
new file mode 100644
index 0000000000..fb501e5614
--- /dev/null
+++ b/src/flash/arm_nandio.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2009 by Marvell Semiconductors, Inc.
+ * Written by Nicolas Pitre <nico at marvell.com>
+ *
+ * Copyright (C) 2009 by David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arm_nandio.h"
+#include "armv4_5.h"
+
+
+/*
+ * ARM-specific bulk write of "size" bytes from host buffer "data" to the
+ * 8-bit wide NAND data address nand->data, using a copy loop run on the
+ * target.  ARMv4 and ARMv5 cores only, for now.  Returns ERROR_OK,
+ * ERROR_NAND_NO_BUFFER if no working area could be allocated, or a target
+ * error.  NOTE(review): "size" is not checked against nand->chunk_size.
+ *
+ * Enhancements to target_run_algorithm() could enable faster writes (on
+ * ARMv5+ don't setup/teardown hardware breakpoint) and ARMv6/ARMv7 cores
+ * in ARM mode.  Different code fragments could handle Thumb2 cores like
+ * Cortex-M (different byteswapping) and 16-bit data (different setup).
+ */
+int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size)
+{
+	target_t		*target = nand->target;
+	armv4_5_algorithm_t	algo;
+	reg_param_t		reg_params[3];
+	uint32_t		target_buf;
+	int			retval;
+
+	/* Inputs to the loop below:
+	 *  r0	NAND data address (byte wide)
+	 *  r1	buffer address
+	 *  r2	buffer length (tested only after a byte has been copied)
+	 */
+	static const uint32_t code[] = {
+		0xe4d13001,	/* s: ldrb  r3, [r1], #1 */
+		0xe5c03000,	/*    strb  r3, [r0]     */
+		0xe2522001,	/*    subs  r2, r2, #1   */
+		0x1afffffb,	/*    bne   s            */
+
+		/* exit: ARMv4 needs hardware breakpoint */
+		0xe1200070,	/* e: bkpt  #0           */
+	};
+
+	if (!nand->copy_area) {
+		uint8_t		code_buf[sizeof(code)];
+		unsigned	i;
+
+		/* first use: get a working area for the code plus one chunk */
+		if (target_alloc_working_area(target,
+				sizeof(code) + nand->chunk_size,
+				&nand->copy_area) != ERROR_OK) {
+			LOG_DEBUG("%s: no %d byte buffer",
+					__FUNCTION__,
+					(int) sizeof(code) + nand->chunk_size);
+			return ERROR_NAND_NO_BUFFER;
+		}
+
+		/* buffer code in target endianness */
+		for (i = 0; i < sizeof(code) / 4; i++)
+			target_buffer_set_u32(target, code_buf + i * 4, code[i]);
+
+		/* copy code to work area; NOTE(review): never re-sent if this fails */
+                retval = target_write_memory(target,
+					nand->copy_area->address,
+					4, sizeof(code) / 4, code_buf);
+		if (retval != ERROR_OK)
+			return retval;
+	}
+
+	/* copy data to work area after the code: words first, then tail bytes */
+	target_buf = nand->copy_area->address + sizeof(code);
+	retval = target_bulk_write_memory(target, target_buf, size / 4, data);
+	if (retval == ERROR_OK && (size & 3) != 0)
+		retval = target_write_memory(target,
+				target_buf + (size & ~3),
+				1, size & 3, data + (size & ~3));
+	if (retval != ERROR_OK)
+		return retval;
+
+	/* set up algorithm (ARM state, SVC mode) and its r0..r2 parameters */
+	algo.common_magic = ARMV4_5_COMMON_MAGIC;
+	algo.core_mode = ARMV4_5_MODE_SVC;
+	algo.core_state = ARMV4_5_STATE_ARM;
+
+	init_reg_param(&reg_params[0], "r0", 32, PARAM_IN);
+	init_reg_param(&reg_params[1], "r1", 32, PARAM_IN);
+	init_reg_param(&reg_params[2], "r2", 32, PARAM_IN);
+
+	buf_set_u32(reg_params[0].value, 0, 32, nand->data);
+	buf_set_u32(reg_params[1].value, 0, 32, target_buf);
+	buf_set_u32(reg_params[2].value, 0, 32, size);
+
+	/* run the loop up to its final bkpt: writes work area data to NAND chip */
+	retval = target_run_algorithm(target, 0, NULL, 3, reg_params,
+			nand->copy_area->address,
+			nand->copy_area->address + sizeof(code) - 4,
+			1000, &algo);
+	if (retval != ERROR_OK)
+		LOG_ERROR("error executing hosted NAND write");
+
+	destroy_reg_param(&reg_params[0]);
+	destroy_reg_param(&reg_params[1]);
+	destroy_reg_param(&reg_params[2]);
+
+	return retval;
+}
+
+/* REVISIT do the same for bulk *read* too ... */
+
diff --git a/src/flash/arm_nandio.h b/src/flash/arm_nandio.h
new file mode 100644
index 0000000000..eedf5dcac1
--- /dev/null
+++ b/src/flash/arm_nandio.h
@@ -0,0 +1,25 @@
+#ifndef __ARM_NANDIO_H
+#define  __ARM_NANDIO_H
+
+#include "nand.h"
+#include "binarybuffer.h"
+
+struct arm_nand_data {
+	/* target is proxy for some ARM core */
+	struct target_s		*target;
+
+	/* copy_area holds write-to-NAND loop and data; set by arm_nandwrite() */
+	struct working_area_s	*copy_area;
+
+	/* chunk_size == page or ECC unit; sizes the data part of copy_area */
+	unsigned		chunk_size;
+
+	/* data == NAND data address on the target, where bytes are written */
+	uint32_t		data;
+
+	/* currently implicit:  data width == 8 bits (not 16) */
+};
+
+int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size);
+
+#endif  /* __ARM_NANDIO_H */
diff --git a/src/flash/davinci_nand.c b/src/flash/davinci_nand.c
index 29c9630050..41c2b20ab1 100644
--- a/src/flash/davinci_nand.c
+++ b/src/flash/davinci_nand.c
@@ -28,7 +28,7 @@
 #include "config.h"
 #endif
 
-#include "nand.h"
+#include "arm_nandio.h"
 
 
 enum ecc {
@@ -51,6 +51,9 @@ struct davinci_nand {
 	uint32_t		cmd;		/* with CLE */
 	uint32_t		addr;		/* with ALE */
 
+	/* write acceleration */
+	struct arm_nand_data	io;
+
 	/* page i/o for the relevant flavor of hardware ECC */
 	int (*read_page)(struct nand_device_s *nand, uint32_t page,
 			uint8_t *data, uint32_t data_size, uint8_t *oob, uint32_t oob_size);
@@ -181,7 +184,7 @@ static int davinci_read_data(struct nand_device_s *nand, void *data)
 	return ERROR_OK;
 }
 
-/* REVISIT a bit of native code should let block I/O be MUCH faster */
+/* REVISIT a bit of native code should let block reads be MUCH faster */
 
 static int davinci_read_block_data(struct nand_device_s *nand,
 		uint8_t *data, int data_size)
@@ -223,10 +226,17 @@ static int davinci_write_block_data(struct nand_device_s *nand,
 	target_t *target = info->target;
 	uint32_t nfdata = info->data;
 	uint32_t tmp;
+	int status;
 
 	if (!halted(target, "write_block"))
 		return ERROR_NAND_OPERATION_FAILED;
 
+	/* try the fast way first */
+	status = arm_nandwrite(&info->io, data, data_size);
+	if (status != ERROR_NAND_NO_BUFFER)
+		return status;
+
+	/* else do it slowly */
 	while (data_size >= 4) {
 		tmp = le_to_h_u32(data);
 		target_write_u32(target, nfdata, tmp);
@@ -285,6 +295,12 @@ static int davinci_write_page(struct nand_device_s *nand, uint32_t page,
 		memset(oob, 0x0ff, oob_size);
 	}
 
+	/* REVISIT avoid wasting SRAM:  unless nand->use_raw is set,
+	 * use 512 byte chunks.  Read side support will often want
+	 * to include oob_size ...
+	 */
+	info->io.chunk_size = nand->page_size;
+
 	status = info->write_page(nand, page, data, data_size, oob, oob_size);
 	free(ooballoc);
 	return status;
@@ -700,6 +716,9 @@ static int davinci_nand_device_command(struct command_context_s *cmd_ctx,
 
 	nand->controller_priv = info;
 
+	info->io.target = target;
+	info->io.data = info->data;
+
 	/* NOTE:  for now we don't do any error correction on read.
 	 * Nothing else in OpenOCD currently corrects read errors,
 	 * and in any case it's *writing* that we care most about.
diff --git a/src/flash/nand.h b/src/flash/nand.h
index ab87123d60..b73e3304f6 100644
--- a/src/flash/nand.h
+++ b/src/flash/nand.h
@@ -223,5 +223,6 @@ extern int nand_init(struct command_context_s *cmd_ctx);
 #define		ERROR_NAND_OPERATION_NOT_SUPPORTED	(-1103)
 #define		ERROR_NAND_DEVICE_NOT_PROBED	(-1104)
 #define		ERROR_NAND_ERROR_CORRECTION_FAILED	(-1105)
+#define		ERROR_NAND_NO_BUFFER			(-1106)
 
 #endif /* NAND_H */
diff --git a/src/flash/orion_nand.c b/src/flash/orion_nand.c
index dc9d78c537..94df17ba72 100644
--- a/src/flash/orion_nand.c
+++ b/src/flash/orion_nand.c
@@ -26,15 +26,15 @@
 #include "config.h"
 #endif
 
-#include "nand.h"
+#include "arm_nandio.h"
 #include "armv4_5.h"
-#include "binarybuffer.h"
 
 
 typedef struct orion_nand_controller_s
 {
 	struct target_s	*target;
-	working_area_t *copy_area;
+
+	struct arm_nand_data	io;
 
 	uint32_t		cmd;
 	uint32_t		addr;
@@ -99,78 +99,14 @@ static int orion_nand_slow_block_write(struct nand_device_s *device, uint8_t *da
 static int orion_nand_fast_block_write(struct nand_device_s *device, uint8_t *data, int size)
 {
 	orion_nand_controller_t *hw = device->controller_priv;
-	target_t *target = hw->target;
-	armv4_5_algorithm_t algo;
-	reg_param_t reg_params[3];
-	uint32_t target_buf;
 	int retval;
 
-	static const uint32_t code[] = {
-		0xe4d13001,	/* ldrb	r3, [r1], #1	*/
-		0xe5c03000,	/* strb	r3, [r0]	*/
-		0xe2522001,	/* subs	r2, r2, #1	*/
-		0x1afffffb,	/* bne	0		*/
-		0xeafffffe,	/* b	.		*/
-	};
-	int code_size = sizeof(code);
-
-	if (!hw->copy_area) {
-		uint8_t code_buf[code_size];
-		int i;
-
-		/* make sure we have a working area */
-		if (target_alloc_working_area(target,
-					      code_size + device->page_size,
-					      &hw->copy_area) != ERROR_OK)
-		{
-			return orion_nand_slow_block_write(device, data, size);
-		}
-
-		/* copy target instructions to target endianness */
-		for (i = 0; i < code_size/4; i++)
-			target_buffer_set_u32(target, code_buf + i*4, code[i]);
-
-		/* write code to working area */
-                retval = target_write_memory(target,
-					hw->copy_area->address,
-					4, code_size/4, code_buf);
-		if (retval != ERROR_OK)
-			return retval;
-	}
+	hw->io.chunk_size = device->page_size;
+
+	retval = arm_nandwrite(&hw->io, data, size);
+	if (retval == ERROR_NAND_NO_BUFFER)
+		retval = orion_nand_slow_block_write(device, data, size);
 
-	/* copy data to target's memory */
-	target_buf = hw->copy_area->address + code_size;
-	retval = target_bulk_write_memory(target, target_buf, size/4, data);
-	if (retval == ERROR_OK && size & 3) {
-		retval = target_write_memory(target,
-					target_buf + (size & ~3),
-					1, size & 3, data + (size & ~3));
-	}
-	if (retval != ERROR_OK)
-		return retval;
-
-	algo.common_magic = ARMV4_5_COMMON_MAGIC;
-	algo.core_mode = ARMV4_5_MODE_SVC;
-	algo.core_state = ARMV4_5_STATE_ARM;
-
-	init_reg_param(&reg_params[0], "r0", 32, PARAM_IN);
-	init_reg_param(&reg_params[1], "r1", 32, PARAM_IN);
-	init_reg_param(&reg_params[2], "r2", 32, PARAM_IN);
-
-	buf_set_u32(reg_params[0].value, 0, 32, hw->data);
-	buf_set_u32(reg_params[1].value, 0, 32, target_buf);
-	buf_set_u32(reg_params[2].value, 0, 32, size);
-
-	retval = target_run_algorithm(target, 0, NULL, 3, reg_params,
-					hw->copy_area->address,
-					hw->copy_area->address + code_size - 4,
-					1000, &algo);
-	if (retval != ERROR_OK)
-		LOG_ERROR("error executing hosted NAND write");
-
-	destroy_reg_param(&reg_params[0]);
-	destroy_reg_param(&reg_params[1]);
-	destroy_reg_param(&reg_params[2]);
 	return retval;
 }
 
@@ -224,6 +160,9 @@ int orion_nand_device_command(struct command_context_s *cmd_ctx, char *cmd,
 	hw->cmd = base + (1 << cle);
 	hw->addr = base + (1 << ale);
 
+	hw->io.target = hw->target;
+	hw->io.data = hw->data;
+
 	return ERROR_OK;
 }
 
-- 
2.30.2