Cortex-A8: avoid DSCR reads
authorDavid Brownell <dbrownell@users.sourceforge.net>
Wed, 25 Nov 2009 05:24:44 +0000 (21:24 -0800)
committerDavid Brownell <dbrownell@users.sourceforge.net>
Wed, 25 Nov 2009 05:24:44 +0000 (21:24 -0800)
There was a lot of needless handshaking overhead in the current
Cortex-A8 DCC/ITR operations, since the status read by each step
was discarded rather than letting the next step know it.

This shrinks the handshaking by:  (a) passing status along from
previous steps, avoiding re-fetching; which enables the big win
(b) relying on a useful invariant:  that the DSCR_INSTR_COMP bit
is set after every call to a DPM method.

A "reg sp_usr" call previously took 17 flushes; now it takes just 9.
This visibly speeds common operations like entry to debug state and
stepping, as well as "arm reg" and so on.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
src/target/cortex_a8.c

index 299eb3c104fd5606a27d79bd14f39ddde5bdbd88..08e546012efa849f0a330a314c53ea87d21c2fae 100644 (file)
@@ -89,20 +89,25 @@ static int cortex_a8_init_debug_access(struct target *target)
        return retval;
 }
 
-/* FIXME we waste a *LOT* of round-trips with needless DSCR reads, which
- * slows down operations considerably.  One good way to start reducing
- * them would pass current values into and out of this routine.  That
- * should also help synch DCC read/write.
+/* To reduce needless round-trips, pass in a pointer to the current
+ * DSCR value.  Initialize it to zero if you just need to know the
+ * value on return from this function; or (1 << DSCR_INSTR_COMP) if
+ * you happen to know that no instruction is pending.
  */
-static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
+static int cortex_a8_exec_opcode(struct target *target,
+               uint32_t opcode, uint32_t *dscr_p)
 {
        uint32_t dscr;
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct swjdp_common *swjdp = &armv7a->swjdp_info;
 
+       dscr = dscr_p ? *dscr_p : 0;
+
        LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-       do
+
+       /* Wait for InstrCompl bit to be set */
+       while ((dscr & (1 << DSCR_INSTR_COMP)) == 0)
        {
                retval = mem_ap_read_atomic_u32(swjdp,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
@@ -112,7 +117,6 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
                        return retval;
                }
        }
-       while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
 
        mem_ap_write_u32(swjdp, armv7a->debug_base + CPUDBG_ITR, opcode);
 
@@ -128,6 +132,9 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
        }
        while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
 
+       if (dscr_p)
+               *dscr_p = dscr;
+
        return retval;
 }
 
@@ -144,7 +151,7 @@ static int cortex_a8_read_regs_through_mem(struct target *target, uint32_t addre
 
        cortex_a8_dap_read_coreregister_u32(target, regfile, 0);
        cortex_a8_dap_write_coreregister_u32(target, address, 0);
-       cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0));
+       cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
        dap_ap_select(swjdp, swjdp_memoryap);
        mem_ap_read_buf_u32(swjdp, (uint8_t *)(&regfile[1]), 4*15, address);
        dap_ap_select(swjdp, swjdp_debugap);
@@ -158,10 +165,15 @@ static int cortex_a8_read_cp(struct target *target, uint32_t *value, uint8_t CP,
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct swjdp_common *swjdp = &armv7a->swjdp_info;
+       uint32_t dscr = 0;
+
+       /* MRC(...) to read coprocessor register into r0 */
+       cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2),
+                       &dscr);
 
-       cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2));
        /* Move R0 to DTRTX */
-       cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+       cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+                       &dscr);
 
        /* Read DCCTX */
        retval = mem_ap_read_atomic_u32(swjdp,
@@ -187,16 +199,21 @@ static int cortex_a8_write_cp(struct target *target, uint32_t value,
        {
                LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
                /* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode  0xEE000E15 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                               &dscr);
        }
 
+       /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
        retval = mem_ap_write_u32(swjdp,
                        armv7a->debug_base + CPUDBG_DTRRX, value);
+
        /* Move DTRRX to r0 */
-       cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+       cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
 
-       cortex_a8_exec_opcode(target, ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2));
-       return retval;
+       /* MCR(...) to write r0 to coprocessor */
+       return cortex_a8_exec_opcode(target,
+                       ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2),
+                       &dscr);
 }
 
 static int cortex_a8_read_cp15(struct target *target, uint32_t op1, uint32_t op2,
@@ -238,7 +255,7 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
 {
        int retval = ERROR_OK;
        uint8_t reg = regnum&0xFF;
-       uint32_t dscr;
+       uint32_t dscr = 0;
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct swjdp_common *swjdp = &armv7a->swjdp_info;
 
@@ -248,30 +265,35 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
        if (reg < 15)
        {
                /* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0"  0xEE00nE15 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, reg, 0, 5, 0));
+               cortex_a8_exec_opcode(target,
+                               ARMV4_5_MCR(14, 0, reg, 0, 5, 0),
+                               &dscr);
        }
        else if (reg == 15)
        {
                /* "MOV r0, r15"; then move r0 to DCCTX */
-               cortex_a8_exec_opcode(target, 0xE1A0000F);
-               cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+               cortex_a8_exec_opcode(target, 0xE1A0000F, &dscr);
+               cortex_a8_exec_opcode(target,
+                               ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+                               &dscr);
        }
        else
        {
                /* "MRS r0, CPSR" or "MRS r0, SPSR"
                 * then move r0 to DCCTX
                 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1));
-               cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+               cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
+               cortex_a8_exec_opcode(target,
+                               ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+                               &dscr);
        }
 
-       /* Read DTRRTX */
-       do
+       /* Wait for DTRRXfull then read DTRRTX */
+       while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0)
        {
                retval = mem_ap_read_atomic_u32(swjdp,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
        }
-       while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0); /* Wait for DTRRXfull */
 
        retval = mem_ap_read_atomic_u32(swjdp,
                        armv7a->debug_base + CPUDBG_DTRTX, value);
@@ -298,13 +320,14 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
        {
                LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
                /* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode  0xEE000E15 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                               &dscr);
        }
 
        if (Rd > 17)
                return retval;
 
-       /* Write to DCCRX */
+       /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
        LOG_DEBUG("write DCC 0x%08" PRIx32, value);
        retval = mem_ap_write_u32(swjdp,
                        armv7a->debug_base + CPUDBG_DTRRX, value);
@@ -312,28 +335,33 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
        if (Rd < 15)
        {
                /* DCCRX to Rn, "MCR p14, 0, Rn, c0, c5, 0", 0xEE00nE15 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0));
+               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
+                               &dscr);
        }
        else if (Rd == 15)
        {
                /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
                 * then "mov r15, r0"
                 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
-               cortex_a8_exec_opcode(target, 0xE1A0F000);
+               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                               &dscr);
+               cortex_a8_exec_opcode(target, 0xE1A0F000, &dscr);
        }
        else
        {
                /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
                 * then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields)
                 */
-               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
-               cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1));
+               cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                               &dscr);
+               cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
+                               &dscr);
 
                /* "Prefetch flush" after modifying execution status in CPSR */
                if (Rd == 16)
                        cortex_a8_exec_opcode(target,
-                                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4));
+                                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
+                                       &dscr);
        }
 
        return retval;
@@ -354,6 +382,9 @@ static int cortex_a8_dap_write_memap_register_u32(struct target *target, uint32_
 /*
  * Cortex-A8 implementation of Debug Programmer's Model
  *
+ * NOTE the invariant:  these routines return with DSCR_INSTR_COMP set,
+ * so there's no need to poll for it before executing an instruction.
+ *
  * NOTE that in several of these cases the "stall" mode might be useful.
  * It'd let us queue a few operations together... prepare/finish might
  * be the places to enable/disable that mode.
@@ -371,23 +402,30 @@ static int cortex_a8_write_dcc(struct cortex_a8_common *a8, uint32_t data)
                        a8->armv7a_common.debug_base + CPUDBG_DTRRX, data);
 }
 
-static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data)
+static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
+               uint32_t *dscr_p)
 {
        struct swjdp_common *swjdp = &a8->armv7a_common.swjdp_info;
-       uint32_t dscr;
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
        int retval;
 
+       if (dscr_p)
+               dscr = *dscr_p;
+
        /* Wait for DTRRXfull */
-       do {
+       while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0) {
                retval = mem_ap_read_atomic_u32(swjdp,
                                a8->armv7a_common.debug_base + CPUDBG_DSCR,
                                &dscr);
-       } while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0);
+       }
 
        retval = mem_ap_read_atomic_u32(swjdp,
                        a8->armv7a_common.debug_base + CPUDBG_DTRTX, data);
        LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
 
+       if (dscr_p)
+               *dscr_p = dscr;
+
        return retval;
 }
 
@@ -398,9 +436,12 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
        uint32_t dscr;
        int retval;
 
-       retval = mem_ap_read_atomic_u32(swjdp,
-                       a8->armv7a_common.debug_base + CPUDBG_DSCR,
-                       &dscr);
+       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
+       do {
+               retval = mem_ap_read_atomic_u32(swjdp,
+                               a8->armv7a_common.debug_base + CPUDBG_DSCR,
+                               &dscr);
+       } while ((dscr & (1 << DSCR_INSTR_COMP)) == 0);
 
        /* this "should never happen" ... */
        if (dscr & (1 << DSCR_DTR_RX_FULL)) {
@@ -408,7 +449,8 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
                /* Clear DCCRX */
                retval = cortex_a8_exec_opcode(
                                a8->armv7a_common.armv4_5_common.target,
-                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                               &dscr);
        }
 
        return retval;
@@ -425,18 +467,21 @@ static int cortex_a8_instr_write_data_dcc(struct arm_dpm *dpm,
 {
        struct cortex_a8_common *a8 = dpm_to_a8(dpm);
        int retval;
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
 
        retval = cortex_a8_write_dcc(a8, data);
 
        return cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       opcode);
+                       opcode,
+                       &dscr);
 }
 
 static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
                uint32_t opcode, uint32_t data)
 {
        struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
        int retval;
 
        retval = cortex_a8_write_dcc(a8, data);
@@ -444,12 +489,14 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
        /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */
        retval = cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+                       ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+                       &dscr);
 
        /* then the opcode, taking data from R0 */
        retval = cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       opcode);
+                       opcode,
+                       &dscr);
 
        return retval;
 }
@@ -457,9 +504,12 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
 static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm)
 {
        struct target *target = dpm->arm->target;
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
 
        /* "Prefetch flush" after modifying execution status in CPSR */
-       return cortex_a8_exec_opcode(target, ARMV4_5_MCR(15, 0, 0, 7, 5, 4));
+       return cortex_a8_exec_opcode(target,
+                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
+                       &dscr);
 }
 
 static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
@@ -467,13 +517,15 @@ static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
 {
        struct cortex_a8_common *a8 = dpm_to_a8(dpm);
        int retval;
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
 
        /* the opcode, writing data to DCC */
        retval = cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       opcode);
+                       opcode,
+                       &dscr);
 
-       return cortex_a8_read_dcc(a8, data);
+       return cortex_a8_read_dcc(a8, data, &dscr);
 }
 
 
@@ -481,19 +533,22 @@ static int cortex_a8_instr_read_data_r0(struct arm_dpm *dpm,
                uint32_t opcode, uint32_t *data)
 {
        struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       uint32_t dscr = 1 << DSCR_INSTR_COMP;
        int retval;
 
        /* the opcode, writing data to R0 */
        retval = cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       opcode);
+                       opcode,
+                       &dscr);
 
        /* write R0 to DCC */
        retval = cortex_a8_exec_opcode(
                        a8->armv7a_common.armv4_5_common.target,
-                       ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+                       ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+                       &dscr);
 
-       return cortex_a8_read_dcc(a8, data);
+       return cortex_a8_read_dcc(a8, data, &dscr);
 }
 
 static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)

Linking to existing account procedure

If you already have an account and want to add another login method you MUST first sign in with your existing account and then change URL to read https://review.openocd.org/login/?link to get to this page again but this time it'll work for linking. Thank you.

SSH host keys fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)