aarch64: add 'maskisr' command
[openocd.git] / src / target / aarch64.c
index 824a042de917416a3bbc2ad0006c31f9ca72b7cf..f1ce91459dd69665a78cc2d1f1c468a0d069c984 100644 (file)
 #include "armv8_cache.h"
 #include <helper/time_support.h>
 
+enum restart_mode { /* restart policy selector; no user of it is visible in this part of the patch */
+       RESTART_LAZY, /* NOTE(review): presumably "do not wait for the restart", by analogy with HALT_LAZY - confirm */
+       RESTART_SYNC, /* NOTE(review): presumably "wait until the PE has restarted", by analogy with HALT_SYNC - confirm */
+};
+
+enum halt_mode { /* tells aarch64_halt_one() whether to wait for the halt to take effect */
+       HALT_LAZY, /* issue the halt request and return without waiting */
+       HALT_SYNC, /* additionally call aarch64_wait_halt_one() before returning */
+};
+
 static int aarch64_poll(struct target *target);
 static int aarch64_debug_entry(struct target *target);
 static int aarch64_restore_context(struct target *target, bool bpwp);
@@ -44,14 +54,17 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data);
+
+#define foreach_smp_target(pos, head) /* loop header: walk a NULL-terminated list linked through ->next */ \
+       for (pos = head; (pos != NULL); pos = pos->next) /* caller supplies the body; pos and head are expanded unparenthesized */
 
 static int aarch64_restore_system_control_reg(struct target *target)
 {
+       enum arm_mode target_mode = ARM_MODE_ANY;
        int retval = ERROR_OK;
+       uint32_t instr;
 
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -61,51 +74,48 @@ static int aarch64_restore_system_control_reg(struct target *target)
                /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_v8->cp15_control_reg); */
 
                switch (armv8->arm.core_mode) {
-                       case ARMV8_64_EL0T:
-                       case ARMV8_64_EL1T:
-                       case ARMV8_64_EL1H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               0, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL0T:
+                       target_mode = ARMV8_64_EL1H;
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
                        break;
-                       case ARMV8_64_EL2T:
-                       case ARMV8_64_EL2H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               4, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
                        break;
-                       case ARMV8_64_EL3H:
-                       case ARMV8_64_EL3T:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               6, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
                        break;
-                       default:
-                               retval = armv8->arm.mcr(target, 15, 0, 0, 1, 0, aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       }
+
+               case ARM_MODE_SVC:
+               case ARM_MODE_ABT:
+               case ARM_MODE_FIQ:
+               case ARM_MODE_IRQ:
+                       instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+                       break;
+
+               default:
+                       LOG_INFO("cannot read system control register in this mode");
+                       return ERROR_FAIL;
+               }
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+               retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr, aarch64->system_control_reg);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
        }
+
        return retval;
 }
 
-/*  check address before aarch64_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int aarch64_check_address(struct target *target, uint32_t address)
-{
-       /* TODO */
-       return ERROR_OK;
-}
 /*  modify system_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
@@ -114,6 +124,7 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval = ERROR_OK;
+       uint32_t instr = 0;
 
        if (enable) {
                /*      if mmu enabled at target stop and mmu not enable */
@@ -121,86 +132,42 @@ static int aarch64_mmu_modify(struct target *target, int enable)
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(aarch64->system_control_reg_curr & 0x1U)) {
+               if (!(aarch64->system_control_reg_curr & 0x1U))
                        aarch64->system_control_reg_curr |= 0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                       }
-               }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
                        /*  data cache is active */
                        aarch64->system_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
+                       /* flush data cache armv8 function to be called */
                        if (armv8->armv8_mmu.armv8_cache.flush_all_data_cache)
                                armv8->armv8_mmu.armv8_cache.flush_all_data_cache(target);
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                                       break;
-                       }
                }
        }
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+               break;
+       default:
+               LOG_DEBUG("unknown cpu state 0x%x" PRIx32, armv8->arm.core_state);
+               break;
+       }
+
+       retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr,
+                               aarch64->system_control_reg_curr);
        return retval;
 }
 
@@ -215,6 +182,13 @@ static int aarch64_init_debug_access(struct target *target)
 
        LOG_DEBUG(" ");
 
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Examine %s failed", "oslock");
+               return retval;
+       }
+
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -230,82 +204,22 @@ static int aarch64_init_debug_access(struct target *target)
         */
 
        /* Enable CTI */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_CTR, 1);
-       /* By default, gate all channel triggers to and from the CTM */
+       retval = arm_cti_enable(armv8->cti, true);
+       /* By default, gate all channel events to and from the CTM */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, 0);
-       /* output halt requests to PE on channel 0 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN0, CTI_CHNL(0));
-       /* output restart requests to PE on channel 1 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN1, CTI_CHNL(1));
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
        /* Resync breakpoint registers */
 
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
-}
-
-/* To reduce needless round-trips, pass in a pointer to the current
- * DSCR value.  Initialize it to zero if you just need to know the
- * value on return from this function; or DSCR_ITE if you
- * happen to know that no instruction is pending.
- */
-static int aarch64_exec_opcode(struct target *target,
-       uint32_t opcode, uint32_t *dscr_p)
-{
-       uint32_t dscr;
-       int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       dscr = dscr_p ? *dscr_p : 0;
-
-       LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-
-       /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_ITE) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_write_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_ITR, opcode);
-       if (retval != ERROR_OK)
-               return retval;
-
-       then = timeval_ms();
-       do {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register");
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       } while ((dscr & DSCR_ITE) == 0);       /* Wait for InstrCompl bit to be set */
-
-       if (dscr_p)
-               *dscr_p = dscr;
-
-       return retval;
+       return ERROR_OK;
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -321,574 +235,291 @@ static int aarch64_dap_write_memap_register_u32(struct target *target,
        return retval;
 }
 
-/*
- * AARCH64 implementation of Debug Programmer's Model
- *
- * NOTE the invariant:  these routines return with DSCR_ITE set,
- * so there's no need to poll for it before executing an instruction.
- *
- * NOTE that in several of these cases the "stall" mode might be useful.
- * It'd let us queue a few operations together... prepare/finish might
- * be the places to enable/disable that mode.
- */
-
-static inline struct aarch64_common *dpm_to_a8(struct arm_dpm *dpm)
-{
-       return container_of(dpm, struct aarch64_common, armv8_common.dpm);
-}
-
-static int aarch64_write_dcc(struct armv8_common *armv8, uint32_t data)
-{
-       LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
-}
-
-static int aarch64_write_dcc_64(struct armv8_common *armv8, uint64_t data)
-{
-       int ret;
-       LOG_DEBUG("write DCC Low word0x%08" PRIx32, (unsigned)data);
-       LOG_DEBUG("write DCC High word 0x%08" PRIx32, (unsigned)(data >> 32));
-       ret = mem_ap_write_u32(armv8->debug_ap,
-                              armv8->debug_base + CPUV8_DBG_DTRRX, data);
-       ret += mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, data >> 32);
-       return ret;
-}
-
-static int aarch64_read_dcc(struct armv8_common *armv8, uint32_t *data,
-       uint32_t *dscr_p)
+static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug) /* wire up the DPM embedded in a8, then set it up and initialize it */
 {
-       uint32_t dscr = DSCR_ITE;
+       struct arm_dpm *dpm = &a8->armv8_common.dpm; /* the DPM lives inside a8's armv8_common */
        int retval;
 
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           data);
-       if (retval != ERROR_OK)
-               return retval;
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
+       dpm->arm = &a8->armv8_common.arm; /* point the DPM at the arm state of the same core */
+       dpm->didr = debug; /* caller-supplied value is stored as-is; NOTE(review): presumably a debug ID register value - confirm */
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       retval = armv8_dpm_setup(dpm);
+       if (retval == ERROR_OK) /* initialize only when setup succeeded */
+               retval = armv8_dpm_initialize(dpm); /* the result of the last step executed is what gets returned */
 
        return retval;
 }
 
-static int aarch64_read_dcc_64(struct armv8_common *armv8, uint64_t *data,
-       uint32_t *dscr_p)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value) /* thin wrapper: apply bit_mask/value to this target's DSCR debug register */
 {
-       uint32_t dscr = DSCR_ITE;
-       uint32_t higher;
-       int retval;
-
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
+       struct armv8_common *armv8 = target_to_armv8(target);
+       return armv8_set_dbgreg_bits(armv8, CPUV8_DBG_DSCR, bit_mask, value); /* result passed through; NOTE(review): helper not shown, presumably a read-modify-write of the masked bits - confirm */
+}
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           (uint32_t *)data);
-       if (retval != ERROR_OK)
-               return retval;
+static int aarch64_check_state_one(struct target *target, /* read PRSR once and compare its masked value against val */
+               uint32_t mask, uint32_t val, int *p_result, uint32_t *p_prsr) /* p_result and p_prsr are optional outputs, each may be NULL */
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t prsr;
+       int retval;
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRRX,
-                                           &higher);
+                       armv8->debug_base + CPUV8_DBG_PRSR, &prsr); /* a failed read is returned to the caller before any output is written */
        if (retval != ERROR_OK)
                return retval;
 
-       *data = *(uint32_t *)data | (uint64_t)higher << 32;
-       LOG_DEBUG("read DCC 0x%16.16" PRIx64, *data);
+       if (p_prsr)
+               *p_prsr = prsr; /* hand back the raw register value */
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       if (p_result)
+               *p_result = (prsr & mask) == (val & mask); /* 1 when every bit selected by mask matches val, else 0 */
 
-       return retval;
+       return ERROR_OK; /* reaching this point means the register read succeeded */
 }
 
-static int aarch64_dpm_prepare(struct arm_dpm *dpm)
+static int aarch64_wait_halt_one(struct target *target) /* poll PRSR until PRSR_HALT is set, a read fails, or the time limit passes */
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr;
-       int retval;
+       int retval = ERROR_OK;
+       uint32_t prsr; /* last PRSR value read, used only in the timeout log */
 
-       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_ITE) != 0)
+       int64_t then = timeval_ms(); /* start of the wait */
+       for (;;) { /* polls back to back, there is no delay between reads */
+               int halted;
+
+               retval = aarch64_check_state_one(target, PRSR_HALT, PRSR_HALT, &halted, &prsr);
+               if (retval != ERROR_OK || halted) /* leave on a read error (retval carries it) or once halted (retval is ERROR_OK) */
                        break;
+
                if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for dpm prepare");
-                       return ERROR_FAIL;
+                       retval = ERROR_TARGET_TIMEOUT; /* reported to the caller */
+                       LOG_DEBUG("target %s timeout, prsr=0x%08"PRIx32, target_name(target), prsr); /* debug level only; prsr holds the value from the successful read just above */
+                       break;
                }
        }
-
-       /* this "should never happen" ... */
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX */
-               retval = mem_ap_read_u32(a8->armv8_common.debug_ap,
-                       a8->armv8_common.debug_base + CPUV8_DBG_DTRRX, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-
-               /* Clear sticky error */
-               retval = mem_ap_write_u32(a8->armv8_common.debug_ap,
-                       a8->armv8_common.debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
        return retval;
 }
 
-static int aarch64_dpm_finish(struct arm_dpm *dpm)
-{
-       /* REVISIT what could be done here? */
-       return ERROR_OK;
-}
-
-static int aarch64_instr_execute(struct arm_dpm *dpm,
-       uint32_t opcode)
+static int aarch64_prepare_halt_smp(struct target *target, bool exc_target, struct target **p_first) /* arm every examined, running member of target's list for a halt; optionally report which member to halt first */
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-
-       uint32_t dscr = DSCR_ITE;
-       int retval;
+       int retval = ERROR_OK;
+       struct target_list *head = target->head; /* list of targets this one is grouped with */
+       struct target *first = NULL; /* first member that was prepared successfully */
 
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+       LOG_DEBUG("target %s exc %i", target_name(target), exc_target);
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target, armv8_opcode(&a8->armv8_common, READ_REG_DTRRX), &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       while (head != NULL) {
+               struct target *curr = head->target;
+               struct armv8_common *armv8 = target_to_armv8(curr);
+               head = head->next; /* advance first so the 'continue' statements below cannot skip it */
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+               if (exc_target && curr == target) /* exc_target: leave the calling target itself out */
+                       continue;
+               if (!target_was_examined(curr)) /* unexamined members are skipped */
+                       continue;
+               if (curr->state != TARGET_RUNNING) /* only members currently marked running are prepared */
+                       continue;
 
-       return retval;
-}
+               /* HACK: mark this target as prepared for halting */
+               curr->debug_reason = DBG_REASON_DBGRQ; /* written before the CTI/DSCR accesses below, so it stays set even if they fail */
 
-static int aarch64_instr_write_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
+               /* open the gate for channel 0 to let HALT requests pass to the CTM */
+               retval = arm_cti_ungate_channel(armv8->cti, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE); /* same DSCR_HDE update that aarch64_halt_one() performs */
+               if (retval != ERROR_OK)
+                       break; /* stop at the first failure; members prepared earlier are not undone */
 
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+               LOG_DEBUG("target %s prepared", target_name(curr));
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0),
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+               if (first == NULL)
+                       first = curr; /* remember only the first prepared member */
+       }
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       if (p_first) { /* optional output; written even when retval reports an error */
+               if (exc_target && first)
+                       *p_first = first; /* calling target excluded: report the first prepared member */
+               else
+                       *p_first = target; /* otherwise, or when nothing was prepared, report the calling target */
+       }
 
        return retval;
 }
 
-static int aarch64_instr_cpsr_sync(struct arm_dpm *dpm)
+static int aarch64_halt_one(struct target *target, enum halt_mode mode) /* request a halt of one PE through CTI channel 0; with HALT_SYNC also wait for it */
 {
-       struct target *target = dpm->arm->target;
+       int retval = ERROR_OK;
        struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t dscr = DSCR_ITE;
 
-       /* "Prefetch flush" after modifying execution status in CPSR */
-       return aarch64_exec_opcode(target, armv8_opcode(armv8, ARMV8_OPC_DSB_SY), &dscr);
-}
-
-static int aarch64_instr_read_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
+       LOG_DEBUG("%s", target_name(target));
 
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       /* allow Halting Debug Mode */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE); /* a failure here is returned before any halt request is sent */
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
+       if (mode == HALT_SYNC) { /* any other mode (HALT_LAZY) returns right after the pulse */
+               retval = aarch64_wait_halt_one(target);
+               if (retval != ERROR_OK) {
+                       if (retval == ERROR_TARGET_TIMEOUT) /* only the timeout is logged as an error here; other failures are returned without a message */
+                               LOG_ERROR("Timeout waiting for target %s halt", target_name(target));
+                       return retval;
+               }
+       }
+
+       return ERROR_OK;
 }
 
-static int aarch64_instr_read_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
+static int aarch64_halt_smp(struct target *target, bool exc_target)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
+       struct target *next = target;
        int retval;
 
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       /* prepare halt on all PEs of the group */
+       retval = aarch64_prepare_halt_smp(target, exc_target, &next);
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target, armv8_opcode(&a8->armv8_common, WRITE_REG_DTRTX), &dscr);
-       if (retval != ERROR_OK)
+       if (exc_target && next == target)
                return retval;
 
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       /* halt the target PE */
+       if (retval == ERROR_OK)
+               retval = aarch64_halt_one(next, HALT_LAZY);
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0),  /* msr dbgdtr_el0, x0 */
-                       &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
-}
+       /* wait for all PEs to halt */
+       int64_t then = timeval_ms();
+       for (;;) {
+               bool all_halted = true;
+               struct target_list *head;
+               struct target *curr;
 
-static int aarch64_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-       uint32_t addr, uint32_t control)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv8_common.debug_base;
-       uint32_t cr = a8->armv8_common.debug_base;
-       int retval;
+               foreach_smp_target(head, target->head) {
+                       int halted;
 
-       switch (index_t) {
-               case 0 ... 15:  /* breakpoints */
-                       vr += CPUV8_DBG_BVR_BASE;
-                       cr += CPUV8_DBG_BCR_BASE;
-                       break;
-               case 16 ... 31: /* watchpoints */
-                       vr += CPUV8_DBG_WVR_BASE;
-                       cr += CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       vr += 16 * index_t;
-       cr += 16 * index_t;
+                       curr = head->target;
 
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-               (unsigned) vr, (unsigned) cr);
+                       if (!target_was_examined(curr))
+                               continue;
 
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       vr, addr);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       cr, control);
-       return retval;
-}
+                       retval = aarch64_check_state_one(curr, PRSR_HALT, PRSR_HALT, &halted, NULL);
+                       if (retval != ERROR_OK || !halted) {
+                               all_halted = false;
+                               break;
+                       }
+               }
 
-static int aarch64_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
-{
-       struct aarch64_common *a = dpm_to_a8(dpm);
-       uint32_t cr;
+               if (all_halted)
+                       break;
 
-       switch (index_t) {
-               case 0 ... 15:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_BCR_BASE;
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
                        break;
-               case 16 ... 31:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
+               }
+
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find that a core has not halted
+                * yet, we trigger an explicit halt for the second cluster.
+                */
+               retval = aarch64_halt_one(curr, HALT_LAZY);
+               if (retval != ERROR_OK)
                        break;
-               default:
-                       return ERROR_FAIL;
        }
-       cr += 16 * index_t;
-
-       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
-
-       /* clear control register */
-       return aarch64_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
-
-}
-
-static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
-{
-       struct arm_dpm *dpm = &a8->armv8_common.dpm;
-       int retval;
-
-       dpm->arm = &a8->armv8_common.arm;
-       dpm->didr = debug;
-
-       dpm->prepare = aarch64_dpm_prepare;
-       dpm->finish = aarch64_dpm_finish;
-
-       dpm->instr_execute = aarch64_instr_execute;
-       dpm->instr_write_data_dcc = aarch64_instr_write_data_dcc;
-       dpm->instr_write_data_dcc_64 = aarch64_instr_write_data_dcc_64;
-       dpm->instr_write_data_r0 = aarch64_instr_write_data_r0;
-       dpm->instr_write_data_r0_64 = aarch64_instr_write_data_r0_64;
-       dpm->instr_cpsr_sync = aarch64_instr_cpsr_sync;
-
-       dpm->instr_read_data_dcc = aarch64_instr_read_data_dcc;
-       dpm->instr_read_data_dcc_64 = aarch64_instr_read_data_dcc_64;
-       dpm->instr_read_data_r0 = aarch64_instr_read_data_r0;
-       dpm->instr_read_data_r0_64 = aarch64_instr_read_data_r0_64;
-
-       dpm->arm_reg_current = armv8_reg_current;
-
-       dpm->bpwp_enable = aarch64_bpwp_enable;
-       dpm->bpwp_disable = aarch64_bpwp_disable;
-
-       retval = armv8_dpm_setup(dpm);
-       if (retval == ERROR_OK)
-               retval = armv8_dpm_initialize(dpm);
 
        return retval;
 }
-static struct target *get_aarch64(struct target *target, int32_t coreid)
+
+static int update_halt_gdb(struct target *target, enum target_debug_reason debug_reason)
 {
+       struct target *gdb_target = NULL;
        struct target_list *head;
        struct target *curr;
 
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-                       return curr;
-               head = head->next;
+       if (debug_reason == DBG_REASON_NOTHALTED) {
+               LOG_INFO("Halting remaining targets in SMP group");
+               aarch64_halt_smp(target, true);
        }
-       return target;
-}
-static int aarch64_halt(struct target *target);
-
-static int aarch64_halt_smp(struct target *target)
-{
-       int retval = ERROR_OK;
-       struct target_list *head = target->head;
-
-       while (head != (struct target_list *)NULL) {
-               struct target *curr = head->target;
-               struct armv8_common *armv8 = target_to_armv8(curr);
-
-               /* open the gate for channel 0 to let HALT requests pass to the CTM */
-               if (curr->smp)
-                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->cti_base + CTI_GATE, CTI_CHNL(0));
-               if (retval != ERROR_OK)
-                       break;
-
-               head = head->next;
-       }
-
-       /* halt the target PE */
-       if (retval == ERROR_OK)
-               retval = aarch64_halt(target);
 
-       return retval;
-}
+       /* poll all targets in the group, but skip the target that serves GDB */
+       foreach_smp_target(head, target->head) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* remember the gdb_service->target */
+               if (curr->gdb_service != NULL)
+                       gdb_target = curr->gdb_service->target;
+               /* skip it */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in aarch64_poll() */
+               curr->smp = 0;
+               aarch64_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               aarch64_poll(gdb_target);
 
-static int update_halt_gdb(struct target *target)
-{
-       int retval = 0;
-       if (target->gdb_service && target->gdb_service->core[0] == -1) {
-               target->gdb_service->target = target;
-               target->gdb_service->core[0] = target->coreid;
-               retval += aarch64_halt_smp(target);
-       }
-       return retval;
+       return ERROR_OK;
 }
 
 /*
- * Cortex-A8 Run control
+ * Aarch64 Run control
  */
 
 static int aarch64_poll(struct target *target)
 {
+       enum target_state prev_target_state;
        int retval = ERROR_OK;
-       uint32_t dscr;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
-       struct armv8_common *armv8 = &aarch64->armv8_common;
-       enum target_state prev_target_state = target->state;
-       /*  toggle to another core is done by gdb as follow */
-       /*  maint packet J core_id */
-       /*  continue */
-       /*  the next polling trigger an halt event sent to gdb */
-       if ((target->state == TARGET_HALTED) && (target->smp) &&
-               (target->gdb_service) &&
-               (target->gdb_service->target == NULL)) {
-               target->gdb_service->target =
-                       get_aarch64(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-               return retval;
-       }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       int halted;
+
+       retval = aarch64_check_state_one(target,
+                               PRSR_HALT, PRSR_HALT, &halted, NULL);
        if (retval != ERROR_OK)
                return retval;
-       aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == 0x3) {
+       if (halted) {
+               prev_target_state = target->state;
                if (prev_target_state != TARGET_HALTED) {
+                       enum target_debug_reason debug_reason = target->debug_reason;
+
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
-                       }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
-
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
+                       LOG_DEBUG("Target %s halted", target_name(target));
+                       retval = aarch64_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
 
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
+                       if (target->smp)
+                               update_halt_gdb(target, debug_reason);
+
+                       switch (prev_target_state) {
+                       case TARGET_RUNNING:
+                       case TARGET_UNKNOWN:
+                       case TARGET_RESET:
+                               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
+                               break;
+                       case TARGET_DEBUG_RUNNING:
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                               break;
+                       default:
+                               break;
                        }
                }
        } else
@@ -899,47 +530,13 @@ static int aarch64_poll(struct target *target)
 
 static int aarch64_halt(struct target *target)
 {
-       int retval = ERROR_OK;
-       uint32_t dscr;
-       struct armv8_common *armv8 = target_to_armv8(target);
+       if (target->smp)
+               return aarch64_halt_smp(target, false);
 
-       /*
-        * add HDE in halting debug mode
-        */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr | DSCR_HDE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* trigger an event on channel 0, this outputs a halt request to the PE */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, CTI_CHNL(0));
-       if (retval != ERROR_OK)
-               return retval;
-
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCRV8_HALT_MASK) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for halt");
-                       return ERROR_FAIL;
-               }
-       }
-
-       target->debug_reason = DBG_REASON_DBGRQ;
-
-       return ERROR_OK;
+       return aarch64_halt_one(target, HALT_SYNC);
 }
 
-static int aarch64_internal_restore(struct target *target, int current,
+static int aarch64_restore_one(struct target *target, int current,
        uint64_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -947,6 +544,8 @@ static int aarch64_internal_restore(struct target *target, int current,
        int retval;
        uint64_t resume_pc;
 
+       LOG_DEBUG("%s", target_name(target));
+
        if (!debug_execution)
                target_free_all_working_areas(target);
 
@@ -978,114 +577,236 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
        arm->pc->dirty = 1;
        arm->pc->valid = 1;
-       dpmv8_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
+
+       return retval;
+}
+
+/**
+ * prepare single target for restart
+ *
+ *
+ */
+static int aarch64_prepare_restart_one(struct target *target)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+       uint32_t dscr;
+       uint32_t tmp;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if ((dscr & DSCR_ITE) == 0)
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
+
+       /* acknowledge a pending CTI halt event */
+       retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+       /*
+        * open the CTI gate for channel 1 so that the restart events
+        * get passed along to all PEs. Also close gate for channel 0
+        * to isolate the PE from halt events.
+        */
+       if (retval == ERROR_OK)
+               retval = arm_cti_ungate_channel(armv8->cti, 1);
+       if (retval == ERROR_OK)
+               retval = arm_cti_gate_channel(armv8->cti, 0);
+
+       /* make sure that DSCR.HDE is set */
+       if (retval == ERROR_OK) {
+               dscr |= DSCR_HDE;
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       /* clear sticky bits in PRSR, SDR is now 0 */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_PRSR, &tmp);
+
+       return retval;
+}
+
+static int aarch64_do_restart_one(struct target *target, enum restart_mode mode)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* trigger an event on channel 1, generates a restart request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 1);
        if (retval != ERROR_OK)
                return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
+
+       if (mode == RESTART_SYNC) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       int resumed;
+                       /*
+                        * if PRSR.SDR is set now, the target did restart, even
+                        * if it's now already halted again (e.g. due to breakpoint)
+                        */
+                       retval = aarch64_check_state_one(target,
+                                               PRSR_SDR, PRSR_SDR, &resumed, NULL);
+                       if (retval != ERROR_OK || resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: Timeout waiting for resume"PRIx32, target_name(target));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+               }
+       }
+
        if (retval != ERROR_OK)
                return retval;
+
        target->debug_reason = DBG_REASON_NOTHALTED;
        target->state = TARGET_RUNNING;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+       return ERROR_OK;
+}
+
+static int aarch64_restart_one(struct target *target, enum restart_mode mode)
+{
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prepare_restart_one(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_do_restart_one(target, mode);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target, bool slave_pe)
+/*
+ * prepare all but the current target for restart
+ */
+static int aarch64_prep_restart_smp(struct target *target, int handle_breakpoints, struct target **p_first)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
-       int retval;
-       uint32_t dscr;
-       /*
-        * * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * * exception flags: see ARMv7 ARM, C5.9.
-        *
-        * REVISIT: for single stepping, we probably want to
-        * disable IRQs by default, with optional override...
-        */
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
+       uint64_t address;
+
+       foreach_smp_target(head, target->head) {
+               struct target *curr = head->target;
+
+               /* skip calling target */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_HALTED)
+                       continue;
+
+               /*  resume at current address, not in step mode */
+               retval = aarch64_restore_one(curr, 1, &address, handle_breakpoints, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_prepare_restart_one(curr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("failed to restore target %s", target_name(curr));
+                       break;
+               }
+               /* remember the first valid target in the group */
+               if (first == NULL)
+                       first = curr;
+       }
+
+       if (p_first)
+               *p_first = first;
+
+       return retval;
+}
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
 
-       if ((dscr & DSCR_ITE) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
+static int aarch64_step_restart_smp(struct target *target)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
 
-       /* make sure to acknowledge the halt event before resuming */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_INACK, CTI_TRIG(HALT));
+       LOG_DEBUG("%s", target_name(target));
 
-       /*
-        * open the CTI gate for channel 1 so that the restart events
-        * get passed along to all PEs
-        */
-       if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, CTI_CHNL(1));
+       retval = aarch64_prep_restart_smp(target, 0, &first);
        if (retval != ERROR_OK)
                return retval;
 
-       if (!slave_pe) {
-               /* trigger an event on channel 1, generates a restart request to the PE */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_APPPULSE, CTI_CHNL(1));
-               if (retval != ERROR_OK)
-                       return retval;
+       if (first != NULL)
+               retval = aarch64_do_restart_one(first, RESTART_LAZY);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("error restarting target %s", target_name(first));
+               return retval;
+       }
 
-               long long then = timeval_ms();
-               for (;; ) {
-                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       if ((dscr & DSCR_HDE) != 0)
+       int64_t then = timeval_ms();
+       for (;;) {
+               struct target *curr = target;
+               bool all_resumed = true;
+
+               foreach_smp_target(head, target->head) {
+                       uint32_t prsr;
+                       int resumed;
+
+                       curr = head->target;
+
+                       if (curr == target)
+                               continue;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr,
+                                       PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                       if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                               all_resumed = false;
                                break;
-                       if (timeval_ms() > then + 1000) {
-                               LOG_ERROR("Timeout waiting for resume");
-                               return ERROR_FAIL;
                        }
-               }
-       }
-
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+                       if (curr->state != TARGET_RUNNING) {
+                               curr->state = TARGET_RUNNING;
+                               curr->debug_reason = DBG_REASON_NOTHALTED;
+                               target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                       }
+               }
 
-       return ERROR_OK;
-}
+               if (all_resumed)
+                       break;
 
-static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
-{
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       uint64_t address;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
-                       /*  resume current address , not in step mode */
-                       retval += aarch64_internal_restore(curr, 1, &address,
-                                       handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr, true);
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("%s: timeout waiting for target resume", __func__);
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
-               head = head->next;
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to resume one core in each
+                * cluster explicitly. So if we find that a core has not resumed
+                * yet, we trigger an explicit resume for the second cluster.
+                */
+               retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+}
 
-       }
        return retval;
 }
 
@@ -1095,24 +816,86 @@ static int aarch64_resume(struct target *target, int current,
        int retval = 0;
        uint64_t addr = address;
 
-       /* dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
-               /*   simulate a start and halt of target */
-               target->gdb_service->target = NULL;
-               target->gdb_service->core[0] = target->gdb_service->core[1];
-               /*  fake resume at next poll we play the  target core[1], see poll*/
-               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               return 0;
-       }
-       aarch64_internal_restore(target, current, &addr, handle_breakpoints,
-                                debug_execution);
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       /*
+        * If this target is part of an SMP group, prepare the other
+        * targets for resuming. This involves restoring the complete
+        * target register context and setting up CTI gates to accept
+        * resume events from the trigger matrix.
+        */
        if (target->smp) {
-               target->gdb_service->core[0] = -1;
-               retval = aarch64_restore_smp(target, handle_breakpoints);
+               retval = aarch64_prep_restart_smp(target, handle_breakpoints, NULL);
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target, false);
+
+       /* all targets prepared, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &addr, handle_breakpoints,
+                                debug_execution);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_SYNC);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->smp) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       struct target *curr = target;
+                       struct target_list *head;
+                       bool all_resumed = true;
+
+                       foreach_smp_target(head, target->head) {
+                               uint32_t prsr;
+                               int resumed;
+
+                               curr = head->target;
+                               if (curr == target)
+                                       continue;
+                               if (!target_was_examined(curr))
+                                       continue;
+
+                               retval = aarch64_check_state_one(curr,
+                                               PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                               if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                                       all_resumed = false;
+                                       break;
+                               }
+
+                               if (curr->state != TARGET_RUNNING) {
+                                       curr->state = TARGET_RUNNING;
+                                       curr->debug_reason = DBG_REASON_NOTHALTED;
+                                       target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                               }
+                       }
+
+                       if (all_resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+
+                       /*
+                        * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                        * and it looks like the CTI's are not connected by a common
+                        * trigger matrix. It seems that we need to resume one core in each
+                        * cluster explicitly. So if we find that a core has not resumed
+                        * yet, we trigger an explicit resume for the second cluster.
+                        */
+                       retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+                       if (retval != ERROR_OK)
+                               break;
+               }
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
@@ -1130,24 +913,41 @@ static int aarch64_resume(struct target *target, int current,
 static int aarch64_debug_entry(struct target *target)
 {
        int retval = ERROR_OK;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
-
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
-
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
+       uint32_t dscr;
 
        /* make sure to clear all sticky errors */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval == ERROR_OK)
+               retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), dscr);
+
+       dpm->dscr = dscr;
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
+
+       /* close the CTI gate for all events */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       armv8_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
+       armv8_dpm_report_dscr(dpm, dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
@@ -1172,11 +972,8 @@ static int aarch64_debug_entry(struct target *target)
 
        retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       if (retval == ERROR_OK && armv8->post_debug_entry)
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -1186,52 +983,55 @@ static int aarch64_post_debug_entry(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval;
-
-       /* clear sticky errors */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                                   armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr;
 
        switch (armv8->arm.core_mode) {
-               case ARMV8_64_EL0T:
-               case ARMV8_64_EL1T:
-               case ARMV8_64_EL1H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL1, 0);
                break;
-               case ARMV8_64_EL2T:
-               case ARMV8_64_EL2H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       4, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL2, 0);
                break;
-               case ARMV8_64_EL3H:
-               case ARMV8_64_EL3T:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       6, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL3, 0);
                break;
-               default:
-                       retval = armv8->arm.mrc(target, 15, 0, 0, 1, 0, &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+               instr = ARMV4_5_MRC(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_INFO("cannot read system control register in this mode");
+               return ERROR_FAIL;
        }
 
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+       retval = armv8->dpm.instr_read_data_r0(&armv8->dpm, instr, &aarch64->system_control_reg);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+
        LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
        aarch64->system_control_reg_curr = aarch64->system_control_reg;
 
-       if (armv8->armv8_mmu.armv8_cache.ctype == -1)
-               armv8_identify_cache(target);
+       if (armv8->armv8_mmu.armv8_cache.info == -1) {
+               armv8_identify_cache(armv8);
+               armv8_read_mpidr(armv8);
+       }
 
        armv8->armv8_mmu.mmu_enabled =
                        (aarch64->system_control_reg & 0x1U) ? 1 : 0;
@@ -1239,36 +1039,18 @@ static int aarch64_post_debug_entry(struct target *target)
                (aarch64->system_control_reg & 0x4U) ? 1 : 0;
        armv8->armv8_mmu.armv8_cache.i_cache_enabled =
                (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
-       aarch64->curr_mode = armv8->arm.core_mode;
        return ERROR_OK;
 }
 
-static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
-{
-       struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t dscr;
-
-       /* Read DSCR */
-       int retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (ERROR_OK != retval)
-               return retval;
-
-       /* clear bitfield */
-       dscr &= ~bit_mask;
-       /* put new value */
-       dscr |= value & bit_mask;
-
-       /* write new DSCR */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       return retval;
-}
-
+/*
+ * single-step a target
+ */
 static int aarch64_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       int saved_retval = ERROR_OK;
        int retval;
        uint32_t edecr;
 
@@ -1279,39 +1061,76 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
-       if (retval != ERROR_OK)
-               return retval;
-
        /* make sure EDECR.SS is not set when restoring the register */
-       edecr &= ~0x4;
 
-       /* set EDECR.SS to enter hardware step mode */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       if (retval == ERROR_OK) {
+               edecr &= ~0x4;
+               /* set EDECR.SS to enter hardware step mode */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       }
+       /* disable interrupts while stepping */
+       if (retval == ERROR_OK && aarch64->isrmasking_mode == AARCH64_ISRMASK_ON)
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       /* bail out if stepping setup has failed */
        if (retval != ERROR_OK)
                return retval;
 
-       /* disable interrupts while stepping */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (target->smp && !handle_breakpoints) {
+               /*
+                * isolate current target so that it doesn't get resumed
+                * together with the others
+                */
+               retval = arm_cti_gate_channel(armv8->cti, 1);
+               /* resume all other targets in the group */
+               if (retval == ERROR_OK)
+                       retval = aarch64_step_restart_smp(target);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Failed to restart non-stepping targets in SMP group");
+                       return retval;
+               }
+               LOG_DEBUG("Restarted all non-stepping targets in SMP group");
+       }
+
+       /* all other targets running, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &address, 0, 0);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_LAZY);
 
-       /* resume the target */
-       retval = aarch64_resume(target, current, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED) {
-               retval = aarch64_poll(target);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("timeout waiting for target halt");
-                       return ERROR_FAIL;
+       LOG_DEBUG("target step-resumed at 0x%" PRIx64, address);
+       if (!handle_breakpoints)
+               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               int stepped;
+               uint32_t prsr;
+
+               retval = aarch64_check_state_one(target,
+                                       PRSR_SDR|PRSR_HALT, PRSR_SDR|PRSR_HALT, &stepped, &prsr);
+               if (retval != ERROR_OK || stepped)
+                       break;
+
+               if (timeval_ms() > then + 100) {
+                       LOG_ERROR("timeout waiting for target %s halt after step",
+                                       target_name(target));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
        }
 
+       /*
+        * At least on one SoC (Renesas R8A7795) stepping over a WFI instruction
+        * causes a timeout. The core takes the step but doesn't complete it and so
+        * debug state is never entered. However, you can manually halt the core
+        * as an external debug event is also a WFI wakeup event.
+        */
+       if (retval == ERROR_TARGET_TIMEOUT)
+               saved_retval = aarch64_halt_one(target, HALT_SYNC);
+
        /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, edecr);
@@ -1319,24 +1138,38 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
                return retval;
 
        /* restore interrupts */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (aarch64->isrmasking_mode == AARCH64_ISRMASK_ON) {
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+               if (retval != ERROR_OK)
+                       return ERROR_OK;
+       }
 
-       return ERROR_OK;
+       if (saved_retval != ERROR_OK)
+               return saved_retval;
+
+       return aarch64_poll(target);
 }
 
+/*
+ * Write the cached register state back to the core.
+ *
+ * Calls the optional armv8 pre_restore_context hook (its result, if any,
+ * is not checked), then flushes dirty registers through
+ * armv8_dpm_write_dirty_registers(), passing 'bpwp' through unchanged.
+ * When that succeeds, arm->core_cache and the cache chained after it are
+ * invalidated. Returns the result of the register write-back.
+ */
 static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
 
-       LOG_DEBUG(" ");
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       retval = armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       if (retval == ERROR_OK) {
+               /* registers are now invalid */
+               register_cache_invalidate(arm->core_cache);
+               register_cache_invalidate(arm->core_cache->next);
+       }
 
+       return retval;
 }
 
 /*
@@ -1354,7 +1187,6 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1404,7 +1236,7 @@ static int aarch64_set_breakpoint(struct target *target,
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
 
-               buf_set_u32(code, 0, 32, ARMV8_HLT(0x11));
+               buf_set_u32(code, 0, 32, armv8_opcode(armv8, ARMV8_OPC_HLT));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
@@ -1433,12 +1265,8 @@ static int aarch64_set_breakpoint(struct target *target,
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HDE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -1812,122 +1640,168 @@ static int aarch64_assert_reset(struct target *target)
                return ERROR_FAIL;
        }
 
-       /* registers are now invalid */
-       register_cache_invalidate(armv8->arm.core_cache);
+       /* registers are now invalid */
+       if (target_was_examined(target)) {
+               register_cache_invalidate(armv8->arm.core_cache);
+               register_cache_invalidate(armv8->arm.core_cache->next);
+       }
+
+       target->state = TARGET_RESET;
+
+       return ERROR_OK;
+}
+
+/*
+ * Release the target from reset.
+ *
+ * SRST is deasserted unconditionally. A target that has not been examined
+ * yet is left alone after that. Otherwise the target is polled, halted if
+ * a halt after reset was requested but the core is found not halted, and
+ * finally debug access is (re)initialized.
+ *
+ * Returns ERROR_OK for an unexamined target, the first error from polling
+ * or halting, or else the result of aarch64_init_debug_access().
+ */
+static int aarch64_deassert_reset(struct target *target)
+{
+       LOG_DEBUG(" ");
+
+       /* be certain SRST is off */
+       jtag_add_reset(0, 0);
+
+       /* nothing more to do for a target that was never examined */
+       if (!target_was_examined(target))
+               return ERROR_OK;
+
+       int status = aarch64_poll(target);
+       if (status != ERROR_OK)
+               return status;
+
+       /* halt after reset was requested, but the core is not halted */
+       if (target->reset_halt && target->state != TARGET_HALTED) {
+               LOG_WARNING("%s: ran after reset and before halt ...",
+                       target_name(target));
+               status = target_halt(target);
+               if (status != ERROR_OK)
+                       return status;
+       }
+
+       return aarch64_init_debug_access(target);
+}
+
+/*
+ * Write 'count' items of 'size' bytes from 'buffer' to target memory, one
+ * item per loop iteration: the value is written to DTRRX over the debug
+ * AP, an instruction executed on the core moves it into register 1, and a
+ * size-matching ARMV8_OPC_STR*_IP opcode is then executed.
+ *
+ * A 'size' of 1 or 2 selects byte/halfword handling; any other value is
+ * treated as a 32-bit word.
+ *
+ * 'dscr' is the caller's copy of DSCR. If DSCR_MA is set in it, the bit is
+ * cleared and the new value is written to the core before the transfer.
+ *
+ * Returns ERROR_OK, or the first error from a debug AP access or an
+ * instruction execution.
+ *
+ * NOTE(review): the store opcodes presumably take the destination address
+ * from register 0, loaded by the caller - confirm against
+ * aarch64_write_cpu_memory().
+ */
+static int aarch64_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       /* register 1 is used as the transfer register below */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode if necessary */
+       if (*dscr & DSCR_MA) {
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t data, opcode;
+
+               /* write the data to store into DTRRX */
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* have the core move the DTR value into register 1 */
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MRC(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
 
-       target->state = TARGET_RESET;
+               /* execute the store opcode matching the access size */
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
 
        return ERROR_OK;
 }
 
-static int aarch64_deassert_reset(struct target *target)
+/*
+ * Write 'count' 32-bit words from 'buffer' to target memory using the DCC
+ * memory access mode: DSCR_MA is set, the whole buffer is pushed to DTRRX
+ * with a single non-incrementing debug AP block write, and DSCR_MA is
+ * cleared again.
+ *
+ * 'dscr' is the caller's copy of DSCR; it is updated to mirror every value
+ * written to the core. Returns ERROR_OK or the first debug AP error. On an
+ * early error return DSCR_MA may still be set in '*dscr' and on the core.
+ *
+ * NOTE(review): the destination address is presumably expected in
+ * register 0, loaded by the caller - confirm against
+ * aarch64_write_cpu_memory().
+ */
+static int aarch64_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
 {
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
        int retval;
 
-       LOG_DEBUG(" ");
+       /* register 1 is marked dirty before the transfer starts */
+       armv8_reg_current(arm, 1)->dirty = true;
 
-       /* be certain SRST is off */
-       jtag_add_reset(0, 0);
+       /* Step 1.d   - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       retval = aarch64_poll(target);
+
+       /* Step 2.a   - Do the write */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       buffer, 4, count, armv8->debug_base + CPUV8_DBG_DTRRX);
        if (retval != ERROR_OK)
                return retval;
 
-       if (target->reset_halt) {
-               if (target->state != TARGET_HALTED) {
-                       LOG_WARNING("%s: ran after reset and before halt ...",
-                               target_name(target));
-                       retval = target_halt(target);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-       }
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       *dscr &= ~DSCR_MA;
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ap_memory(struct target *target,
+static int aarch64_write_cpu_memory(struct target *target,
        uint64_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
        /* write memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
-                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-
-       /* Mark register R0 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               return retval;
-
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = malloc(total_u32 * 4);
-
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3), 4, 1, tmp_buff);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
-
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
-
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
-
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
-
-       /* We now have a 32 bit aligned buffer that can be written */
-
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
        /* Set Normal access mode  */
        dscr = (dscr & ~DSCR_MA);
@@ -1937,112 +1811,194 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRX */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
-
+               dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       /* Step 1.d   - Change DCC to memory mode */
-       dscr = dscr | DSCR_MA;
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
 
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_write_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_write_cpu_memory_slow(target, size, count, buffer, &dscr);
 
-       /* Step 2.a   - Do the write */
-       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
-                                       tmp_buff, 4, total_u32, armv8->debug_base + CPUV8_DBG_DTRRX);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
-
-       /* Step 3.a   - Switch DTR mode back to Normal mode */
-       dscr = (dscr & ~DSCR_MA);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       if (retval != ERROR_OK) {
+               /* Unset DTR mode */
+               mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
+
+       dpm->dscr = dscr;
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
-               goto error_free_buff_w;
+               armv8_dpm_handle_exception(dpm);
+               return ERROR_FAIL;
        }
 
        /* Done */
-       free(tmp_buff);
        return ERROR_OK;
+}
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+/*
+ * Read 'count' items of 'size' bytes from target memory into 'buffer', one
+ * item per loop iteration: a size-matching ARMV8_OPC_LDR*_IP opcode is
+ * executed on the core, a second instruction moves register 1 to DTRTX,
+ * and the value is then fetched from DTRTX over the debug AP.
+ *
+ * A 'size' of 1 or 2 selects byte/halfword handling; any other value is
+ * treated as a 32-bit word.
+ *
+ * 'dscr' is the caller's copy of DSCR. If DSCR_MA is set in it, only that
+ * bit is cleared and the new value is written to the core before the
+ * transfer.
+ *
+ * Returns ERROR_OK, or the first error from a debug AP access or an
+ * instruction execution.
+ *
+ * NOTE(review): the load opcodes presumably take the source address from
+ * register 0, loaded by the caller - confirm against
+ * aarch64_read_cpu_memory().
+ */
+static int aarch64_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       /* register 1 is used as the transfer register below */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode (if necessary) */
+       if (*dscr & DSCR_MA) {
+               /*
+                * Clear only the MA bit. Masking with DSCR_MA itself would
+                * keep MA set and wipe every other DSCR bit instead.
+                */
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t opcode, data;
+
+               /* execute the load opcode matching the access size */
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* have the core move register 1 into the DTR */
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* fetch the value from DTRTX over the debug AP */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (size == 1)
+                       *buffer = (uint8_t)data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t)data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+/*
+ * Read 'count' 32-bit words from target memory into 'buffer' using the
+ * DCC memory access mode: after a dummy DTR write on the core, DSCR_MA is
+ * set, one DTRTX value is read and discarded, 'count' - 1 words are
+ * fetched with a single non-incrementing debug AP block read, DSCR_MA is
+ * cleared, and the last word is read from DTRTX separately.
+ *
+ * 'count' must be at least 1: it is decremented unconditionally before
+ * the block read.
+ *
+ * 'dscr' is the caller's copy of DSCR; it is updated to mirror every value
+ * written to the core.
+ *
+ * Returns ERROR_OK, or the first error from an instruction execution or a
+ * debug AP access. Every setup step is checked, so a failure aborts the
+ * transfer instead of being overwritten by the next step's result.
+ *
+ * NOTE(review): on an early error return DSCR_MA may still be set in
+ * '*dscr' and on the core; the caller is presumably expected to switch
+ * back to normal mode - confirm against aarch64_read_cpu_memory().
+ */
+static int aarch64_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+       uint32_t value;
+
+       /* Mark X1 as dirty */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
+       } else {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+       }
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.e - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.f - read DBGDTRTX and discard the value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
+
+       count--;
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
+
+       if (count) {
+               /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+                * increments X0 by 4. */
+               retval = mem_ap_read_buf_noincr(armv8->debug_ap, buffer, 4, count,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       *dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.b - read DBGDTRTX for the final value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* the final word goes after the count - 1 words read in step 2.a */
+       target_buffer_set_u32(target, buffer + count * 4, value);
+       return retval;
 }
 
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
        /* read memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
-       uint8_t *u8buf_ptr;
-       uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
-                         address, size, count);
+       LOG_DEBUG("Reading CPU memory address 0x%016" PRIx64 " size %" PRIu32 " count %" PRIu32,
+                       address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-       /* Mark register X0, X1 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*      clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -2051,122 +2007,55 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* Set Normal access mode  */
-       dscr = (dscr & ~DSCR_MA);
+       dscr &= ~DSCR_MA;
        retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target, ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+               retval += dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MCR(14, 0, 0, 0, 5, 0)), &dscr);
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-
+               retval += dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
-
-       /* Optimize the read as much as we can, either way we read in a single pass  */
-       if ((start_byte) || (end_byte)) {
-               /* The algorithm only copies 32 bit words, so the buffer
-                * should be expanded to include the words at either end.
-                * The first and last words will be read into a temp buffer
-                * to avoid corruption
-                */
-               tmp_buff = malloc(total_u32 * 4);
-               if (!tmp_buff)
-                       goto error_unset_dtr_r;
-
-               /* use the tmp buffer to read the entire data */
-               u8buf_ptr = tmp_buff;
-       } else
-               /* address and read length are aligned so read directly into the passed buffer */
-               u8buf_ptr = buffer;
-
-       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
-        * Abort flags are sticky, so can be read at end of transactions
-        *
-        * This data is read in aligned to 32 bit boundary.
-        */
 
-       /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
-        * increments X0 by 4. */
-       retval = mem_ap_read_buf_noincr(armv8->debug_ap, u8buf_ptr, 4, total_u32-1,
-                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
-       if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_read_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_read_cpu_memory_slow(target, size, count, buffer, &dscr);
 
-       /* Step 3.a - set DTR access mode back to Normal mode   */
-       dscr = (dscr & ~DSCR_MA);
-       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+       if (dscr & DSCR_MA) {
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       }
 
-       /* Step 3.b - read DBGDTRTX for the final value */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-       memcpy(u8buf_ptr + (total_u32-1) * 4, &value, 4);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
+
+       dpm->dscr = dscr;
+
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               goto error_free_buff_r;
-       }
-
-       /* check if we need to copy aligned data by applying any shift necessary */
-       if (tmp_buff) {
-               memcpy(buffer, tmp_buff + start_byte, total_bytes);
-               free(tmp_buff);
+               armv8_dpm_handle_exception(dpm);
+               return ERROR_FAIL;
        }
 
        /* Done */
        return ERROR_OK;
-
-error_unset_dtr_r:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
 }
 
 static int aarch64_read_phys_memory(struct target *target,
@@ -2174,15 +2063,13 @@ static int aarch64_read_phys_memory(struct target *target,
        uint32_t count, uint8_t *buffer)
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
-               address, size, count);
 
        if (count && buffer) {
                /* read memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+               retval = aarch64_read_cpu_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -2193,25 +2080,18 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_read_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
@@ -2220,15 +2100,12 @@ static int aarch64_write_phys_memory(struct target *target,
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
 
-       LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        if (count && buffer) {
                /* write memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+               return aarch64_write_cpu_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -2240,25 +2117,18 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
-                 "; count %" PRId32, address, size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_write_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -2298,6 +2168,7 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
+       uint32_t cti_base;
        int i;
        int retval = ERROR_OK;
        uint64_t debug, ttypr;
@@ -2305,9 +2176,6 @@ static int aarch64_examine_first(struct target *target)
        uint32_t tmp0, tmp1;
        debug = ttypr = cpuid = 0;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
        retval = dap_dp_init(swjdp);
        if (retval != ERROR_OK)
                return retval;
@@ -2325,7 +2193,7 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       armv8->debug_ap->memaccess_tck = 80;
+       armv8->debug_ap->memaccess_tck = 10;
 
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
@@ -2345,10 +2213,29 @@ static int aarch64_examine_first(struct target *target)
        } else
                armv8->debug_base = target->dbgbase;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
+       uint32_t prsr;
+       int64_t then = timeval_ms();
+       do {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
+               if (retval == ERROR_OK) {
+                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_PRCR, PRCR_COREPURQ|PRCR_CORENPDRQ);
+                       if (retval != ERROR_OK) {
+                               LOG_DEBUG("write to PRCR failed");
+                               break;
+                       }
+               }
+
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+
+       } while ((prsr & PRSR_PU) == 0);
+
        if (retval != ERROR_OK) {
-               LOG_DEBUG("LOCK debug access fail");
+               LOG_ERROR("target %s: failed to set power state of the core.", target_name(target));
                return retval;
        }
 
@@ -2394,12 +2281,15 @@ static int aarch64_examine_first(struct target *target)
 
        if (target->ctibase == 0) {
                /* assume a v8 rom table layout */
-               armv8->cti_base = target->ctibase = armv8->debug_base + 0x10000;
-               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, target->ctibase);
+               cti_base = armv8->debug_base + 0x10000;
+               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, cti_base);
        } else
-               armv8->cti_base = target->ctibase;
+               cti_base = target->ctibase;
+
+       armv8->cti = arm_cti_create(armv8->debug_ap, cti_base);
+       if (armv8->cti == NULL)
+               return ERROR_FAIL;
 
-       armv8->arm.core_type = ARM_MODE_MON;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
@@ -2422,6 +2312,9 @@ static int aarch64_examine_first(struct target *target)
 
        LOG_DEBUG("Configured %i hw breakpoints", aarch64->brp_num);
 
+       target->state = TARGET_UNKNOWN;
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       aarch64->isrmasking_mode = AARCH64_ISRMASK_ON;
        target_set_examined(target);
        return ERROR_OK;
 }
@@ -2456,34 +2349,22 @@ static int aarch64_init_arch_info(struct target *target,
        struct aarch64_common *aarch64, struct jtag_tap *tap)
 {
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct adiv5_dap *dap = armv8->arm.dap;
-
-       armv8->arm.dap = dap;
 
        /* Setup struct aarch64_common */
        aarch64->common_magic = AARCH64_COMMON_MAGIC;
        /*  tap has no dap initialized */
        if (!tap->dap) {
                tap->dap = dap_init();
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
                tap->dap->tap = tap;
        }
-
        armv8->arm.dap = tap->dap;
 
-       aarch64->fast_reg_read = 0;
-
        /* register arch-specific functions */
        armv8->examine_debug_reason = NULL;
-
        armv8->post_debug_entry = aarch64_post_debug_entry;
-
        armv8->pre_restore_context = NULL;
-
        armv8->armv8_mmu.read_physical_memory = aarch64_read_phys_memory;
 
-       /* REVISIT v7a setup should be in a v7a-specific routine */
        armv8_init_arch_info(target, armv8);
        target_register_timer_callback(aarch64_handle_target_request, 1, 1, target);
 
@@ -2500,7 +2381,7 @@ static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 static int aarch64_mmu(struct target *target, int *enabled)
 {
        if (target->state != TARGET_HALTED) {
-               LOG_ERROR("%s: target not halted", __func__);
+               LOG_ERROR("%s: target %s not halted", __func__, target_name(target));
                return ERROR_TARGET_INVALID;
        }
 
@@ -2511,7 +2392,7 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       return armv8_mmu_translate_va(target, virt, phys);
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)
@@ -2571,24 +2452,31 @@ COMMAND_HANDLER(aarch64_handle_smp_on_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_gdb_command)
+COMMAND_HANDLER(aarch64_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
 
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = AARCH64_ISRMASK_OFF },
+               { .name = "on", .value = AARCH64_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
-               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
+
+               aarch64->isrmasking_mode = n->value;
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, aarch64->isrmasking_mode);
+       command_print(CMD_CTX, "aarch64 interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
@@ -2621,27 +2509,23 @@ static const struct command_registration aarch64_exec_command_handlers[] = {
                .usage = "",
        },
        {
-               .name = "smp_gdb",
-               .handler = aarch64_handle_smp_gdb_command,
-               .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .name = "maskisr",
+               .handler = aarch64_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask aarch64 interrupts during single-step",
+               .usage = "['on'|'off']",
        },
 
-
        COMMAND_REGISTRATION_DONE
 };
 static const struct command_registration aarch64_command_handlers[] = {
-       {
-               .chain = arm_command_handlers,
-       },
        {
                .chain = armv8_command_handlers,
        },
        {
-               .name = "cortex_a",
+               .name = "aarch64",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A command group",
+               .help = "Aarch64 command group",
                .usage = "",
                .chain = aarch64_exec_command_handlers,
        },
@@ -2667,11 +2551,6 @@ struct target_type aarch64_target = {
        .read_memory = aarch64_read_memory,
        .write_memory = aarch64_write_memory,
 
-       .checksum_memory = arm_checksum_memory,
-       .blank_check_memory = arm_blank_check_memory,
-
-       .run_algorithm = armv4_5_run_algorithm,
-
        .add_breakpoint = aarch64_add_breakpoint,
        .add_context_breakpoint = aarch64_add_context_breakpoint,
        .add_hybrid_breakpoint = aarch64_add_hybrid_breakpoint,

Linking to existing account procedure

If you already have an account and want to add another login method, you MUST first sign in with your existing account and then change the URL to https://review.openocd.org/login/?link to return to this page; this time linking will work. Thank you.

SSH host key fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)