aarch64: add 'maskisr' command
[openocd.git] / src / target / aarch64.c
index 4097d116066ee6d8a27b570e993f9a9e75b33f14..f1ce91459dd69665a78cc2d1f1c468a0d069c984 100644 (file)
 #include "target_request.h"
 #include "target_type.h"
 #include "armv8_opcodes.h"
+#include "armv8_cache.h"
 #include <helper/time_support.h>
 
+enum restart_mode {    /* NOTE(review): no user of these values is visible in this chunk */
+       RESTART_LAZY,   /* presumably: request the restart without waiting — confirm at the call sites */
+       RESTART_SYNC,   /* presumably: wait until the restart has taken effect — confirm at the call sites */
+};
+
+enum halt_mode {       /* tells aarch64_halt_one() whether to wait for the halt */
+       HALT_LAZY,      /* issue the halt request and return without waiting */
+       HALT_SYNC,      /* also wait, via aarch64_wait_halt_one(), for the PE to report halted */
+};
+
 static int aarch64_poll(struct target *target);
 static int aarch64_debug_entry(struct target *target);
 static int aarch64_restore_context(struct target *target, bool bpwp);
@@ -43,35 +54,68 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data);
+/* walk 'pos' along a ->next linked list, starting at 'head', until NULL */
+#define foreach_smp_target(pos, head) \
+       for ((pos) = (head); (pos) != NULL; (pos) = (pos)->next)
 
+/* Write aarch64->system_control_reg to the core when the cached current value
+ * (system_control_reg_curr) differs from it. Returns ERROR_FAIL for core modes
+ * without a known SCTLR write opcode, otherwise the DPM write result. */
 static int aarch64_restore_system_control_reg(struct target *target)
 {
+       enum arm_mode target_mode = ARM_MODE_ANY;
        int retval = ERROR_OK;
+       uint32_t instr;
 
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
 
        if (aarch64->system_control_reg != aarch64->system_control_reg_curr) {
                aarch64->system_control_reg_curr = aarch64->system_control_reg;
-               retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                    0xd5181000,
-                                                    aarch64->system_control_reg);
+
+               switch (armv8->arm.core_mode) {
+               case ARMV8_64_EL0T:
+                       target_mode = ARMV8_64_EL1H;    /* issue the write from EL1h rather than EL0 */
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+                       break;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+                       break;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+                       break;
+
+               case ARM_MODE_SVC:
+               case ARM_MODE_ABT:
+               case ARM_MODE_FIQ:
+               case ARM_MODE_IRQ:
+                       instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+                       break;
+
+               default:
+                       LOG_INFO("cannot write system control register in this mode");
+                       return ERROR_FAIL;
+               }
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+               retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr, aarch64->system_control_reg);
+               /* undo the mode switch even if the write failed; retval is returned below */
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
        }
 
        return retval;
 }
 
-/*  check address before aarch64_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int aarch64_check_address(struct target *target, uint32_t address)
-{
-       /* TODO */
-       return ERROR_OK;
-}
 /*  modify system_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
@@ -80,34 +124,50 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval = ERROR_OK;
+       uint32_t instr = 0;
 
        if (enable) {
-               /*  if mmu enabled at target stop and mmu not enable */
+               /* refuse to enable the MMU when it was off at the time the target stopped */
                if (!(aarch64->system_control_reg & 0x1U)) {
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(aarch64->system_control_reg_curr & 0x1U)) {
+               if (!(aarch64->system_control_reg_curr & 0x1U))
                        aarch64->system_control_reg_curr |= 0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
-               }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
                        /*  data cache is active */
                        aarch64->system_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
+                       /* flush data cache armv8 function to be called */
                        if (armv8->armv8_mmu.armv8_cache.flush_all_data_cache)
                                armv8->armv8_mmu.armv8_cache.flush_all_data_cache(target);
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
                }
        }
+
+       switch (armv8->arm.core_mode) { /* select the SCTLR write opcode for the current mode */
+       case ARMV8_64_EL0T:
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+               break;
+       default:
+               LOG_DEBUG("unknown cpu state 0x%x", (unsigned)armv8->arm.core_mode);
+               break;  /* NOTE(review): instr is still 0 here and gets issued below */
+       }
+
+       retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr,
+                               aarch64->system_control_reg_curr);
        return retval;
 }
 
@@ -122,19 +182,13 @@ static int aarch64_init_debug_access(struct target *target)
 
        LOG_DEBUG(" ");
 
-       /* Unlocking the debug registers for modification
-        * The debugport might be uninitialised so try twice */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
-               /* try again */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-                       LOG_USER("Locking debug access failed on first, but succeeded on second try.");
-       }
-       if (retval != ERROR_OK)
+               LOG_DEBUG("Examine %s failed", "oslock");
                return retval;
+       }
+
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -142,67 +196,30 @@ static int aarch64_init_debug_access(struct target *target)
        if (retval != ERROR_OK)
                return retval;
 
-       /* Enabling of instruction execution in debug mode is done in debug_entry code */
-
-       /* Resync breakpoint registers */
-
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
-}
-
-/* To reduce needless round-trips, pass in a pointer to the current
- * DSCR value.  Initialize it to zero if you just need to know the
- * value on return from this function; or DSCR_ITE if you
- * happen to know that no instruction is pending.
- */
-static int aarch64_exec_opcode(struct target *target,
-       uint32_t opcode, uint32_t *dscr_p)
-{
-       uint32_t dscr;
-       int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       dscr = dscr_p ? *dscr_p : 0;
-
-       LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-
-       /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_ITE) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       /*
+        * Static CTI configuration:
+        * Channel 0 -> trigger outputs HALT request to PE
+        * Channel 1 -> trigger outputs Resume request to PE
+        * Gate all channel trigger events from entering the CTM
+        */
 
-       retval = mem_ap_write_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_ITR, opcode);
+       /* Enable CTI */
+       retval = arm_cti_enable(armv8->cti, true);
+       /* By default, gate all channel events to and from the CTM */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 event */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 event */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
-       do {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register");
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       } while ((dscr & DSCR_ITE) == 0);       /* Wait for InstrCompl bit to be set */
-
-       if (dscr_p)
-               *dscr_p = dscr;
+       /* Resync breakpoint registers */
 
-       return retval;
+       return ERROR_OK;
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -218,649 +235,308 @@ static int aarch64_dap_write_memap_register_u32(struct target *target,
        return retval;
 }
 
-/*
- * AARCH64 implementation of Debug Programmer's Model
- *
- * NOTE the invariant:  these routines return with DSCR_ITE set,
- * so there's no need to poll for it before executing an instruction.
- *
- * NOTE that in several of these cases the "stall" mode might be useful.
- * It'd let us queue a few operations together... prepare/finish might
- * be the places to enable/disable that mode.
- */
-
-static inline struct aarch64_common *dpm_to_a8(struct arm_dpm *dpm)
+static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
 {
-       return container_of(dpm, struct aarch64_common, armv8_common.dpm);
-}
-
-static int aarch64_write_dcc(struct armv8_common *armv8, uint32_t data)
-{
-       LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
-}
-
-static int aarch64_write_dcc_64(struct armv8_common *armv8, uint64_t data)
-{
-       int ret;
-       LOG_DEBUG("write DCC Low word0x%08" PRIx32, (unsigned)data);
-       LOG_DEBUG("write DCC High word 0x%08" PRIx32, (unsigned)(data >> 32));
-       ret = mem_ap_write_u32(armv8->debug_ap,
-                              armv8->debug_base + CPUV8_DBG_DTRRX, data);
-       ret += mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, data >> 32);
-       return ret;
-}
-
-static int aarch64_read_dcc(struct armv8_common *armv8, uint32_t *data,
-       uint32_t *dscr_p)
-{
-       uint32_t dscr = DSCR_ITE;
+       struct arm_dpm *dpm = &a8->armv8_common.dpm;
        int retval;
 
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           data);
-       if (retval != ERROR_OK)
-               return retval;
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
+       dpm->arm = &a8->armv8_common.arm;
+       dpm->didr = debug;
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       retval = armv8_dpm_setup(dpm);
+       if (retval == ERROR_OK)
+               retval = armv8_dpm_initialize(dpm);
 
        return retval;
 }
 
-static int aarch64_read_dcc_64(struct armv8_common *armv8, uint64_t *data,
-       uint32_t *dscr_p)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
 {
-       uint32_t dscr = DSCR_ITE;
-       uint32_t higher;
-       int retval;
-
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
+       struct armv8_common *armv8 = target_to_armv8(target);
+       return armv8_set_dbgreg_bits(armv8, CPUV8_DBG_DSCR, bit_mask, value);
+}
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           (uint32_t *)data);
-       if (retval != ERROR_OK)
-               return retval;
+static int aarch64_check_state_one(struct target *target,
+               uint32_t mask, uint32_t val, int *p_result, uint32_t *p_prsr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t prsr;
+       int retval;
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRRX,
-                                           &higher);
+                       armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
        if (retval != ERROR_OK)
                return retval;
 
-       *data = *(uint32_t *)data | (uint64_t)higher << 32;
-       LOG_DEBUG("read DCC 0x%16.16" PRIx64, *data);
+       if (p_prsr)
+               *p_prsr = prsr;
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       if (p_result)
+               *p_result = (prsr & mask) == (val & mask);
 
-       return retval;
+       return ERROR_OK;
 }
 
-static int aarch64_dpm_prepare(struct arm_dpm *dpm)
+static int aarch64_wait_halt_one(struct target *target)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr;
-       int retval;
+       int retval = ERROR_OK;
+       uint32_t prsr;
 
-       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_ITE) != 0)
+       int64_t then = timeval_ms();
+       for (;;) {
+               int halted;
+
+               retval = aarch64_check_state_one(target, PRSR_HALT, PRSR_HALT, &halted, &prsr);
+               if (retval != ERROR_OK || halted)
                        break;
+
                if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for dpm prepare");
-                       return ERROR_FAIL;
+                       retval = ERROR_TARGET_TIMEOUT;
+                       LOG_DEBUG("target %s timeout, prsr=0x%08"PRIx32, target_name(target), prsr);
+                       break;
                }
        }
-
-       /* this "should never happen" ... */
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX */
-               retval = aarch64_exec_opcode(
-                               a8->armv8_common.arm.target,
-                               0xd5130400,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
        return retval;
 }
 
-static int aarch64_dpm_finish(struct arm_dpm *dpm)
+static int aarch64_prepare_halt_smp(struct target *target, bool exc_target, struct target **p_first)
 {
-       /* REVISIT what could be done here? */
-       return ERROR_OK;
-}
-
-static int aarch64_instr_execute(struct arm_dpm *dpm,
-       uint32_t opcode)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+       int retval = ERROR_OK;
+       struct target_list *head = target->head;
+       struct target *first = NULL;
 
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
+       LOG_DEBUG("target %s exc %i", target_name(target), exc_target);
 
-static int aarch64_instr_write_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
+       while (head != NULL) {
+               struct target *curr = head->target;
+               struct armv8_common *armv8 = target_to_armv8(curr);
+               head = head->next;
 
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+               if (exc_target && curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_RUNNING)
+                       continue;
 
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
+               /* HACK: mark this target as prepared for halting */
+               curr->debug_reason = DBG_REASON_DBGRQ;
 
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
+               /* open the gate for channel 0 to let HALT requests pass to the CTM */
+               retval = arm_cti_ungate_channel(armv8->cti, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
+               if (retval != ERROR_OK)
+                       break;
 
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+               LOG_DEBUG("target %s prepared", target_name(curr));
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330500,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+               if (first == NULL)
+                       first = curr;
+       }
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       if (p_first) {
+               if (exc_target && first)
+                       *p_first = first;
+               else
+                       *p_first = target;
+       }
 
        return retval;
 }
 
-static int aarch64_instr_write_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
+static int aarch64_halt_one(struct target *target, enum halt_mode mode)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
+       int retval = ERROR_OK;
+       struct armv8_common *armv8 = target_to_armv8(target);
 
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
+       LOG_DEBUG("%s", target_name(target));
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330400,
-                       &dscr);
+       /* allow Halting Debug Mode */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK)
                return retval;
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_cpsr_sync(struct arm_dpm *dpm)
-{
-       struct target *target = dpm->arm->target;
-       uint32_t dscr = DSCR_ITE;
-
-       /* "Prefetch flush" after modifying execution status in CPSR */
-       return aarch64_exec_opcode(target,
-                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
-                       &dscr);
-}
-
-static int aarch64_instr_read_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       if (mode == HALT_SYNC) {
+               retval = aarch64_wait_halt_one(target);
+               if (retval != ERROR_OK) {
+                       if (retval == ERROR_TARGET_TIMEOUT)
+                               LOG_ERROR("Timeout waiting for target %s halt", target_name(target));
+                       return retval;
+               }
+       }
 
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
+       return ERROR_OK;
 }
 
-static int aarch64_instr_read_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
+static int aarch64_halt_smp(struct target *target, bool exc_target)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
+       struct target *next = target;
        int retval;
 
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       /* prepare halt on all PEs of the group */
+       retval = aarch64_prepare_halt_smp(target, exc_target, &next);
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
+       if (exc_target && next == target)
                return retval;
 
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       /* halt the target PE */
+       if (retval == ERROR_OK)
+               retval = aarch64_halt_one(next, HALT_LAZY);
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
-}
+       /* wait for all PEs to halt */
+       int64_t then = timeval_ms();
+       for (;;) {
+               bool all_halted = true;
+               struct target_list *head;
+               struct target *curr;
 
-static int aarch64_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-       uint32_t addr, uint32_t control)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv8_common.debug_base;
-       uint32_t cr = a8->armv8_common.debug_base;
-       int retval;
+               foreach_smp_target(head, target->head) {
+                       int halted;
 
-       switch (index_t) {
-               case 0 ... 15:  /* breakpoints */
-                       vr += CPUV8_DBG_BVR_BASE;
-                       cr += CPUV8_DBG_BCR_BASE;
-                       break;
-               case 16 ... 31: /* watchpoints */
-                       vr += CPUV8_DBG_WVR_BASE;
-                       cr += CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       vr += 4 * index_t;
-       cr += 4 * index_t;
+                       curr = head->target;
 
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-               (unsigned) vr, (unsigned) cr);
+                       if (!target_was_examined(curr))
+                               continue;
 
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       vr, addr);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       cr, control);
-       return retval;
-}
-
-static int aarch64_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
-{
-       return ERROR_OK;
+                       retval = aarch64_check_state_one(curr, PRSR_HALT, PRSR_HALT, &halted, NULL);
+                       if (retval != ERROR_OK || !halted) {
+                               all_halted = false;
+                               break;
+                       }
+               }
 
-#if 0
-       struct aarch64_common *a = dpm_to_a8(dpm);
-       uint32_t cr;
+               if (all_halted)
+                       break;
 
-       switch (index_t) {
-               case 0 ... 15:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_BCR_BASE;
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
                        break;
-               case 16 ... 31:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
+               }
+
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find that a core has not halted
+                * yet, we trigger an explicit halt for the second cluster.
+                */
+               retval = aarch64_halt_one(curr, HALT_LAZY);
+               if (retval != ERROR_OK)
                        break;
-               default:
-                       return ERROR_FAIL;
        }
-       cr += 4 * index_t;
-
-       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
-
-       /* clear control register */
-       return aarch64_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
-#endif
-}
-
-static int aarch64_dpm_setup(struct aarch64_common *a8, uint32_t debug)
-{
-       struct arm_dpm *dpm = &a8->armv8_common.dpm;
-       int retval;
-
-       dpm->arm = &a8->armv8_common.arm;
-       dpm->didr = debug;
-
-       dpm->prepare = aarch64_dpm_prepare;
-       dpm->finish = aarch64_dpm_finish;
-
-       dpm->instr_execute = aarch64_instr_execute;
-       dpm->instr_write_data_dcc = aarch64_instr_write_data_dcc;
-       dpm->instr_write_data_dcc_64 = aarch64_instr_write_data_dcc_64;
-       dpm->instr_write_data_r0 = aarch64_instr_write_data_r0;
-       dpm->instr_write_data_r0_64 = aarch64_instr_write_data_r0_64;
-       dpm->instr_cpsr_sync = aarch64_instr_cpsr_sync;
-
-       dpm->instr_read_data_dcc = aarch64_instr_read_data_dcc;
-       dpm->instr_read_data_dcc_64 = aarch64_instr_read_data_dcc_64;
-       dpm->instr_read_data_r0 = aarch64_instr_read_data_r0;
-       dpm->instr_read_data_r0_64 = aarch64_instr_read_data_r0_64;
-
-       dpm->arm_reg_current = armv8_reg_current;
-
-       dpm->bpwp_enable = aarch64_bpwp_enable;
-       dpm->bpwp_disable = aarch64_bpwp_disable;
-
-       retval = armv8_dpm_setup(dpm);
-       if (retval == ERROR_OK)
-               retval = armv8_dpm_initialize(dpm);
 
        return retval;
 }
-static struct target *get_aarch64(struct target *target, int32_t coreid)
+
+/*
+ * Update the other members of an SMP group after 'target' was found halted.
+ *
+ * When debug_reason is DBG_REASON_NOTHALTED, aarch64_halt_smp() is invoked
+ * for the group first. All other examined group members that are not yet
+ * marked halted are then polled; the target that gdb_service points to is
+ * deferred and polled after the loop, unless it is 'target' itself.
+ * Return values of the helpers are not evaluated, so this always returns
+ * ERROR_OK.
+ */
+static int update_halt_gdb(struct target *target, enum target_debug_reason debug_reason)
 {
+       struct target *gdb_target = NULL;
        struct target_list *head;
        struct target *curr;
 
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-                       return curr;
-               head = head->next;
+       if (debug_reason == DBG_REASON_NOTHALTED) {
+               LOG_INFO("Halting remaining targets in SMP group");
+               /* NOTE(review): the result of aarch64_halt_smp() is not checked here */
+               aarch64_halt_smp(target, true);
        }
-       return target;
-}
-static int aarch64_halt(struct target *target);
 
-static int aarch64_halt_smp(struct target *target)
-{
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
+       /* poll all targets in the group, but skip the target that serves GDB */
+       foreach_smp_target(head, target->head) {
                curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_HALTED))
-                       retval += aarch64_halt(curr);
-               head = head->next;
-       }
-       return retval;
-}
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* remember the gdb_service->target */
+               if (curr->gdb_service != NULL)
+                       gdb_target = curr->gdb_service->target;
+               /* skip it here, it is polled once the loop is done */
+               if (curr == gdb_target)
+                       continue;
+
+               /*
+                * avoid recursion in aarch64_poll(): it only calls back into
+                * update_halt_gdb() for targets that have smp set
+                */
+               curr->smp = 0;
+               aarch64_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               aarch64_poll(gdb_target);
 
-static int update_halt_gdb(struct target *target)
-{
-       int retval = 0;
-       if (target->gdb_service && target->gdb_service->core[0] == -1) {
-               target->gdb_service->target = target;
-               target->gdb_service->core[0] = target->coreid;
-               retval += aarch64_halt_smp(target);
-       }
-       return retval;
+       return ERROR_OK;
 }
 
 /*
- * Cortex-A8 Run control
+ * Aarch64 Run control
  */
 
+/*
+ * Poll one target and update target->state.
+ *
+ * aarch64_check_state_one() is asked whether PRSR_HALT is set. If the
+ * target is reported halted and was not already marked TARGET_HALTED,
+ * debug entry is performed, the rest of an SMP group is updated through
+ * update_halt_gdb() and the matching halted event is delivered. If the
+ * target is not reported halted it is marked TARGET_RUNNING.
+ *
+ * Returns ERROR_OK, or the error from aarch64_check_state_one() or
+ * aarch64_debug_entry().
+ */
 static int aarch64_poll(struct target *target)
 {
+       enum target_state prev_target_state;
        int retval = ERROR_OK;
-       uint32_t dscr;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
-       struct armv8_common *armv8 = &aarch64->armv8_common;
-       enum target_state prev_target_state = target->state;
-       /*  toggle to another core is done by gdb as follow */
-       /*  maint packet J core_id */
-       /*  continue */
-       /*  the next polling trigger an halt event sent to gdb */
-       if ((target->state == TARGET_HALTED) && (target->smp) &&
-               (target->gdb_service) &&
-               (target->gdb_service->target == NULL)) {
-               target->gdb_service->target =
-                       get_aarch64(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-               return retval;
-       }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       int halted;
+
+       retval = aarch64_check_state_one(target,
+                               PRSR_HALT, PRSR_HALT, &halted, NULL);
        if (retval != ERROR_OK)
                return retval;
-       aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) {
+       if (halted) {
+               prev_target_state = target->state;
                if (prev_target_state != TARGET_HALTED) {
+                       /* captured before aarch64_debug_entry() runs; passed to update_halt_gdb() */
+                       enum target_debug_reason debug_reason = target->debug_reason;
+
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
-                       }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
-
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
+                       LOG_DEBUG("Target %s halted", target_name(target));
+                       retval = aarch64_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
 
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
+                       if (target->smp)
+                               update_halt_gdb(target, debug_reason);
+
+                       /* the event delivered depends on the state the target was in before */
+                       switch (prev_target_state) {
+                       case TARGET_RUNNING:
+                       case TARGET_UNKNOWN:
+                       case TARGET_RESET:
+                               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
+                               break;
+                       case TARGET_DEBUG_RUNNING:
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                               break;
+                       default:
+                               break;
                        }
                }
-       } else if (DSCR_RUN_MODE(dscr) == DSCR_CORE_RESTARTED)
+       } else
                target->state = TARGET_RUNNING;
-       else {
-               LOG_DEBUG("Unknown target state dscr = 0x%08" PRIx32, dscr);
-               target->state = TARGET_UNKNOWN;
-       }
 
        return retval;
 }
 
+/*
+ * Halt request entry point: a target without the smp flag is halted on
+ * its own with HALT_SYNC, a target with it is handed to aarch64_halt_smp().
+ */
 static int aarch64_halt(struct target *target)
 {
-       int retval = ERROR_OK;
-       uint32_t dscr;
-       struct armv8_common *armv8 = target_to_armv8(target);
-
-       /* enable CTI*/
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_CTR, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_GATE, 3);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_OUTEN0, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_OUTEN1, 2);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /*
-        * add HDE in halting debug mode
-        */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       if (!target->smp)
+               return aarch64_halt_one(target, HALT_SYNC);
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr | DSCR_HDE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_INACK, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCRV8_HALT_MASK) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for halt");
-                       return ERROR_FAIL;
-               }
-       }
-
-       target->debug_reason = DBG_REASON_DBGRQ;
-
-       return ERROR_OK;
+       return aarch64_halt_smp(target, false);
 }
 
-static int aarch64_internal_restore(struct target *target, int current,
+static int aarch64_restore_one(struct target *target, int current,
        uint64_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -868,6 +544,8 @@ static int aarch64_internal_restore(struct target *target, int current,
        int retval;
        uint64_t resume_pc;
 
+       LOG_DEBUG("%s", target_name(target));
+
        if (!debug_execution)
                target_free_all_working_areas(target);
 
@@ -899,56 +577,33 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
        arm->pc->dirty = 1;
        arm->pc->valid = 1;
-       dpmv8_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
-
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
-#if 0
-       /* the front-end may request us not to handle breakpoints */
-       if (handle_breakpoints) {
-               /* Single step past breakpoint at current address */
-               breakpoint = breakpoint_find(target, resume_pc);
-               if (breakpoint) {
-                       LOG_DEBUG("unset breakpoint at 0x%8.8x", breakpoint->address);
-                       cortex_m3_unset_breakpoint(target, breakpoint);
-                       cortex_m3_single_step_core(target);
-                       cortex_m3_set_breakpoint(target, breakpoint);
-               }
-       }
-#endif
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target)
+/**
+ * prepare single target for restart
+ *
+ * Reads DSCR and complains if ITE is clear or ERR is set, acknowledges a
+ * pending CTI halt event, opens the CTI gate for channel 1 and closes it
+ * for channel 0, sets DSCR.HDE and finally reads PRSR to clear its sticky
+ * bits. Each step only runs if the previous one succeeded, so the first
+ * failure is what gets reported.
+ *
+ * @param target the target to prepare
+ * @return ERROR_OK on success, otherwise the error of the first failing step
+ */
+static int aarch64_prepare_restart_one(struct target *target)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
        int retval;
        uint32_t dscr;
-       /*
-        * * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * * exception flags: see ARMv7 ARM, C5.9.
-        *
-        * REVISIT: for single stepping, we probably want to
-        * disable IRQs by default, with optional override...
-        */
+       uint32_t tmp;
+
+       LOG_DEBUG("%s", target_name(target));
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
@@ -956,54 +611,202 @@ static int aarch64_internal_restart(struct target *target)
                return retval;
 
        if ((dscr & DSCR_ITE) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, 2);
+       /* acknowledge a pending CTI halt event */
+       retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+       /*
+        * open the CTI gate for channel 1 so that the restart events
+        * get passed along to all PEs. Also close gate for channel 0
+        * to isolate the PE from halt events.
+        */
+       if (retval == ERROR_OK)
+               retval = arm_cti_ungate_channel(armv8->cti, 1);
+       if (retval == ERROR_OK)
+               retval = arm_cti_gate_channel(armv8->cti, 0);
+
+       /* make sure that DSCR.HDE is set */
+       if (retval == ERROR_OK) {
+               dscr |= DSCR_HDE;
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       /*
+        * clear sticky bits in PRSR, SDR is now 0. Only done if everything
+        * before succeeded, so that an earlier error is not overwritten.
+        */
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &tmp);
+
+       return retval;
+}
+
+/*
+ * Issue the restart request for a single, already prepared target.
+ *
+ * A pulse on CTI channel 1 generates the restart request. With
+ * RESTART_SYNC the function then waits up to 1000 ms until
+ * aarch64_check_state_one() reports PRSR_SDR set; with any other mode it
+ * does not wait. On success the target is marked TARGET_RUNNING with
+ * debug reason DBG_REASON_NOTHALTED.
+ *
+ * Returns ERROR_OK, ERROR_TARGET_TIMEOUT if the target did not resume in
+ * time, or the error of a failed CTI or state access.
+ */
+static int aarch64_do_restart_one(struct target *target, enum restart_mode mode)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* trigger an event on channel 1, generates a restart request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 1);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_HDE) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for resume");
-                       return ERROR_FAIL;
+       if (mode == RESTART_SYNC) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       int resumed;
+                       /*
+                        * if PRSR.SDR is set now, the target did restart, even
+                        * if it's now already halted again (e.g. due to breakpoint)
+                        */
+                       retval = aarch64_check_state_one(target,
+                                               PRSR_SDR, PRSR_SDR, &resumed, NULL);
+                       if (retval != ERROR_OK || resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: Timeout waiting for resume", target_name(target));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
                }
        }
 
+       if (retval != ERROR_OK)
+               return retval;
+
        target->debug_reason = DBG_REASON_NOTHALTED;
        target->state = TARGET_RUNNING;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
        return ERROR_OK;
 }
 
-static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
+/*
+ * Prepare a single target for restart and, if that worked, restart it
+ * using the given mode. The first error encountered is returned.
+ */
+static int aarch64_restart_one(struct target *target, enum restart_mode mode)
 {
-       int retval = 0;
+       int status;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       status = aarch64_prepare_restart_one(target);
+       if (status != ERROR_OK)
+               return status;
+
+       return aarch64_do_restart_one(target, mode);
+}
+
+/*
+ * prepare all but the current target for restart
+ *
+ * Walks the SMP group of 'target'. Every other member that was examined
+ * and is in state TARGET_HALTED gets its context restored with
+ * aarch64_restore_one() and is then prepared with
+ * aarch64_prepare_restart_one(). The walk stops at the first failure.
+ *
+ * If p_first is not NULL it receives the first group member that was
+ * prepared successfully, or NULL if there was none.
+ *
+ * Returns ERROR_OK or the error of the failing restore/prepare step.
+ */
+static int aarch64_prep_restart_smp(struct target *target, int handle_breakpoints, struct target **p_first)
+{
+       int retval = ERROR_OK;
        struct target_list *head;
-       struct target *curr;
+       struct target *first = NULL;
        uint64_t address;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
-                       /*  resume current address , not in step mode */
-                       retval += aarch64_internal_restore(curr, 1, &address,
-                                       handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr);
+
+       foreach_smp_target(head, target->head) {
+               struct target *curr = head->target;
+
+               /* skip calling target */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* only halted targets need to be prepared */
+               if (curr->state != TARGET_HALTED)
+                       continue;
+
+               /*  resume at current address, not in step mode */
+               retval = aarch64_restore_one(curr, 1, &address, handle_breakpoints, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_prepare_restart_one(curr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("failed to restore target %s", target_name(curr));
+                       break;
                }
-               head = head->next;
+               /* remember the first valid target in the group */
+               if (first == NULL)
+                       first = curr;
+       }
+
+       /* reported even when the loop stopped early because of an error */
+       if (p_first)
+               *p_first = first;
 
+       return retval;
+}
+
+
+/*
+ * Restart the other members of an SMP group around a step of 'target'.
+ *
+ * The other halted group members are prepared with
+ * aarch64_prep_restart_smp() (handle_breakpoints = 0) and the first
+ * prepared one is restarted with RESTART_LAZY. The group is then watched
+ * for up to 1000 ms: each examined member other than 'target' that is
+ * found resumed is marked TARGET_RUNNING and gets a TARGET_EVENT_RESUMED
+ * callback. A member that is still halted gets another explicit restart
+ * request before the next pass.
+ *
+ * Returns ERROR_OK, ERROR_TARGET_TIMEOUT, or the error of a failed
+ * prepare/restart step.
+ */
+static int aarch64_step_restart_smp(struct target *target)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prep_restart_smp(target, 0, &first);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (first != NULL)
+               retval = aarch64_do_restart_one(first, RESTART_LAZY);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("error restarting target %s", target_name(first));
+               return retval;
        }
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               /* after the inner loop this is the member that stopped the scan */
+               struct target *curr = target;
+               bool all_resumed = true;
+
+               foreach_smp_target(head, target->head) {
+                       uint32_t prsr;
+                       int resumed;
+
+                       curr = head->target;
+
+                       if (curr == target)
+                               continue;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr,
+                                       PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                       if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                               all_resumed = false;
+                               break;
+                       }
+
+                       if (curr->state != TARGET_RUNNING) {
+                               curr->state = TARGET_RUNNING;
+                               curr->debug_reason = DBG_REASON_NOTHALTED;
+                               target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                       }
+               }
+
+               if (all_resumed)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTIs are not connected by a common
+                * trigger matrix. It seems that we need to restart one core in
+                * each cluster explicitly. So if we find that a core has not
+                * resumed yet, we trigger an explicit resume for the second cluster.
+                */
+               retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+       }
+
        return retval;
 }
 
@@ -1013,24 +816,86 @@ static int aarch64_resume(struct target *target, int current,
        int retval = 0;
        uint64_t addr = address;
 
-       /* dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
-               /*   simulate a start and halt of target */
-               target->gdb_service->target = NULL;
-               target->gdb_service->core[0] = target->gdb_service->core[1];
-               /*  fake resume at next poll we play the  target core[1], see poll*/
-               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               return 0;
-       }
-       aarch64_internal_restore(target, current, &addr, handle_breakpoints,
-                                debug_execution);
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       /*
+        * If this target is part of a SMP group, prepare the others
+        * targets for resuming. This involves restoring the complete
+        * target register context and setting up CTI gates to accept
+        * resume events from the trigger matrix.
+        */
        if (target->smp) {
-               target->gdb_service->core[0] = -1;
-               retval = aarch64_restore_smp(target, handle_breakpoints);
+               retval = aarch64_prep_restart_smp(target, handle_breakpoints, NULL);
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target);
+
+       /* all targets prepared, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &addr, handle_breakpoints,
+                                debug_execution);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_SYNC);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->smp) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       struct target *curr = target;
+                       struct target_list *head;
+                       bool all_resumed = true;
+
+                       foreach_smp_target(head, target->head) {
+                               uint32_t prsr;
+                               int resumed;
+
+                               curr = head->target;
+                               if (curr == target)
+                                       continue;
+                               if (!target_was_examined(curr))
+                                       continue;
+
+                               retval = aarch64_check_state_one(curr,
+                                               PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                               if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                                       all_resumed = false;
+                                       break;
+                               }
+
+                               if (curr->state != TARGET_RUNNING) {
+                                       curr->state = TARGET_RUNNING;
+                                       curr->debug_reason = DBG_REASON_NOTHALTED;
+                                       target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                               }
+                       }
+
+                       if (all_resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+
+                       /*
+                        * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                        * and it looks like the CTIs are not connected by a common
+                        * trigger matrix. It seems that we need to restart one core in
+                        * each cluster explicitly. So if we find that a core has not
+                        * resumed yet, we trigger an explicit resume for the second cluster.
+                        */
+                       retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+                       if (retval != ERROR_OK)
+                               break;
+               }
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
@@ -1047,58 +912,68 @@ static int aarch64_resume(struct target *target, int current,
 
+/*
+ * Bring the debugger's view of a freshly halted target up to date.
+ *
+ * Sequence, each step depending on the previous one succeeding:
+ * write DRCR_CSE to DRCR, read DSCR, acknowledge the CTI halt event,
+ * select opcodes and register access according to the core state derived
+ * from DSCR, write 0 to CTI_GATE, run the dpm's instr_cpsr_sync hook,
+ * report DSCR to the dpm layer, read the watchpoint fault address if the
+ * debug reason is a watchpoint, read the current registers and finally
+ * call the optional post_debug_entry hook.
+ *
+ * Returns ERROR_OK or the error of the first failing step.
+ */
 static int aarch64_debug_entry(struct target *target)
 {
-       uint32_t dscr;
        int retval = ERROR_OK;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t tmp;
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
+       uint32_t dscr;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
+       /* make sure to clear all sticky errors */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval == ERROR_OK)
+               retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
 
-       /* REVISIT surely we should not re-read DSCR !! */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), dscr);
 
-       /* Enable the ITR execution once we are in debug mode */
-       dscr |= DSCR_ITR_EN;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       /*
+        * cache DSCR in the dpm and use the core state derived from it to
+        * choose between the AArch64 and the non-AArch64 opcode/register access
+        */
+       dpm->dscr = dscr;
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
+
+       /* close the CTI gate for all events */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       arm_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                                  armv8->debug_base + CPUV8_DBG_EDESR, &tmp);
-       if ((tmp & 0x7) == 0x4)
-               target->debug_reason = DBG_REASON_SINGLESTEP;
+       armv8_dpm_report_dscr(dpm, dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
-               uint32_t wfar;
+               uint32_t tmp;
+               uint64_t wfar = 0;
 
+               /* WFAR1 supplies the upper 32 bits of the address, WFAR0 the lower 32 bits */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_WFAR1,
+                               &tmp);
+               if (retval != ERROR_OK)
+                       return retval;
+               wfar = tmp;
+               wfar = (wfar << 32);
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_WFAR0,
-                               &wfar);
+                               &tmp);
                if (retval != ERROR_OK)
                        return retval;
-               arm_dpm_report_wfar(&armv8->dpm, wfar);
+               wfar |= tmp;
+               armv8_dpm_report_wfar(&armv8->dpm, wfar);
        }
 
        retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       /* the hook is optional and only runs after the registers were read successfully */
+       if (retval == ERROR_OK && armv8->post_debug_entry)
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -1107,40 +982,77 @@ static int aarch64_post_debug_entry(struct target *target)
 {
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct armv8_mmu_common *armv8_mmu = &armv8->armv8_mmu;
-       uint32_t sctlr_el1 = 0;
        int retval;
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr;
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL3, 0);
+               break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+               instr = ARMV4_5_MRC(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_INFO("cannot read system control register in this mode");
+               return ERROR_FAIL;
+       }
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
 
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
-       retval = aarch64_instr_read_data_r0(armv8->arm.dpm,
-                                           0xd5381000, &sctlr_el1);
+       retval = armv8->dpm.instr_read_data_r0(&armv8->dpm, instr, &aarch64->system_control_reg);
        if (retval != ERROR_OK)
                return retval;
 
-       LOG_DEBUG("sctlr_el1 = %#8.8x", sctlr_el1);
-       aarch64->system_control_reg = sctlr_el1;
-       aarch64->system_control_reg_curr = sctlr_el1;
-       aarch64->curr_mode = armv8->arm.core_mode;
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
-       armv8_mmu->mmu_enabled = sctlr_el1 & 0x1U ? 1 : 0;
-       armv8_mmu->armv8_cache.d_u_cache_enabled = sctlr_el1 & 0x4U ? 1 : 0;
-       armv8_mmu->armv8_cache.i_cache_enabled = sctlr_el1 & 0x1000U ? 1 : 0;
+       LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
+       aarch64->system_control_reg_curr = aarch64->system_control_reg;
 
-#if 0
-       if (armv8->armv8_mmu.armv8_cache.ctype == -1)
-               armv8_identify_cache(target);
-#endif
+       if (armv8->armv8_mmu.armv8_cache.info == -1) {
+               armv8_identify_cache(armv8);
+               armv8_read_mpidr(armv8);
+       }
 
+       armv8->armv8_mmu.mmu_enabled =
+                       (aarch64->system_control_reg & 0x1U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.d_u_cache_enabled =
+               (aarch64->system_control_reg & 0x4U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.i_cache_enabled =
+               (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
        return ERROR_OK;
 }
 
+/*
+ * Single-step a halted target using hardware step mode (EDECR.SS).
+ *
+ * EDECR.SS is set for the duration of the step and the previous EDECR value
+ * (with SS cleared) is written back afterwards. When isrmasking_mode is
+ * AARCH64_ISRMASK_ON, DSCR bits 23:22 are set while stepping and cleared
+ * again afterwards. For an SMP target with handle_breakpoints == 0, the
+ * other targets in the group are restarted first while this one is gated
+ * off through its CTI.
+ *
+ * After the restart, the target is polled for up to 100 ms until
+ * PRSR_SDR|PRSR_HALT is seen; on timeout the core is halted explicitly.
+ *
+ * Returns the result of aarch64_poll() when the step went through, or the
+ * error code of the setup, restart, polling, halt or cleanup operation
+ * that failed.
+ */
 static int aarch64_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       int saved_retval = ERROR_OK;
        int retval;
-       uint32_t tmp;
+       uint32_t edecr;
 
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
@@ -1148,57 +1060,116 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, &tmp);
-       if (retval != ERROR_OK)
-               return retval;
+                       armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
+       /* make sure EDECR.SS is not set when restoring the register */
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (tmp|0x4));
+       if (retval == ERROR_OK) {
+               edecr &= ~0x4;
+               /* set EDECR.SS to enter hardware step mode */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       }
+       /* disable interrupts while stepping */
+       if (retval == ERROR_OK && aarch64->isrmasking_mode == AARCH64_ISRMASK_ON)
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       /* bail out if stepping setup has failed */
        if (retval != ERROR_OK)
                return retval;
 
-       target->debug_reason = DBG_REASON_SINGLESTEP;
-       retval = aarch64_resume(target, 1, address, 0, 0);
+       if (target->smp && !handle_breakpoints) {
+               /*
+                * isolate current target so that it doesn't get resumed
+                * together with the others
+                */
+               retval = arm_cti_gate_channel(armv8->cti, 1);
+               /* resume all other targets in the group */
+               if (retval == ERROR_OK)
+                       retval = aarch64_step_restart_smp(target);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Failed to restart non-stepping targets in SMP group");
+                       return retval;
+               }
+               LOG_DEBUG("Restarted all non-stepping targets in SMP group");
+       }
+
+       /* all other targets running, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &address, 0, 0);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_LAZY);
+
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED) {
-               mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDESR, &tmp);
-               LOG_DEBUG("DESR = %#x", tmp);
-               retval = aarch64_poll(target);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("timeout waiting for target halt");
-                       return ERROR_FAIL;
+       LOG_DEBUG("target step-resumed at 0x%" PRIx64, address);
+       if (!handle_breakpoints)
+               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
+
+       /* wait (at most 100 ms) for the core to report the completed step */
+       int64_t then = timeval_ms();
+       for (;;) {
+               int stepped;
+               uint32_t prsr;
+
+               retval = aarch64_check_state_one(target,
+                                       PRSR_SDR|PRSR_HALT, PRSR_SDR|PRSR_HALT, &stepped, &prsr);
+               if (retval != ERROR_OK || stepped)
+                       break;
+
+               if (timeval_ms() > then + 100) {
+                       LOG_ERROR("timeout waiting for target %s halt after step",
+                                       target_name(target));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
        }
 
+       /*
+        * At least on one SoC (Renesas R8A7795) stepping over a WFI instruction
+        * causes a timeout. The core takes the step but doesn't complete it and so
+        * debug state is never entered. However, you can manually halt the core
+        * as an external debug event is also a WFI wakeup event.
+        *
+        * Any other polling error is remembered as well, so that it is still
+        * reported after EDECR and the interrupt mask have been restored
+        * instead of being overwritten by the cleanup below.
+        */
+       if (retval == ERROR_TARGET_TIMEOUT)
+               saved_retval = aarch64_halt_one(target, HALT_SYNC);
+       else
+               saved_retval = retval;
+
+       /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (tmp&(~0x4)));
+                       armv8->debug_base + CPUV8_DBG_EDECR, edecr);
        if (retval != ERROR_OK)
                return retval;
 
-       target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-       if (target->state == TARGET_HALTED)
-               LOG_DEBUG("target stepped");
+       /* restore interrupts; a failure here must not be reported as success */
+       if (aarch64->isrmasking_mode == AARCH64_ISRMASK_ON) {
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       if (saved_retval != ERROR_OK)
+               return saved_retval;
 
-       return ERROR_OK;
+       return aarch64_poll(target);
 }
 
+/*
+ * Push the cached register state back to the core before it leaves debug
+ * state.
+ *
+ * Calls the optional armv8->pre_restore_context hook (its result is not
+ * checked), then armv8_dpm_write_dirty_registers() with 'bpwp' passed
+ * through unchanged. When that succeeds, both arm->core_cache and the cache
+ * chained after it (core_cache->next) are invalidated.
+ *
+ * Returns the result of armv8_dpm_write_dirty_registers().
+ */
 static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
 
-       LOG_DEBUG(" ");
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       retval = armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       if (retval == ERROR_OK) {
+               /* registers are now invalid */
+               register_cache_invalidate(arm->core_cache);
+               register_cache_invalidate(arm->core_cache->next);
+       }
 
+       return retval;
 }
 
 /*
@@ -1216,7 +1187,6 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1265,28 +1235,38 @@ static int aarch64_set_breakpoint(struct target *target,
 
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
-               buf_set_u32(code, 0, 32, 0xD4400000);
 
+               buf_set_u32(code, 0, 32, armv8_opcode(armv8, ARMV8_OPC_HLT));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HDE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -1462,6 +1442,16 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        if ((brp_j < 0) || (brp_j >= aarch64->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
@@ -1476,6 +1466,17 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+
                        breakpoint->linked_BRP = 0;
                        breakpoint->set = 0;
                        return ERROR_OK;
@@ -1501,11 +1502,22 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
+
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        breakpoint->set = 0;
                        return ERROR_OK;
                }
        } else {
                /* restore original instruction (kept in target endianness) */
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                if (breakpoint->length == 4) {
                        retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFFFFFFFFFE,
@@ -1519,6 +1531,14 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
        }
        breakpoint->set = 0;
 
@@ -1621,7 +1641,10 @@ static int aarch64_assert_reset(struct target *target)
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv8->arm.core_cache);
+       if (target_was_examined(target)) {
+               register_cache_invalidate(armv8->arm.core_cache);
+               register_cache_invalidate(armv8->arm.core_cache->next);
+       }
 
        target->state = TARGET_RESET;
 
@@ -1637,6 +1660,9 @@ static int aarch64_deassert_reset(struct target *target)
        /* be certain SRST is off */
        jtag_add_reset(0, 0);
 
+       if (!target_was_examined(target))
+               return ERROR_OK;
+
        retval = aarch64_poll(target);
        if (retval != ERROR_OK)
                return retval;
@@ -1651,206 +1677,328 @@ static int aarch64_deassert_reset(struct target *target)
                }
        }
 
+       return aarch64_init_debug_access(target);
+}
+
+/*
+ * Write 'count' items of 'size' bytes each from 'buffer' to target memory,
+ * one item at a time, through the DCC in normal (non memory-access) mode.
+ *
+ * Per item: the value is written to DTRRX, an instruction executed on the
+ * core moves it into register 1, and a second executed instruction stores
+ * it (byte, halfword or word opcode selected by 'size'; any size other
+ * than 1 or 2 is treated as a 32-bit word).
+ *
+ * 'dscr' points to the caller's copy of DSCR. If DSCR_MA is set in it, the
+ * bit is cleared both in the copy and in the hardware register first.
+ *
+ * NOTE(review): the destination address is not passed in; presumably the
+ * caller has loaded it into register 0 and the *_IP store opcodes advance
+ * it - confirm against the opcode table.
+ *
+ * Returns ERROR_OK, or the first error reported by a debug-AP access or by
+ * an executed instruction.
+ */
+static int aarch64_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       /* register 1 is used as the data scratch register below */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode if necessary */
+       if (*dscr & DSCR_MA) {
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t data, opcode;
+
+               /* write the data to store into DTRRX */
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* move the value from the DTR into register 1 */
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MRC(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* execute the store matching the access size */
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+/*
+ * Write 'count' 32-bit words from 'buffer' to target memory using DCC
+ * memory-access mode.
+ *
+ * DSCR_MA is set, the whole buffer is pushed to DTRRX with one
+ * non-incrementing MEM-AP block write, and DSCR_MA is cleared again.
+ * 'dscr' points to the caller's copy of DSCR and is kept in step with the
+ * value written to the hardware register.
+ *
+ * The function returns as soon as an access fails, so DSCR_MA can still be
+ * set (in hardware and in '*dscr') when an error is returned.
+ *
+ * NOTE(review): the destination address is not passed in; presumably the
+ * caller has already loaded it into register 0 - confirm against caller.
+ */
+static int aarch64_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       /* register 1 is marked dirty so it is written back later */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* Step 1.d   - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+
+       /* Step 2.a   - Do the write */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       buffer, 4, count, armv8->debug_base + CPUV8_DBG_DTRRX);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       *dscr &= ~DSCR_MA;
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ap_memory(struct target *target,
+/*
+ * Write 'count' items of 'size' bytes from 'buffer' to target memory at
+ * 'address' by executing instructions on the halted core (DCC transfer).
+ *
+ * The address is loaded into X0/R0 first. Word-sized, 4-byte aligned
+ * requests use the memory-access-mode fast path, everything else goes
+ * through the per-item slow path. Afterwards DSCR is re-read: if
+ * DSCR_ERR or DSCR_SYS_ERROR_PEND is set, the exception is handled and
+ * ERROR_FAIL is returned.
+ *
+ * Returns ERROR_TARGET_NOT_HALTED if the target is running, ERROR_FAIL on
+ * a sticky abort, the error of a failed setup/transfer/debug access, or
+ * ERROR_OK. A failed setup or transfer is reported even when no abort
+ * flag is set in DSCR.
+ */
+static int aarch64_write_cpu_memory(struct target *target,
        uint64_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
        /* write memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
+       int xfer_retval;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
-                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-
-       /* Mark register R0 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
+       armv8_reg_current(arm, 0)->dirty = true;
 
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
+
+       /* Read DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /*  clear any abort  */
+       /* Set Normal access mode  */
+       dscr = (dscr & ~DSCR_MA);
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Write X0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
+       } else {
+               /* Write R0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTRRX */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
+       }
+
+       /* only start storing once X0/R0 is known to hold the address */
+       if (retval == ERROR_OK) {
+               if (size == 4 && (address % 4) == 0)
+                       retval = aarch64_write_cpu_memory_fast(target, count, buffer, &dscr);
+               else
+                       retval = aarch64_write_cpu_memory_slow(target, size, count, buffer, &dscr);
+       }
+
+       if (retval != ERROR_OK) {
+               /* Unset DTR mode */
+               mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       /* keep the setup/transfer status, retval is reused for the DSCR read */
+       xfer_retval = retval;
+
+       /* Check for sticky abort flags in the DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
+       dpm->dscr = dscr;
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
+               /* Abort occurred - clear it and exit */
+               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
+               armv8_dpm_handle_exception(dpm);
+               return ERROR_FAIL;
+       }
 
-       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
+       /* Done - report the status of the setup/transfer itself */
+       return xfer_retval;
+}
 
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = malloc(total_u32 * 4);
+/*
+ * Read 'count' items of 'size' bytes each from target memory into
+ * 'buffer', one item at a time, through the DCC in normal (non
+ * memory-access) mode.
+ *
+ * Per item: a load instruction (byte, halfword or word opcode selected by
+ * 'size'; any size other than 1 or 2 is treated as a 32-bit word) is
+ * executed on the core, a second instruction moves register 1 into the
+ * DTR, and the value is then read from DTRTX and stored in 'buffer'.
+ *
+ * 'dscr' points to the caller's copy of DSCR. If DSCR_MA is set in it, the
+ * bit is cleared both in the copy and in the hardware register first.
+ *
+ * NOTE(review): the source address is not passed in; presumably the caller
+ * has loaded it into register 0 and the *_IP load opcodes advance it -
+ * confirm against the opcode table.
+ *
+ * Returns ERROR_OK, or the first error reported by a debug-AP access or by
+ * an executed instruction.
+ */
+static int aarch64_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
 
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3), 4, 1, tmp_buff);
+       /* register 1 is used as the data scratch register below */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode (if necessary) */
+       if (*dscr & DSCR_MA) {
+               /* clear MA only; all other DSCR bits must be preserved */
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
                if (retval != ERROR_OK)
-                       goto error_free_buff_w;
+                       return retval;
        }
 
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
+       while (count) {
+               uint32_t opcode, data;
+
+               /* execute the load matching the access size */
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
 
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
+               /* move the loaded value from register 1 into the DTR */
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 1, 0, 5, 0));
                if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
+                       return retval;
+
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
 
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
+               if (size == 1)
+                       *buffer = (uint8_t)data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t)data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
 
-       /* We now have a 32 bit aligned buffer that can be written */
+               /* Advance */
+               buffer += size;
+               --count;
+       }
 
-       /* Read DSCR */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_w;
+       return ERROR_OK;
+}
+
+/*
+ * Read 'count' 32-bit words from target memory into 'buffer' using DCC
+ * memory-access mode.
+ *
+ * After a dummy DTR write on the core, DSCR_MA is set and DTRTX is read
+ * once with the value discarded. The first count - 1 words are then
+ * fetched with one non-incrementing MEM-AP block read of DTRTX, DSCR_MA is
+ * cleared, and the last word is taken from a final DTRTX read.
+ *
+ * 'dscr' points to the caller's copy of DSCR and is kept in step with the
+ * value written to the hardware register. The function returns as soon as
+ * a step fails, so DSCR_MA can still be set when an error is returned. A
+ * request with count == 0 returns ERROR_OK without touching the target.
+ *
+ * NOTE(review): the source address is not passed in; presumably the caller
+ * has already loaded it into register 0 - confirm against caller.
+ */
+static int aarch64_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+       uint32_t value;
 
-       /* Set Normal access mode  */
-       dscr = (dscr & ~DSCR_MA);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       /* nothing to read; also keeps the count-- below from wrapping around */
+       if (count == 0)
+               return ERROR_OK;
+
+       /* Mark X1 as dirty */
+       armv8_reg_current(arm, 1)->dirty = true;
 
        if (arm->core_state == ARM_STATE_AARCH64) {
-               /* Write X0 with value 'address' using write procedure */
-               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
-               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
        } else {
-               /* Write R0 with value 'address' using write procedure */
-               /* Step 1.a+b - Write the address for read access into DBGDTRRX */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
-               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
-
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
        }
-       /* Step 1.d   - Change DCC to memory mode */
-       dscr = dscr | DSCR_MA;
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       if (retval != ERROR_OK)
+               return retval;
 
+       /* Step 1.e - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.f - read DBGDTRTX and discard the value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* Step 2.a   - Do the write */
-       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
-                                       tmp_buff, 4, total_u32, armv8->debug_base + CPUV8_DBG_DTRRX);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       /* the last word is read separately in step 3.b */
+       count--;
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
 
-       /* Step 3.a   - Switch DTR mode back to Normal mode */
-       dscr = (dscr & ~DSCR_MA);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (count) {
+               /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+                * increments X0 by 4. */
+               retval = mem_ap_read_buf_noincr(armv8->debug_ap, buffer, 4, count,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       *dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Check for sticky abort flags in the DSCR */
+       /* Step 3.b - read DBGDTRTX for the final value */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
-       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
-               /* Abort occurred - clear it and exit */
-               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
-               goto error_free_buff_w;
-       }
-
-       /* Done */
-       free(tmp_buff);
-       return ERROR_OK;
+               return retval;
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+       /* count was decremented above, so this is the slot of the last word */
+       target_buffer_set_u32(target, buffer + count * 4, value);
+       return retval;
 }
 
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
        /* read memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
-       uint8_t *u8buf_ptr;
-       uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
-                         address, size, count);
+       LOG_DEBUG("Reading CPU memory address 0x%016" PRIx64 " size %" PRIu32 " count %" PRIu32,
+                       address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-       /* Mark register X0, X1 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*      clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -1859,148 +2007,69 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* Set Normal access mode  */
-       dscr = (dscr & ~DSCR_MA);
+       dscr &= ~DSCR_MA;
        retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target, ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+               retval += dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MCR(14, 0, 0, 0, 5, 0)), &dscr);
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-
+               retval += dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
-
-       /* Optimize the read as much as we can, either way we read in a single pass  */
-       if ((start_byte) || (end_byte)) {
-               /* The algorithm only copies 32 bit words, so the buffer
-                * should be expanded to include the words at either end.
-                * The first and last words will be read into a temp buffer
-                * to avoid corruption
-                */
-               tmp_buff = malloc(total_u32 * 4);
-               if (!tmp_buff)
-                       goto error_unset_dtr_r;
-
-               /* use the tmp buffer to read the entire data */
-               u8buf_ptr = tmp_buff;
-       } else
-               /* address and read length are aligned so read directly into the passed buffer */
-               u8buf_ptr = buffer;
-
-       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
-        * Abort flags are sticky, so can be read at end of transactions
-        *
-        * This data is read in aligned to 32 bit boundary.
-        */
 
-       /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
-        * increments X0 by 4. */
-       retval = mem_ap_read_buf_noincr(armv8->debug_ap, u8buf_ptr, 4, total_u32-1,
-                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
-       if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_read_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_read_cpu_memory_slow(target, size, count, buffer, &dscr);
 
-       /* Step 3.a - set DTR access mode back to Normal mode   */
-       dscr = (dscr & ~DSCR_MA);
-       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+       if (dscr & DSCR_MA) {
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       }
 
-       /* Step 3.b - read DBGDTRTX for the final value */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-       memcpy(u8buf_ptr + (total_u32-1) * 4, &value, 4);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
+
+       dpm->dscr = dscr;
+
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               goto error_free_buff_r;
-       }
-
-       /* check if we need to copy aligned data by applying any shift necessary */
-       if (tmp_buff) {
-               memcpy(buffer, tmp_buff + start_byte, total_bytes);
-               free(tmp_buff);
+               armv8_dpm_handle_exception(dpm);
+               return ERROR_FAIL;
        }
 
        /* Done */
        return ERROR_OK;
-
-error_unset_dtr_r:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
 }
 
 static int aarch64_read_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
-               address, size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* read memory through AHB-AP */
-                       retval = mem_ap_read_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-                       /* read memory through APB-AP */
-                       retval = aarch64_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
-               }
+               /* read memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
+               if (retval != ERROR_OK)
+                       return retval;
+               retval = aarch64_read_cpu_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -2009,136 +2078,34 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
 
        /* determine if MMU was enabled on target stop */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Reading at virtual address. Translating v:0x%" TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR,
-                                 virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_read_phys_memory(target, address, size, count,
-                                                 buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys
-                          access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_read_apb_ap_memory(target, address, size,
-                                                   count, buffer);
-       }
-       return retval;
+       return aarch64_read_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
-
-       LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* write memory through AHB-AP */
-                       retval = mem_ap_write_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-
-                       /* write memory through APB-AP */
-                       if (!armv8->is_armv7r) {
-                               retval = aarch64_mmu_modify(target, 0);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-                       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
-               }
-       }
-
-
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED) {
-               struct arm_dpm *dpm = armv8->arm.dpm;
-
-               retval = dpm->prepare(dpm);
+               /* write memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A8 has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
-
-               /* invalidate I-Cache */
-               if (armv8->armv8_mmu.armv8_cache.i_cache_enabled) {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 5, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* invalidate D-Cache */
-               if (armv8->armv8_mmu.armv8_cache.d_u_cache_enabled) {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 6, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* (void) */ dpm->finish(dpm);
+               return aarch64_write_cpu_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -2148,51 +2115,20 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
-                 "; count %" PRId32, address, size, count);
 
        /* determine if MMU was enabled on target stop */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               LOG_DEBUG("Writing memory to address 0x%" TARGET_PRIxADDR "; size %"
-                         PRId32 "; count %" PRId32, address, size, count);
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Writing to virtual address. Translating v:0x%"
-                                 TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR, virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_write_phys_memory(target, address, size,
-                               count, buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_write_apb_ap_memory(target, address, size, count, buffer);
-       }
-       return retval;
+       return aarch64_write_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -2232,13 +2168,14 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
-       int retval = ERROR_OK;
-       uint32_t pfr, debug, ctypr, ttypr, cpuid;
+       uint32_t cti_base;
        int i;
+       int retval = ERROR_OK;
+       uint64_t debug, ttypr;
+       uint32_t cpuid;
+       uint32_t tmp0, tmp1;
+       debug = ttypr = cpuid = 0;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
        retval = dap_dp_init(swjdp);
        if (retval != ERROR_OK)
                return retval;
@@ -2256,21 +2193,7 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       armv8->debug_ap->memaccess_tck = 80;
-
-       /* Search for the AHB-AB */
-       armv8->memory_ap_available = false;
-       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv8->memory_ap);
-       if (retval == ERROR_OK) {
-               retval = mem_ap_init(armv8->memory_ap);
-               if (retval == ERROR_OK)
-                       armv8->memory_ap_available = true;
-       }
-       if (retval != ERROR_OK) {
-               /* AHB-AP not found or unavailable - use the CPU */
-               LOG_DEBUG("No AHB-AP available for memory access");
-       }
-
+       armv8->debug_ap->memaccess_tck = 10;
 
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
@@ -2285,91 +2208,95 @@ static int aarch64_examine_first(struct target *target)
                                &armv8->debug_base, &coreidx);
                if (retval != ERROR_OK)
                        return retval;
-               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32,
-                         coreidx, armv8->debug_base);
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32
+                               " apid: %08" PRIx32, coreidx, armv8->debug_base, apid);
        } else
                armv8->debug_base = target->dbgbase;
 
-       LOG_DEBUG("Target ctibase is 0x%x", target->ctibase);
-       if (target->ctibase == 0)
-               armv8->cti_base = target->ctibase = armv8->debug_base + 0x1000;
-       else
-               armv8->cti_base = target->ctibase;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "oslock");
-               return retval;
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x88, &cpuid);
-       LOG_DEBUG("0x88 = %x", cpuid);
+       uint32_t prsr;
+       int64_t then = timeval_ms();
+       do {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
+               if (retval == ERROR_OK) {
+                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_PRCR, PRCR_COREPURQ|PRCR_CORENPDRQ);
+                       if (retval != ERROR_OK) {
+                               LOG_DEBUG("write to PRCR failed");
+                               break;
+                       }
+               }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x314, &cpuid);
-       LOG_DEBUG("0x314 = %x", cpuid);
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x310, &cpuid);
-       LOG_DEBUG("0x310 = %x", cpuid);
-       if (retval != ERROR_OK)
-               return retval;
+       } while ((prsr & PRSR_PU) == 0);
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CPUID, &cpuid);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CPUID");
+               LOG_ERROR("target %s: failed to set power state of the core.", target_name(target));
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CTYPR, &ctypr);
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CTYPR");
+               LOG_DEBUG("Examine %s failed", "oslock");
                return retval;
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_TTYPR, &ttypr);
+                       armv8->debug_base + CPUV8_DBG_MAINID0, &cpuid);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "TTYPR");
+               LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64PFR0_EL1, &pfr);
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0, &tmp0);
+       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
+               LOG_DEBUG("Examine %s failed", "Memory Model Type");
                return retval;
        }
+       ttypr |= tmp1;
+       ttypr = (ttypr << 32) | tmp0;
+
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64DFR0_EL1, &debug);
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp0);
+       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
                return retval;
        }
+       debug |= tmp1;
+       debug = (debug << 32) | tmp0;
 
        LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
-       LOG_DEBUG("ctypr = 0x%08" PRIx32, ctypr);
-       LOG_DEBUG("ttypr = 0x%08" PRIx32, ttypr);
-       LOG_DEBUG("ID_AA64PFR0_EL1 = 0x%08" PRIx32, pfr);
-       LOG_DEBUG("ID_AA64DFR0_EL1 = 0x%08" PRIx32, debug);
+       LOG_DEBUG("ttypr = 0x%08" PRIx64, ttypr);
+       LOG_DEBUG("debug = 0x%08" PRIx64, debug);
+
+       if (target->ctibase == 0) {
+               /* assume a v8 rom table layout */
+               cti_base = armv8->debug_base + 0x10000;
+               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, cti_base);
+       } else
+               cti_base = target->ctibase;
+
+       armv8->cti = arm_cti_create(armv8->debug_ap, cti_base);
+       if (armv8->cti == NULL)
+               return ERROR_FAIL;
 
-       armv8->arm.core_type = ARM_MODE_MON;
-       armv8->arm.core_state = ARM_STATE_AARCH64;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
 
        /* Setup Breakpoint Register Pairs */
-       aarch64->brp_num = ((debug >> 12) & 0x0F) + 1;
-       aarch64->brp_num_context = ((debug >> 28) & 0x0F) + 1;
-
-       /* hack - no context bpt support yet */
-       aarch64->brp_num_context = 0;
-
+       aarch64->brp_num = (uint32_t)((debug >> 12) & 0x0F) + 1;
+       aarch64->brp_num_context = (uint32_t)((debug >> 28) & 0x0F) + 1;
        aarch64->brp_num_available = aarch64->brp_num;
        aarch64->brp_list = calloc(aarch64->brp_num, sizeof(struct aarch64_brp));
        for (i = 0; i < aarch64->brp_num; i++) {
@@ -2385,6 +2312,9 @@ static int aarch64_examine_first(struct target *target)
 
        LOG_DEBUG("Configured %i hw breakpoints", aarch64->brp_num);
 
+       target->state = TARGET_UNKNOWN;
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       aarch64->isrmasking_mode = AARCH64_ISRMASK_ON;
        target_set_examined(target);
        return ERROR_OK;
 }
@@ -2419,34 +2349,22 @@ static int aarch64_init_arch_info(struct target *target,
        struct aarch64_common *aarch64, struct jtag_tap *tap)
 {
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct adiv5_dap *dap = armv8->arm.dap;
-
-       armv8->arm.dap = dap;
 
        /* Setup struct aarch64_common */
        aarch64->common_magic = AARCH64_COMMON_MAGIC;
        /*  tap has no dap initialized */
        if (!tap->dap) {
                tap->dap = dap_init();
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
                tap->dap->tap = tap;
        }
-
        armv8->arm.dap = tap->dap;
 
-       aarch64->fast_reg_read = 0;
-
        /* register arch-specific functions */
        armv8->examine_debug_reason = NULL;
-
        armv8->post_debug_entry = aarch64_post_debug_entry;
-
        armv8->pre_restore_context = NULL;
-
        armv8->armv8_mmu.read_physical_memory = aarch64_read_phys_memory;
 
-       /* REVISIT v7a setup should be in a v7a-specific routine */
        armv8_init_arch_info(target, armv8);
        target_register_timer_callback(aarch64_handle_target_request, 1, 1, target);
 
@@ -2457,15 +2375,13 @@ static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 {
        struct aarch64_common *aarch64 = calloc(1, sizeof(struct aarch64_common));
 
-       aarch64->armv8_common.is_armv7r = false;
-
        return aarch64_init_arch_info(target, aarch64, target->tap);
 }
 
 static int aarch64_mmu(struct target *target, int *enabled)
 {
        if (target->state != TARGET_HALTED) {
-               LOG_ERROR("%s: target not halted", __func__);
+               LOG_ERROR("%s: target %s not halted", __func__, target_name(target));
                return ERROR_TARGET_INVALID;
        }
 
@@ -2476,26 +2392,7 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       int retval = ERROR_FAIL;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               uint32_t ret;
-               retval = armv8_mmu_translate_va(target,
-                               virt, &ret);
-               if (retval != ERROR_OK)
-                       goto done;
-               *phys = ret;
-       } else {/*  use this method if armv8->memory_ap not selected
-                *  mmu must be enable in order to get a correct translation */
-               retval = aarch64_mmu_modify(target, 1);
-               if (retval != ERROR_OK)
-                       goto done;
-               retval = armv8_mmu_translate_va_pa(target, virt,  phys, 1);
-       }
-done:
-       return retval;
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)
@@ -2555,24 +2452,31 @@ COMMAND_HANDLER(aarch64_handle_smp_on_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_gdb_command)
+COMMAND_HANDLER(aarch64_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
 
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = AARCH64_ISRMASK_OFF },
+               { .name = "on", .value = AARCH64_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
-               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
+
+               aarch64->isrmasking_mode = n->value;
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, aarch64->isrmasking_mode);
+       command_print(CMD_CTX, "aarch64 interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
@@ -2605,27 +2509,23 @@ static const struct command_registration aarch64_exec_command_handlers[] = {
                .usage = "",
        },
        {
-               .name = "smp_gdb",
-               .handler = aarch64_handle_smp_gdb_command,
-               .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .name = "maskisr",
+               .handler = aarch64_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask aarch64 interrupts during single-step",
+               .usage = "['on'|'off']",
        },
 
-
        COMMAND_REGISTRATION_DONE
 };
 static const struct command_registration aarch64_command_handlers[] = {
-       {
-               .chain = arm_command_handlers,
-       },
        {
                .chain = armv8_command_handlers,
        },
        {
-               .name = "cortex_a",
+               .name = "aarch64",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A command group",
+               .help = "Aarch64 command group",
                .usage = "",
                .chain = aarch64_exec_command_handlers,
        },
@@ -2651,11 +2551,6 @@ struct target_type aarch64_target = {
        .read_memory = aarch64_read_memory,
        .write_memory = aarch64_write_memory,
 
-       .checksum_memory = arm_checksum_memory,
-       .blank_check_memory = arm_blank_check_memory,
-
-       .run_algorithm = armv4_5_run_algorithm,
-
        .add_breakpoint = aarch64_add_breakpoint,
        .add_context_breakpoint = aarch64_add_context_breakpoint,
        .add_hybrid_breakpoint = aarch64_add_hybrid_breakpoint,

Procedure for linking to an existing account

If you already have an account and want to add another login method, you MUST first sign in with your existing account and then change the URL to https://review.openocd.org/login/?link to return to this page; this time, linking will work. Thank you.

SSH host key fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)